diff --git a/toolbox/MMDetection/patch/mmdetection/_base_/datasets/coco_detection.py b/toolbox/MMDetection/patch/mmdetection/_base_/datasets/coco_detection.py new file mode 100644 index 0000000000000000000000000000000000000000..fdf8dfad9476b1d7b7a4e8c3e2832f115a1ea7f2 --- /dev/null +++ b/toolbox/MMDetection/patch/mmdetection/_base_/datasets/coco_detection.py @@ -0,0 +1,95 @@ +# dataset settings +dataset_type = 'CocoDataset' +data_root = 'data/coco/' + +# Example to use different file client +# Method 1: simply set the data root and let the file I/O module +# automatically infer from prefix (not support LMDB and Memcache yet) + +# data_root = 's3://openmmlab/datasets/detection/coco/' + +# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6 +# backend_args = dict( +# backend='petrel', +# path_mapping=dict({ +# './data/': 's3://openmmlab/datasets/detection/', +# 'data/': 's3://openmmlab/datasets/detection/' +# })) +backend_args = None + +train_pipeline = [ + dict(type='LoadImageFromFile', backend_args=backend_args), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', prob=0.5), + dict(type='PackDetInputs') +] +test_pipeline = [ + dict(type='LoadImageFromFile', backend_args=backend_args), + dict(type='Resize', scale=(1333, 800), keep_ratio=True), + # If you don't have a gt annotation, delete the pipeline + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor')) +] +train_dataloader = dict( + batch_size=2, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + batch_sampler=dict(type='AspectRatioBatchSampler'), + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file='annotations/instances_train2017.json', + data_prefix=dict(img='train2017/'), + filter_cfg=dict(filter_empty_gt=True, min_size=32), + pipeline=train_pipeline, + backend_args=backend_args)) +val_dataloader = dict( + batch_size=1, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file='annotations/instances_val2017.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=test_pipeline, + backend_args=backend_args)) +test_dataloader = val_dataloader + +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/instances_val2017.json', + metric='bbox', + format_only=False, + backend_args=backend_args) +test_evaluator = val_evaluator + +# inference on test dataset and +# format the output results for submission. +# test_dataloader = dict( +# batch_size=1, +# num_workers=2, +# persistent_workers=True, +# drop_last=False, +# sampler=dict(type='DefaultSampler', shuffle=False), +# dataset=dict( +# type=dataset_type, +# data_root=data_root, +# ann_file=data_root + 'annotations/image_info_test-dev2017.json', +# data_prefix=dict(img='test2017/'), +# test_mode=True, +# pipeline=test_pipeline)) +# test_evaluator = dict( +# type='CocoMetric', +# metric='bbox', +# format_only=True, +# ann_file=data_root + 'annotations/image_info_test-dev2017.json', +# outfile_prefix='./work_dirs/coco_detection/test') diff --git a/toolbox/MMDetection/patch/mmdetection/_base_/default_runtime.py b/toolbox/MMDetection/patch/mmdetection/_base_/default_runtime.py new file mode 100644 index 0000000000000000000000000000000000000000..870e5614c86d7e1bbdad13d77a0db03a46ce717a --- /dev/null +++ b/toolbox/MMDetection/patch/mmdetection/_base_/default_runtime.py @@ -0,0 +1,24 @@ +default_scope = 'mmdet' + +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=50), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict(type='CheckpointHook', interval=1), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='DetVisualizationHook')) + +env_cfg = dict( + cudnn_benchmark=False, + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), + dist_cfg=dict(backend='nccl'), +) + +vis_backends = [dict(type='LocalVisBackend')] +visualizer = dict( + type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer') +log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True) + +log_level = 'INFO' +load_from = None +resume = False diff --git a/toolbox/MMDetection/patch/mmdetection/_base_/schedules/schedule_1x.py b/toolbox/MMDetection/patch/mmdetection/_base_/schedules/schedule_1x.py new file mode 100644 index 0000000000000000000000000000000000000000..95f30be74ff37080ba0d227d55bbd587feeaa892 --- /dev/null +++ b/toolbox/MMDetection/patch/mmdetection/_base_/schedules/schedule_1x.py @@ -0,0 +1,28 @@ +# training schedule for 1x +train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=12, val_interval=1) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500), + dict( + type='MultiStepLR', + begin=0, + end=12, + by_epoch=True, + milestones=[8, 11], + gamma=0.1) +] + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)) + +# Default setting for scaling LR automatically +# - `enable` means enable scaling LR automatically +# or not by default. +# - `base_batch_size` = (8 GPUs) x (2 samples per GPU). +auto_scale_lr = dict(enable=False, base_batch_size=16) diff --git a/toolbox/MMDetection/patch/mmdetection/configs/rtmdet/rtmdet_l_8xb32-300e_coco.py b/toolbox/MMDetection/patch/mmdetection/configs/rtmdet/rtmdet_l_8xb32-300e_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..1cce4d89c84a81d7aa22197cd6dd70fe08637a35 --- /dev/null +++ b/toolbox/MMDetection/patch/mmdetection/configs/rtmdet/rtmdet_l_8xb32-300e_coco.py @@ -0,0 +1,179 @@ +_base_ = [ + '../_base_/default_runtime.py', '../_base_/schedules/schedule_1x.py', + '../_base_/datasets/coco_detection.py', './rtmdet_tta.py' +] +model = dict( + type='RTMDet', + data_preprocessor=dict( + type='DetDataPreprocessor', + mean=[103.53, 116.28, 123.675], + std=[57.375, 57.12, 58.395], + bgr_to_rgb=False, + batch_augments=None), + backbone=dict( + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=1, + widen_factor=1, + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU', inplace=True)), + neck=dict( + type='CSPNeXtPAFPN', + in_channels=[256, 512, 1024], + out_channels=256, + num_csp_blocks=3, + expand_ratio=0.5, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU', inplace=True)), + bbox_head=dict( + type='RTMDetSepBNHead', + num_classes=80, + in_channels=256, + stacked_convs=2, + feat_channels=256, + anchor_generator=dict( + type='MlvlPointGenerator', offset=0, strides=[8, 16, 32]), + bbox_coder=dict(type='DistancePointBBoxCoder'), + loss_cls=dict( + type='QualityFocalLoss', + use_sigmoid=True, + beta=2.0, + loss_weight=1.0), + loss_bbox=dict(type='GIoULoss', loss_weight=2.0), + with_objectness=False, + exp_on_reg=True, + share_conv=True, + pred_kernel_size=1, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU', inplace=True)), + train_cfg=dict( + assigner=dict(type='DynamicSoftLabelAssigner', topk=13), + allowed_border=-1, + pos_weight=-1, + debug=False), + test_cfg=dict( + nms_pre=30000, + min_bbox_size=0, + score_thr=0.001, + nms=dict(type='nms', iou_threshold=0.65), + max_per_img=300), +) + +train_pipeline = [ + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='CachedMosaic', img_scale=(640, 640), pad_val=114.0), + dict( + type='RandomResize', + scale=(1280, 1280), + ratio_range=(0.1, 2.0), + keep_ratio=True), + dict(type='RandomCrop', crop_size=(640, 640)), + dict(type='YOLOXHSVRandomAug'), + dict(type='RandomFlip', prob=0.5), + dict(type='Pad', size=(640, 640), pad_val=dict(img=(114, 114, 114))), + dict( + type='CachedMixUp', + img_scale=(640, 640), + ratio_range=(1.0, 1.0), + max_cached_images=20, + pad_val=(114, 114, 114)), + dict(type='PackDetInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='RandomResize', + scale=(640, 640), + ratio_range=(0.1, 2.0), + keep_ratio=True), + dict(type='RandomCrop', crop_size=(640, 640)), + dict(type='YOLOXHSVRandomAug'), + dict(type='RandomFlip', prob=0.5), + dict(type='Pad', size=(640, 640), pad_val=dict(img=(114, 114, 114))), + dict(type='PackDetInputs') +] + +test_pipeline = [ + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), + dict(type='Resize', scale=(640, 640), keep_ratio=True), + dict(type='Pad', size=(640, 640), pad_val=dict(img=(114, 114, 114))), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor')) +] + +train_dataloader = dict( + batch_size=32, + num_workers=10, + batch_sampler=None, + pin_memory=True, + dataset=dict(pipeline=train_pipeline)) +val_dataloader = dict( + batch_size=5, num_workers=10, dataset=dict(pipeline=test_pipeline)) +test_dataloader = val_dataloader + +max_epochs = 300 +stage2_num_epochs = 20 +base_lr = 0.004 +interval = 10 + +train_cfg = dict( + max_epochs=max_epochs, + val_interval=interval, + dynamic_intervals=[(max_epochs - stage2_num_epochs, 1)]) + +val_evaluator = dict(proposal_nums=(100, 1, 10)) +test_evaluator = val_evaluator + +# optimizer +optim_wrapper = dict( + _delete_=True, + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 150 to 300 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# hooks +default_hooks = dict( + checkpoint=dict( + interval=interval, + max_keep_ckpts=3 # only keep latest 3 checkpoints + )) +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] diff --git a/toolbox/MMDetection/patch/mmdetection/configs/rtmdet/rtmdet_s_8xb32-300e_coco.py b/toolbox/MMDetection/patch/mmdetection/configs/rtmdet/rtmdet_s_8xb32-300e_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..cbf76247b74e94735eea0dd70ce6ac9e57f4dadf --- /dev/null +++ b/toolbox/MMDetection/patch/mmdetection/configs/rtmdet/rtmdet_s_8xb32-300e_coco.py @@ -0,0 +1,62 @@ +_base_ = './rtmdet_l_8xb32-300e_coco.py' +checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-s_imagenet_600e.pth' # noqa +model = dict( + backbone=dict( + deepen_factor=0.33, + widen_factor=0.5, + init_cfg=dict( + type='Pretrained', prefix='backbone.', checkpoint=checkpoint)), + neck=dict(in_channels=[128, 256, 512], out_channels=128, num_csp_blocks=1), + bbox_head=dict(in_channels=128, feat_channels=128, exp_on_reg=False)) + +train_pipeline = [ + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='CachedMosaic', img_scale=(640, 640), pad_val=114.0), + dict( + type='RandomResize', + scale=(1280, 1280), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomCrop', crop_size=(640, 640)), + dict(type='YOLOXHSVRandomAug'), + dict(type='RandomFlip', prob=0.5), + dict(type='Pad', size=(640, 640), pad_val=dict(img=(114, 114, 114))), + dict( + type='CachedMixUp', + img_scale=(640, 640), + ratio_range=(1.0, 1.0), + max_cached_images=20, + pad_val=(114, 114, 114)), + dict(type='PackDetInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='RandomResize', + scale=(640, 640), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomCrop', crop_size=(640, 640)), + dict(type='YOLOXHSVRandomAug'), + dict(type='RandomFlip', prob=0.5), + dict(type='Pad', size=(640, 640), pad_val=dict(img=(114, 114, 114))), + dict(type='PackDetInputs') +] + +train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='PipelineSwitchHook', + switch_epoch=280, + switch_pipeline=train_pipeline_stage2) +] diff --git a/toolbox/MMDetection/patch/mmdetection/configs/rtmdet/rtmdet_tiny_8xb32-300e_coco.py b/toolbox/MMDetection/patch/mmdetection/configs/rtmdet/rtmdet_tiny_8xb32-300e_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..a686f4a7f0c4c3bed956c2a3fa504ea8863c669d --- /dev/null +++ b/toolbox/MMDetection/patch/mmdetection/configs/rtmdet/rtmdet_tiny_8xb32-300e_coco.py @@ -0,0 +1,43 @@ +_base_ = './rtmdet_s_8xb32-300e_coco.py' + +checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-tiny_imagenet_600e.pth' # noqa + +model = dict( + backbone=dict( + deepen_factor=0.167, + widen_factor=0.375, + init_cfg=dict( + type='Pretrained', prefix='backbone.', checkpoint=checkpoint)), + neck=dict(in_channels=[96, 192, 384], out_channels=96, num_csp_blocks=1), + bbox_head=dict(in_channels=96, feat_channels=96, exp_on_reg=False)) + +train_pipeline = [ + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='CachedMosaic', + img_scale=(640, 640), + pad_val=114.0, + max_cached_images=20, + random_pop=False), + dict( + type='RandomResize', + scale=(1280, 1280), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomCrop', crop_size=(640, 640)), + dict(type='YOLOXHSVRandomAug'), + dict(type='RandomFlip', prob=0.5), + dict(type='Pad', size=(640, 640), pad_val=dict(img=(114, 114, 114))), + dict( + type='CachedMixUp', + img_scale=(640, 640), + ratio_range=(1.0, 1.0), + max_cached_images=10, + random_pop=False, + pad_val=(114, 114, 114), + prob=0.5), + dict(type='PackDetInputs') +] + +train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/toolbox/MMDetection/patch/mmdetection/configs/rtmdet/rtmdet_tta.py b/toolbox/MMDetection/patch/mmdetection/configs/rtmdet/rtmdet_tta.py new file mode 100644 index 0000000000000000000000000000000000000000..6dde36de3ff06576944a351de9daf53746103f21 --- /dev/null +++ b/toolbox/MMDetection/patch/mmdetection/configs/rtmdet/rtmdet_tta.py @@ -0,0 +1,36 @@ +tta_model = dict( + type='DetTTAModel', + tta_cfg=dict(nms=dict(type='nms', iou_threshold=0.6), max_per_img=100)) + +img_scales = [(640, 640), (320, 320), (960, 960)] +tta_pipeline = [ + dict(type='LoadImageFromFile', backend_args=None), + dict( + type='TestTimeAug', + transforms=[ + [ + dict(type='Resize', scale=s, keep_ratio=True) + for s in img_scales + ], + [ + # ``RandomFlip`` must be placed before ``Pad``, otherwise + # bounding box coordinates after flipping cannot be + # recovered correctly. + dict(type='RandomFlip', prob=1.), + dict(type='RandomFlip', prob=0.) + ], + [ + dict( + type='Pad', + size=(960, 960), + pad_val=dict(img=(114, 114, 114))), + ], + [dict(type='LoadAnnotations', with_bbox=True)], + [ + dict( + type='PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor', 'flip', 'flip_direction')) + ] + ]) +] diff --git a/toolbox/MMDetection/prepare_mmdetection.sh b/toolbox/MMDetection/prepare_mmdetection.sh index af0aa6164dd40c0b15fe52cb4e194865d173eae8..d94d03dd018e9b42ef5f800e9f95b29d4b2501bc 100644 --- a/toolbox/MMDetection/prepare_mmdetection.sh +++ b/toolbox/MMDetection/prepare_mmdetection.sh @@ -30,6 +30,7 @@ echo "=====Prepare mmdetection ${MMDET_VERSION} START=====" git clone --depth 1 -b ${MMDET_VERSION} https://github.com/open-mmlab/mmdetection.git +# configs/rtmdet reference from https://github.com/open-mmlab/mmdetection/tree/fe3f809a0a514189baf889aa358c498d51ee36cd/configs/rtmdet which is first added in v3.0.0rc1 cp -r -T patch/mmdetection/ mmdetection/ cd mmdetection/