diff --git a/models/cv/detection/foveabox/ixrt/README.md b/models/cv/detection/foveabox/ixrt/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..d86a2c3961f6ac2da5700d345380ab4415331f6b
--- /dev/null
+++ b/models/cv/detection/foveabox/ixrt/README.md
@@ -0,0 +1,67 @@
+# FoveaBox
+
+## Description
+
+FoveaBox is an anchor-free object detection framework that improves accuracy and flexibility by directly predicting the existence and bounding-box coordinates of objects, without anchor references. Built on a Feature Pyramid Network (FPN), it handles targets of varying scales and is particularly strong on objects with arbitrary aspect ratios. FoveaBox is also robust to image deformations.
+
+## Setup
+
+### Install
+
+```bash
+# Install libGL
+## CentOS
+yum install -y mesa-libGL
+## Ubuntu
+apt install -y libgl1-mesa-dev
+
+pip3 install tqdm
+pip3 install onnx
+pip3 install onnxsim
+pip3 install ultralytics
+pip3 install pycocotools
+pip3 install mmdeploy
+pip3 install mmdet
+pip3 install opencv-python==4.6.0.66
+```
+
+### Download
+
+Pretrained model: 
+
+Dataset: to download the validation dataset.
+
+### Model Conversion
+
+```bash
+# Export the onnx model
+python3 export.py --weight fovea_r50_fpn_4x4_1x_coco_20200219-ee4d5303.pth --cfg fovea_r50_fpn_4xb4-1x_coco.py --output foveabox.onnx
+
+# Use onnxsim to optimize the onnx model
+onnxsim foveabox.onnx foveabox_opt.onnx
+```
+
+## Inference
+
+```bash
+export DATASETS_DIR=/Path/to/coco/
+```
+
+### FP16
+
+```bash
+# Accuracy
+bash scripts/infer_foveabox_fp16_accuracy.sh
+# Performance
+bash scripts/infer_foveabox_fp16_performance.sh
+```
+
+## Results
+
+| Model    | BatchSize | Precision | FPS     | IOU@0.5 | IOU@0.5:0.95 |
+|----------|-----------|-----------|---------|---------|--------------|
+| FoveaBox | 32        | FP16      | 181.304 | 0.531   | 0.346        |
+
+## Reference
+
+mmdetection: 
diff --git a/models/cv/detection/foveabox/ixrt/build_engine.py b/models/cv/detection/foveabox/ixrt/build_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..1bba10d0ee686cd6931161a249a6be887ea7cb24
--- /dev/null
+++ b/models/cv/detection/foveabox/ixrt/build_engine.py
@@ -0,0 +1,55 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
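+
+# build_engine.py: build an ixRT engine from an ONNX model. Flow: create a
+# Builder with an explicit-batch network definition, parse the ONNX file,
+# set the requested precision flag, serialize the network, and write the
+# engine to the path given by --engine.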
+
+import argparse
+
+import tensorrt
+
+def main(config):
+    IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING)
+    builder = tensorrt.Builder(IXRT_LOGGER)
+    EXPLICIT_BATCH = 1 << int(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
+    network = builder.create_network(EXPLICIT_BATCH)
+    build_config = builder.create_builder_config()
+    parser = tensorrt.OnnxParser(network, IXRT_LOGGER)
+    # surface parser errors instead of silently building an empty network
+    if not parser.parse_from_file(config.model):
+        raise RuntimeError(f"Failed to parse ONNX model: {config.model}")
+
+    # float32 is the builder default and needs no flag
+    if config.precision == "int8":
+        build_config.set_flag(tensorrt.BuilderFlag.INT8)
+    elif config.precision == "float16":
+        build_config.set_flag(tensorrt.BuilderFlag.FP16)
+
+    plan = builder.build_serialized_network(network, build_config)
+    assert plan is not None, "Failed to build serialized network."
+    engine_file_path = config.engine
+    with open(engine_file_path, "wb") as f:
+        f.write(plan)
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--model", type=str)
+    parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8",
+                        help="The precision of datatype")
+    # engine args
+    parser.add_argument("--engine", type=str, default=None)
+
+    args = parser.parse_args()
+    return args
+
+if __name__ == "__main__":
+    args = parse_args()
+    main(args)
\ No newline at end of file
diff --git a/models/cv/detection/foveabox/ixrt/common.py b/models/cv/detection/foveabox/ixrt/common.py
new file mode 100644
index 0000000000000000000000000000000000000000..ef92a6ba6291058d20f575edb09da35ebff3a937
--- /dev/null
+++ b/models/cv/detection/foveabox/ixrt/common.py
@@ -0,0 +1,69 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
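+
+# common.py: shared helpers for engine-based inference.
+# create_engine_context() deserializes a built engine and creates an
+# execution context; get_io_bindings() walks the engine bindings, allocates
+# device memory for every input/output with cudaMalloc, and returns the
+# binding metadata (name, dtype, shape, allocation, nbytes) needed for the
+# cudaMemcpy transfers around execute_v2().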
+
+import numpy as np
+import tensorrt
+from cuda import cudart
+
+def create_engine_context(engine_path, logger):
+    with open(engine_path, "rb") as f:
+        runtime = tensorrt.Runtime(logger)
+        assert runtime
+        engine = runtime.deserialize_cuda_engine(f.read())
+        assert engine
+        context = engine.create_execution_context()
+        assert context
+
+    return engine, context
+
+def get_io_bindings(engine):
+    # Setup I/O bindings
+    inputs = []
+    outputs = []
+    allocations = []
+
+    for i in range(engine.num_bindings):
+        is_input = False
+        if engine.binding_is_input(i):
+            is_input = True
+        name = engine.get_binding_name(i)
+        dtype = engine.get_binding_dtype(i)
+        shape = engine.get_binding_shape(i)
+        if is_input:
+            batch_size = shape[0]
+        size = np.dtype(tensorrt.nptype(dtype)).itemsize
+        for s in shape:
+            size *= s
+        err, allocation = cudart.cudaMalloc(size)
+        assert err == cudart.cudaError_t.cudaSuccess
+        binding = {
+            "index": i,
+            "name": name,
+            "dtype": np.dtype(tensorrt.nptype(dtype)),
+            "shape": list(shape),
+            "allocation": allocation,
+            "nbytes": size,
+        }
+        print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}")
+        allocations.append(allocation)
+        if engine.binding_is_input(i):
+            inputs.append(binding)
+        else:
+            outputs.append(binding)
+    return inputs, outputs, allocations
\ No newline at end of file
diff --git a/models/cv/detection/foveabox/ixrt/config/FOVEABOX_CONFIG b/models/cv/detection/foveabox/ixrt/config/FOVEABOX_CONFIG
new file mode 100644
index 0000000000000000000000000000000000000000..635c2b0de69ed23231edfa091badcffa3c5cc446
--- /dev/null
+++ b/models/cv/detection/foveabox/ixrt/config/FOVEABOX_CONFIG
@@ -0,0 +1,31 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+# BSZ             : batch size used to build the engine and run inference
+# IMGSIZE         : model input height/width
+# RUN_MODE        : [FPS, MAP]
+# PRECISION       : [float16, int8]
+# MODEL_NAME      : basename of the generated onnx/engine files
+# ORIGINE_MODEL   : the original onnx file
+# COCO_GT         : COCO ground-truth annotation file for COCO evaluation
+# DATASET_DIR     : path to the quantization/inference dataset
+# CHECKPOINTS_DIR : directory where generated onnx/engine files are stored
+# LAYER_FUSION    : whether the decoder uses fused operators; 0 = no fusion, 1 = fusion
+# DECODER_FASTER  : two fused implementations exist; the faster one is quicker and can feed GPU NMS directly, while the other keeps its outputs identical to the onnx model. 1 = faster
+IMGSIZE=800
+MODEL_NAME=foveabox
+ORIGINE_MODEL=foveabox_opt.onnx
+DATA_PROCESS_TYPE=foveabox
+MODEL_INPUT_NAMES=(input)
diff --git a/models/cv/detection/foveabox/ixrt/deploy_default.py b/models/cv/detection/foveabox/ixrt/deploy_default.py
new file mode 100644
index 0000000000000000000000000000000000000000..b8d8e43dc829456f0c2e46a7acfc3128757f945d
--- /dev/null
+++ b/models/cv/detection/foveabox/ixrt/deploy_default.py
@@ -0,0 +1,41 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+onnx_config = dict(
+    type='onnx',
+    export_params=True,
+    keep_initializers_as_inputs=False,
+    opset_version=11,
+    save_file='end2end.onnx',
+    input_names=['input'],
+    output_names=['output'],
+    input_shape=None,
+    optimize=True)
+
+codebase_config = dict(
+    type='mmdet',
+    task='ObjectDetection',
+    model_type='end2end',
+    post_processing=dict(
+        score_threshold=0.05,
+        confidence_threshold=0.005,
+        iou_threshold=0.5,
+        max_output_boxes_per_class=200,
+        pre_top_k=5000,
+        keep_top_k=100,
+        background_label_id=-1,
+    ))
+
+backend_config = dict(type='onnxruntime')
\ No newline at end of file
diff --git a/models/cv/detection/foveabox/ixrt/export.py b/models/cv/detection/foveabox/ixrt/export.py
new file mode 100644
index 0000000000000000000000000000000000000000..bceaba7801843feb5ef095f62a71a2f6e0074db4
--- /dev/null
+++ b/models/cv/detection/foveabox/ixrt/export.py
@@ -0,0 +1,74 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import argparse
+
+import torch
+from mmdeploy.utils import load_config
+from mmdeploy.apis import build_task_processor
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument("--weight",
+                        type=str,
+                        required=True,
+                        help="pytorch model weight.")
+
+    parser.add_argument("--cfg",
+                        type=str,
+                        required=True,
+                        help="model config file.")
+
+    parser.add_argument("--output",
+                        type=str,
+                        required=True,
+                        help="export onnx model path.")
+
+    args = parser.parse_args()
+    return args
+
+def main():
+    args = parse_args()
+
+    deploy_cfg = 'deploy_default.py'
+    model_cfg = args.cfg
+    model_checkpoint = args.weight
+
+    deploy_cfg, model_cfg = load_config(deploy_cfg, model_cfg)
+
+    task_processor = build_task_processor(model_cfg, deploy_cfg, device='cpu')
+
+    model = task_processor.build_pytorch_model(model_checkpoint)
+    # make sure the model is in inference mode before tracing
+    model.eval()
+
+    input_names = ['input']
+    output_names = ['output']
+    # '-1' is used as the symbolic name of the dynamic batch dimension
+    dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}}
+    dummy_input = torch.randn(1, 3, 800, 800)
+
+    torch.onnx.export(
+        model,
+        dummy_input,
+        args.output,
+        input_names=input_names,
+        dynamic_axes=dynamic_axes,
+        output_names=output_names,
+        opset_version=13
+    )
+
+    print("Exported ONNX model successfully!
") + +if __name__ == '__main__': + main() + diff --git a/models/cv/detection/foveabox/ixrt/fovea_r50_fpn_4xb4-1x_coco.py b/models/cv/detection/foveabox/ixrt/fovea_r50_fpn_4xb4-1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..5be9e50af49009813315b0f7b9fc3221134d8ee1 --- /dev/null +++ b/models/cv/detection/foveabox/ixrt/fovea_r50_fpn_4xb4-1x_coco.py @@ -0,0 +1,287 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +auto_scale_lr = dict(base_batch_size=16, enable=False) +backend_args = None +data_root = 'data/coco/' +dataset_type = 'CocoDataset' +default_hooks = dict( + checkpoint=dict(interval=1, type='CheckpointHook'), + logger=dict(interval=50, type='LoggerHook'), + param_scheduler=dict(type='ParamSchedulerHook'), + sampler_seed=dict(type='DistSamplerSeedHook'), + timer=dict(type='IterTimerHook'), + visualization=dict(type='DetVisualizationHook')) +default_scope = 'mmdet' +env_cfg = dict( + cudnn_benchmark=False, + dist_cfg=dict(backend='nccl'), + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0)) +load_from = None +log_level = 'INFO' +log_processor = dict(by_epoch=True, type='LogProcessor', window_size=50) +model = dict( + backbone=dict( + depth=50, + frozen_stages=1, + init_cfg=dict(checkpoint='torchvision://resnet50', type='Pretrained'), + norm_cfg=dict(requires_grad=True, type='BN'), + norm_eval=True, + num_stages=4, + out_indices=( + 0, + 1, + 2, + 3, + ), + style='pytorch', + type='ResNet'), + bbox_head=dict( + base_edge_list=[ + 16, + 32, + 64, + 128, + 256, + ], + feat_channels=256, + in_channels=256, + loss_bbox=dict(beta=0.11, loss_weight=1.0, type='SmoothL1Loss'), + loss_cls=dict( + alpha=0.4, + gamma=1.5, + loss_weight=1.0, + type='FocalLoss', + use_sigmoid=True), + num_classes=80, + scale_ranges=( + ( + 1, + 64, + ), + ( + 32, + 128, + ), + ( + 64, + 256, + ), + ( + 128, + 512, + ), + ( + 256, + 2048, + ), + ), + sigma=0.4, + stacked_convs=4, + strides=[ + 8, + 16, + 32, + 64, + 128, + ], + type='FoveaHead', + with_deform=False), + data_preprocessor=dict( + bgr_to_rgb=True, + mean=[ + 123.675, + 116.28, + 103.53, + ], + pad_size_divisor=32, + std=[ + 58.395, + 57.12, + 57.375, + ], + type='DetDataPreprocessor'), + neck=dict( + add_extra_convs='on_input', + in_channels=[ + 256, + 512, + 1024, + 2048, + ], + num_outs=5, + out_channels=256, + start_level=1, + type='FPN'), + test_cfg=dict( + max_per_img=100, + nms=dict(iou_threshold=0.5, type='nms'), + nms_pre=1000, + score_thr=0.05), + train_cfg=dict(), + type='FOVEA') +optim_wrapper = dict( + optimizer=dict(lr=0.01, momentum=0.9, type='SGD', weight_decay=0.0001), + type='OptimWrapper') +param_scheduler = [ + dict( + begin=0, by_epoch=False, end=500, start_factor=0.001, type='LinearLR'), + dict( + begin=0, + by_epoch=True, + end=12, + gamma=0.1, + milestones=[ + 8, + 11, + ], + type='MultiStepLR'), +] +resume = False +test_cfg = dict(type='TestLoop') +test_dataloader = dict( + batch_size=32, + 
dataset=dict( + ann_file='annotations/instances_val2017.json', + backend_args=None, + data_prefix=dict(img='images/val2017/'), + data_root='data/coco/', + pipeline=[ + dict(backend_args=None, type='LoadImageFromFile'), + dict(keep_ratio=True, scale=( + 800, + 800, + ), type='Resize'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + meta_keys=( + 'img_id', + 'img_path', + 'ori_shape', + 'img_shape', + 'scale_factor', + ), + type='PackDetInputs'), + ], + test_mode=True, + type='CocoDataset'), + drop_last=False, + num_workers=2, + persistent_workers=True, + sampler=dict(shuffle=False, type='DefaultSampler')) +test_evaluator = dict( + ann_file='data/coco/annotations/instances_val2017.json', + backend_args=None, + format_only=False, + metric='bbox', + type='CocoMetric') +test_pipeline = [ + dict(backend_args=None, type='LoadImageFromFile'), + dict(keep_ratio=True, scale=( + 800, + 800, + ), type='Resize'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + meta_keys=( + 'img_id', + 'img_path', + 'ori_shape', + 'img_shape', + 'scale_factor', + ), + type='PackDetInputs'), +] +train_cfg = dict(max_epochs=12, type='EpochBasedTrainLoop', val_interval=1) +train_dataloader = dict( + batch_sampler=dict(type='AspectRatioBatchSampler'), + batch_size=4, + dataset=dict( + ann_file='annotations/instances_train2017.json', + backend_args=None, + data_prefix=dict(img='train2017/'), + data_root='data/coco/', + filter_cfg=dict(filter_empty_gt=True, min_size=32), + pipeline=[ + dict(backend_args=None, type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(keep_ratio=True, scale=( + 800, + 800, + ), type='Resize'), + dict(prob=0.5, type='RandomFlip'), + dict(type='PackDetInputs'), + ], + type='CocoDataset'), + num_workers=4, + persistent_workers=True, + sampler=dict(shuffle=True, type='DefaultSampler')) +train_pipeline = [ + dict(backend_args=None, type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(keep_ratio=True, scale=( + 800, + 800, + ), type='Resize'), + dict(prob=0.5, type='RandomFlip'), + dict(type='PackDetInputs'), +] +val_cfg = dict(type='ValLoop') +val_dataloader = dict( + batch_size=1, + dataset=dict( + ann_file='annotations/instances_val2017.json', + backend_args=None, + data_prefix=dict(img='val2017/'), + data_root='data/coco/', + pipeline=[ + dict(backend_args=None, type='LoadImageFromFile'), + dict(keep_ratio=True, scale=( + 800, + 800, + ), type='Resize'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + meta_keys=( + 'img_id', + 'img_path', + 'ori_shape', + 'img_shape', + 'scale_factor', + ), + type='PackDetInputs'), + ], + test_mode=True, + type='CocoDataset'), + drop_last=False, + num_workers=2, + persistent_workers=True, + sampler=dict(shuffle=False, type='DefaultSampler')) +val_evaluator = dict( + ann_file='data/coco/annotations/instances_val2017.json', + backend_args=None, + format_only=False, + metric='bbox', + type='CocoMetric') +vis_backends = [ + dict(type='LocalVisBackend'), +] +visualizer = dict( + name='visualizer', + type='DetLocalVisualizer', + vis_backends=[ + dict(type='LocalVisBackend'), + ]) +work_dir = './' diff --git a/models/cv/detection/foveabox/ixrt/inference.py b/models/cv/detection/foveabox/ixrt/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..bcfe0e22d465482280149396b099fdd95e6681f7 --- /dev/null +++ b/models/cv/detection/foveabox/ixrt/inference.py @@ -0,0 +1,193 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. 
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import os
+import argparse
+import torch
+import numpy as np
+import time
+from tqdm import tqdm
+from mmdet.registry import RUNNERS
+from mmengine.config import Config
+
+import tensorrt
+from common import create_engine_context, get_io_bindings
+from cuda import cudart
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument("--engine_file",
+                        type=str,
+                        required=True,
+                        help="ixrt engine path.")
+
+    parser.add_argument("--batchsize",
+                        type=int,
+                        required=True,
+                        help="inference batch size.")
+
+    parser.add_argument("--datasets",
+                        type=str,
+                        required=True,
+                        help="datasets path.")
+
+    parser.add_argument("--input_name",
+                        type=str,
+                        required=True,
+                        help="input name of the model.")
+
+    parser.add_argument("--warmup",
+                        type=int,
+                        default=3,
+                        help="number of warmup before test.")
+
+    parser.add_argument("--acc_target",
+                        type=float,
+                        default=None,
+                        help="Model inference Accuracy target.")
+
+    parser.add_argument("--fps_target",
+                        type=float,
+                        default=0.0,
+                        help="Model inference FPS target.")
+
+    # parse the string value explicitly; a bare type=bool would treat any
+    # non-empty string (including "False") as True
+    parser.add_argument("--perf_only",
+                        type=lambda x: str(x).lower() in ("1", "true", "yes"),
+                        default=False,
+                        help="Run performance test only")
+
+    args = parser.parse_args()
+
+    return args
+
+def main():
+    args = parse_args()
+
+    batch_size = args.batchsize
+
+    logger = tensorrt.Logger(tensorrt.Logger.ERROR)
+
+    # Load Engine && I/O bindings
+    engine, context = create_engine_context(args.engine_file, logger)
+    inputs, outputs, allocations = get_io_bindings(engine)
+
+    # just run perf test
+    if args.perf_only:
+        torch.cuda.synchronize()
+        start_time = time.time()
+
+        for i in range(100):
+            context.execute_v2(allocations)
+
+        torch.cuda.synchronize()
+        end_time = time.time()
+        forward_time = end_time - start_time
+
+        # 100 iterations of batch_size samples each, capped at the val set size
+        num_samples = min(50000, 100 * batch_size)
+        fps = num_samples / forward_time
+
+        print("FPS : ", fps)
+        print(f"Performance Check : Test {fps} >= target {args.fps_target}")
+        if fps >= args.fps_target:
+            print("pass!")
+            exit()
+        else:
+            print("failed!")
+            exit(1)
+    else:
+        # warm up
+        print("\nWarm Start.")
+        for i in range(args.warmup):
+            context.execute_v2(allocations)
+        print("Warm Done.")
+
+        # runner config
+        cfg = Config.fromfile("fovea_r50_fpn_4xb4-1x_coco.py")
+
+        cfg.work_dir = "./workspace"
+        cfg['test_dataloader']['batch_size'] = batch_size
+        cfg['test_dataloader']['dataset']['data_root'] = args.datasets
+        cfg['test_dataloader']['dataset']['data_prefix']['img'] = 'images/val2017/'
+        cfg['test_evaluator']['ann_file'] = os.path.join(args.datasets, 'annotations/instances_val2017.json')
+        cfg['log_level'] = 'ERROR'
+
+        # build runner
+        runner = RUNNERS.build(cfg)
+
+        for input_data in tqdm(runner.test_dataloader):
+
+            input_data = runner.model.data_preprocessor(input_data, False)
+            image = input_data['inputs'].cpu().numpy()
+
+            pad_batch = len(image) != batch_size
+            if pad_batch:
+                origin_size = len(image)
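+                # The last dataloader batch can be smaller than the engine's
+                # fixed batch size; np.resize pads it by repeating samples, and
+                # the padded rows are dropped again after inference via the
+                # origin_size slicing below.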
+                image = np.resize(image, (batch_size, *image.shape[1:]))
+
+            batch_data = image.astype(inputs[0]["dtype"])
+            batch_data = np.ascontiguousarray(batch_data)
+
+            (err,) = cudart.cudaMemcpy(
+                inputs[0]["allocation"],
+                batch_data,
+                batch_data.nbytes,
+                cudart.cudaMemcpyKind.cudaMemcpyHostToDevice,
+            )
+            assert err == cudart.cudaError_t.cudaSuccess
+
+            context.execute_v2(allocations)
+
+            cls_score = []
+            box_reg = []
+
+            for i in range(len(outputs)):
+                output = np.zeros(outputs[i]["shape"], outputs[i]["dtype"])
+                (err,) = cudart.cudaMemcpy(
+                    output,
+                    outputs[i]["allocation"],
+                    outputs[i]["nbytes"],
+                    cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost,
+                )
+                assert err == cudart.cudaError_t.cudaSuccess
+
+                if pad_batch:
+                    output = output[:origin_size]
+
+                output = torch.from_numpy(output)
+
+                if output.shape[1] == 80:
+                    cls_score.append(output)
+                elif output.shape[1] == 4:
+                    box_reg.append(output)
+
+            batch_img_metas = [
+                data_samples.metainfo for data_samples in input_data['data_samples']
+            ]
+
+            results_list = runner.model.bbox_head.predict_by_feat(cls_score, box_reg, batch_img_metas=batch_img_metas, rescale=True)
+
+            batch_data_samples = runner.model.add_pred_to_datasample(input_data['data_samples'], results_list)
+
+            runner.test_evaluator.process(data_samples=batch_data_samples, data_batch=input_data)
+
+        metrics = runner.test_evaluator.evaluate(len(runner.test_dataloader.dataset))
+        # report the COCO bbox metrics collected by the evaluator
+        print(metrics)
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/models/cv/detection/foveabox/ixrt/modify_batchsize.py b/models/cv/detection/foveabox/ixrt/modify_batchsize.py
new file mode 100644
index 0000000000000000000000000000000000000000..3a88c1603bd6f457fd4965257627dc29edcda4d1
--- /dev/null
+++ b/models/cv/detection/foveabox/ixrt/modify_batchsize.py
@@ -0,0 +1,52 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import onnx
+import argparse
+
+def change_input_dim(model, bsz):
+    batch_size = bsz
+
+    # The following code changes the first dimension of every input to be batch_size
+    # Modify as appropriate ... note that this requires all inputs to
+    # have the same batch_size
+    inputs = model.graph.input
+    for input in inputs:
+        # Checks omitted. This assumes that all inputs are tensors and have a shape with first dim.
+        # Add checks as needed.
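+        # Example: with --batch_size 16, an input of shape [1, 3, 800, 800]
+        # becomes [16, 3, 800, 800]; a non-numeric string such as "N" would
+        # instead mark the dimension as dynamic (symbolic).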
+        dim1 = input.type.tensor_type.shape.dim[0]
+        # update the dim to a fixed batch size or a symbolic value
+        if isinstance(batch_size, int):
+            # set the given batch size
+            dim1.dim_value = batch_size
+        elif isinstance(batch_size, str) and batch_size.isdigit():
+            # numeric string: treat it as a fixed batch size
+            dim1.dim_value = int(batch_size)
+        elif isinstance(batch_size, str):
+            # non-numeric string: set a dynamic (symbolic) batch size
+            dim1.dim_param = batch_size
+        else:
+            # fall back to a batch size of 1
+            dim1.dim_value = 1
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--batch_size", type=int)
+    parser.add_argument("--origin_model", type=str)
+    parser.add_argument("--output_model", type=str)
+    args = parser.parse_args()
+    return args
+
+args = parse_args()
+model = onnx.load(args.origin_model)
+change_input_dim(model, args.batch_size)
+onnx.save(model, args.output_model)
\ No newline at end of file
diff --git a/models/cv/detection/foveabox/ixrt/scripts/infer_foveabox_fp16_accuracy.sh b/models/cv/detection/foveabox/ixrt/scripts/infer_foveabox_fp16_accuracy.sh
new file mode 100644
index 0000000000000000000000000000000000000000..4e73f22b7f1ab492676a7e6c0ebd36058b4fff2c
--- /dev/null
+++ b/models/cv/detection/foveabox/ixrt/scripts/infer_foveabox_fp16_accuracy.sh
@@ -0,0 +1,123 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
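+
+# Pipeline: simplify the ONNX model, rewrite its batch dimension to BSZ,
+# build an FP16 engine, then run COCO accuracy evaluation via inference.py.
+# Usage: bash scripts/infer_foveabox_fp16_accuracy.sh [--bs 32] [--tgt <target>]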
+
+EXIT_STATUS=0
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then
+        EXIT_STATUS=1
+    fi
+}
+
+# Run parameters
+BSZ=32
+WARM_UP=-1
+TGT=-1
+LOOP_COUNT=-1
+RUN_MODE=MAP
+PRECISION=float16
+
+# Update arguments
+index=0
+options=$@
+arguments=($options)
+for argument in $options
+do
+    index=`expr $index + 1`
+    case $argument in
+      --bs) BSZ=${arguments[index]};;
+      --tgt) TGT=${arguments[index]};;
+    esac
+done
+
+PROJ_DIR=${PROJ_DIR:-"."}
+DATASETS_DIR="${DATASETS_DIR}"
+CHECKPOINTS_DIR="${PROJ_DIR}"
+RUN_DIR="${PROJ_DIR}"
+CONFIG_DIR="${PROJ_DIR}/config/FOVEABOX_CONFIG"
+source ${CONFIG_DIR}
+ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo CONFIG_DIR : ${CONFIG_DIR}
+echo ====================== Model Info ======================
+echo Model Name : ${MODEL_NAME}
+echo Onnx Path : ${ORIGINE_MODEL}
+
+step=0
+faster=0
+CURRENT_MODEL=${ORIGINE_MODEL}
+if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then
+    faster=1
+fi
+
+# Simplify Model
+let step++
+echo [STEP ${step}] : Simplify Model
+SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
+if [ -f ${SIM_MODEL} ];then
+    echo " "Simplify Model skipped, ${SIM_MODEL} already exists
+else
+    python3 ${RUN_DIR}/simplify_model.py \
+        --origin_model ${CURRENT_MODEL} \
+        --output_model ${SIM_MODEL}
+    echo " "Generate ${SIM_MODEL}
+fi
+CURRENT_MODEL=${SIM_MODEL}
+
+# Change Batchsize
+let step++
+echo;
+echo [STEP ${step}] : Change Batchsize
+FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_bs${BSZ}.onnx
+if [ -f $FINAL_MODEL ];then
+    echo " "Change Batchsize skipped, $FINAL_MODEL already exists
+else
+    python3 ${RUN_DIR}/modify_batchsize.py \
+        --batch_size ${BSZ} \
+        --origin_model ${CURRENT_MODEL} \
+        --output_model ${FINAL_MODEL}
+    echo " "Generate ${FINAL_MODEL}
+fi
+CURRENT_MODEL=${FINAL_MODEL}
+
+# Build Engine
+let step++
+echo;
+echo [STEP ${step}] : Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
+if [ -f $ENGINE_FILE ];then
+    echo " "Build Engine skipped, $ENGINE_FILE already exists
+else
+    python3 ${RUN_DIR}/build_engine.py \
+        --precision ${PRECISION} \
+        --model ${CURRENT_MODEL} \
+        --engine ${ENGINE_FILE}
+    echo " "Generate Engine ${ENGINE_FILE}
+fi
+
+# Inference
+let step++
+echo;
+echo [STEP ${step}] : Inference
+python3 ${RUN_DIR}/inference.py \
+    --engine_file ${ENGINE_FILE} \
+    --batchsize ${BSZ} \
+    --input_name input \
+    --datasets ${DATASETS_DIR}; check_status
+exit ${EXIT_STATUS}
\ No newline at end of file
diff --git a/models/cv/detection/foveabox/ixrt/scripts/infer_foveabox_fp16_performance.sh b/models/cv/detection/foveabox/ixrt/scripts/infer_foveabox_fp16_performance.sh
new file mode 100644
index 0000000000000000000000000000000000000000..eb96bcb158443a9605be5a641032c2661050e402
--- /dev/null
+++ b/models/cv/detection/foveabox/ixrt/scripts/infer_foveabox_fp16_performance.sh
@@ -0,0 +1,124 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+# Pipeline: simplify the ONNX model, rewrite its batch dimension to BSZ,
+# build an FP16 engine, then run the FPS performance test via inference.py.
+# Usage: bash scripts/infer_foveabox_fp16_performance.sh [--bs 32] [--tgt <target>]
+
+EXIT_STATUS=0
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then
+        EXIT_STATUS=1
+    fi
+}
+
+# Run parameters
+BSZ=32
+WARM_UP=-1
+TGT=-1
+LOOP_COUNT=-1
+RUN_MODE=FPS
+PRECISION=float16
+
+# Update arguments
+index=0
+options=$@
+arguments=($options)
+for argument in $options
+do
+    index=`expr $index + 1`
+    case $argument in
+      --bs) BSZ=${arguments[index]};;
+      --tgt) TGT=${arguments[index]};;
+    esac
+done
+
+PROJ_DIR=${PROJ_DIR:-"."}
+DATASETS_DIR="${DATASETS_DIR}"
+CHECKPOINTS_DIR="${PROJ_DIR}"
+RUN_DIR="${PROJ_DIR}"
+CONFIG_DIR="${PROJ_DIR}/config/FOVEABOX_CONFIG"
+source ${CONFIG_DIR}
+ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo CONFIG_DIR : ${CONFIG_DIR}
+echo ====================== Model Info ======================
+echo Model Name : ${MODEL_NAME}
+echo Onnx Path : ${ORIGINE_MODEL}
+
+step=0
+faster=0
+CURRENT_MODEL=${ORIGINE_MODEL}
+if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then
+    faster=1
+fi
+
+# Simplify Model
+let step++
+echo [STEP ${step}] : Simplify Model
+SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
+if [ -f ${SIM_MODEL} ];then
+    echo " "Simplify Model skipped, ${SIM_MODEL} already exists
+else
+    python3 ${RUN_DIR}/simplify_model.py \
+        --origin_model ${CURRENT_MODEL} \
+        --output_model ${SIM_MODEL}
+    echo " "Generate ${SIM_MODEL}
+fi
+CURRENT_MODEL=${SIM_MODEL}
+
+# Change Batchsize
+let step++
+echo;
+echo [STEP ${step}] : Change Batchsize
+FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_bs${BSZ}.onnx
+if [ -f $FINAL_MODEL ];then
+    echo " "Change Batchsize skipped, $FINAL_MODEL already exists
+else
+    python3 ${RUN_DIR}/modify_batchsize.py \
+        --batch_size ${BSZ} \
+        --origin_model ${CURRENT_MODEL} \
+        --output_model ${FINAL_MODEL}
+    echo " "Generate ${FINAL_MODEL}
+fi
+CURRENT_MODEL=${FINAL_MODEL}
+
+# Build Engine
+let step++
+echo;
+echo [STEP ${step}] : Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
+if [ -f $ENGINE_FILE ];then
+    echo " "Build Engine skipped, $ENGINE_FILE already exists
+else
+    python3 ${RUN_DIR}/build_engine.py \
+        --precision ${PRECISION} \
+        --model ${CURRENT_MODEL} \
+        --engine ${ENGINE_FILE}
+    echo " "Generate Engine ${ENGINE_FILE}
+fi
+
+# Inference
+let step++
+echo;
+echo [STEP ${step}] : Inference
+python3 ${RUN_DIR}/inference.py \
+    --engine_file ${ENGINE_FILE} \
+    --batchsize ${BSZ} \
+    --input_name input \
+    --perf_only True \
+    --datasets ${DATASETS_DIR}; check_status
+exit ${EXIT_STATUS}
\ No newline at end of file
diff --git a/models/cv/detection/foveabox/ixrt/simplify_model.py b/models/cv/detection/foveabox/ixrt/simplify_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..1400fd81ddb4b3fae1b20d0fd35082a692f5d292
--- /dev/null
+++ b/models/cv/detection/foveabox/ixrt/simplify_model.py
@@ -0,0 +1,36 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import onnx
+import argparse
+from onnxsim import simplify

+# Simplify
+def simplify_model(args):
+    onnx_model = onnx.load(args.origin_model)
+    model_simp, check = simplify(onnx_model)
+    assert check, "Simplified ONNX model could not be validated"
+    model_simp = onnx.shape_inference.infer_shapes(model_simp)
+    onnx.save(model_simp, args.output_model)
+    print(" Simplify onnx Done.")
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--origin_model", type=str)
+    parser.add_argument("--output_model", type=str)
+    args = parser.parse_args()
+    return args
+
+if __name__ == "__main__":
+    args = parse_args()
+    simplify_model(args)
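+
+# Example (mirrors the call in scripts/infer_foveabox_fp16_accuracy.sh):
+#   python3 simplify_model.py --origin_model foveabox_opt.onnx --output_model foveabox_sim.onnx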