diff --git a/models/cv/object_detection/gfl/igie/README.md b/models/cv/object_detection/gfl/igie/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6fc8553fbf9c20a2e9b8eb55a33fba48814e8ad8 --- /dev/null +++ b/models/cv/object_detection/gfl/igie/README.md @@ -0,0 +1,66 @@ +# GFL (IGIE) + +## Model Description + +GFL (Generalized Focal Loss) is an object detection model that utilizes an improved focal loss function to address the class imbalance problem, enhancing classification capability and improving the detection accuracy of multi-scale objects and the precision of bounding box predictions. It is suitable for object detection tasks in complex scenes. + +## Supported Environments + +| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release | +|--------|-----------|---------| +| MR-V100 | 4.2.0 | 25.06 | + +## Model Preparation + +### Prepare Resources + +Pretrained model: + +Dataset: to download the validation dataset. + +### Install Dependencies + +```bash +# Install libGL +## CentOS +yum install -y mesa-libGL +## Ubuntu +apt install -y libgl1-mesa-glx + +pip3 install -r requirements.txt +``` + +### Model Conversion + +```bash +# export onnx model +python3 export.py --weight gfl_r50_fpn_1x_coco_20200629_121244-25944287.pth --cfg gfl_r50_fpn_1x_coco.py --output gfl.onnx + +# use onnxsim optimize onnx model +onnxsim gfl.onnx gfl_opt.onnx +``` + +## Model Inference + +```bash +export DATASETS_DIR=/Path/to/coco/ +``` + +### FP16 + +```bash +# Accuracy +bash scripts/infer_gfl_fp16_accuracy.sh +# Performance +bash scripts/infer_gfl_fp16_performance.sh +``` + +## Model Results + +| Model | BatchSize | Precision | FPS | IOU@0.5 | IOU@0.5:0.95 | +|-------|-----------|-----------|--------|---------|--------------| +| GFL | 32 | FP16 | 139.78 | 0.552 | 0.378 | + +## References + +- [mmdetection](https://github.com/open-mmlab/mmdetection.git) diff --git a/models/cv/object_detection/gfl/igie/build_engine.py b/models/cv/object_detection/gfl/igie/build_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..54aa8847e5ceafba3f40d44f0e1e280d4740d870 --- /dev/null +++ b/models/cv/object_detection/gfl/igie/build_engine.py @@ -0,0 +1,73 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import tvm +import argparse +from tvm import relay +from tvm.relay.import_model import import_model_to_igie + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--model_path", + type=str, + required=True, + help="original model path.") + + parser.add_argument("--engine_path", + type=str, + required=True, + help="igie export engine path.") + + parser.add_argument("--input", + type=str, + required=True, + help=""" + input info of the model, format should be: + input_name:input_shape + eg: --input input:1,3,224,224. 
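+                        the batch dim should match the --batchsize later passed to inference.py.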
+            """)

+
+    parser.add_argument("--precision",
+                        type=str,
+                        choices=["fp32", "fp16", "int8"],
+                        required=True,
+                        help="model inference precision.")
+
+    args = parser.parse_args()
+
+    return args
+
+def main():
+    args = parse_args()
+
+    # parse the input name and shape from the --input argument
+    input_name, input_shape = args.input.split(":")
+    shape = tuple([int(s) for s in input_shape.split(",")])
+    input_dict = {input_name: shape}
+
+    target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer")
+
+    mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie")
+
+    # build engine
+    lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision)
+
+    # export engine
+    lib.export_library(args.engine_path)
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/models/cv/object_detection/gfl/igie/ci/prepare.sh b/models/cv/object_detection/gfl/igie/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..a488e8e0d3589d02384050659f11a625da196165
--- /dev/null
+++ b/models/cv/object_detection/gfl/igie/ci/prepare.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+    apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+    yum install -y mesa-libGL
+else
+    echo "Unsupported OS: ${ID}"
+fi
+
+pip3 install -r requirements.txt
+
+# export onnx model
+python3 export.py --weight gfl_r50_fpn_1x_coco_20200629_121244-25944287.pth --cfg gfl_r50_fpn_1x_coco.py --output gfl.onnx
+
+# use onnxsim to optimize the onnx model
+onnxsim gfl.onnx gfl_opt.onnx
\ No newline at end of file
diff --git a/models/cv/object_detection/gfl/igie/deploy_default.py b/models/cv/object_detection/gfl/igie/deploy_default.py
new file mode 100644
index 0000000000000000000000000000000000000000..e6c4d46abafaf80eac32f3fd8a2b68e245d8fe01
--- /dev/null
+++ b/models/cv/object_detection/gfl/igie/deploy_default.py
@@ -0,0 +1,41 @@
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
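+# mmdeploy deployment config: ONNX export settings (opset 11, saved as
+# 'end2end.onnx') plus the mmdet post-processing thresholds applied when
+# tracing the detector.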
+ +onnx_config = dict( + type='onnx', + export_params=True, + keep_initializers_as_inputs=False, + opset_version=11, + save_file='end2end.onnx', + input_names=['input'], + output_names=['output'], + input_shape=None, + optimize=True) + +codebase_config = dict( + type='mmdet', + task='ObjectDetection', + model_type='end2end', + post_processing=dict( + score_threshold=0.05, + confidence_threshold=0.005, + iou_threshold=0.5, + max_output_boxes_per_class=200, + pre_top_k=5000, + keep_top_k=100, + background_label_id=-1, + )) + +backend_config = dict(type='onnxruntime') \ No newline at end of file diff --git a/models/cv/object_detection/gfl/igie/export.py b/models/cv/object_detection/gfl/igie/export.py new file mode 100644 index 0000000000000000000000000000000000000000..eed56e0c77e98b6e9a86809b296201b9c07d8c9a --- /dev/null +++ b/models/cv/object_detection/gfl/igie/export.py @@ -0,0 +1,72 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import argparse + +import torch +from mmdeploy.utils import load_config +from mmdeploy.apis import build_task_processor + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--weight", + type=str, + required=True, + help="pytorch model weight.") + + parser.add_argument("--cfg", + type=str, + required=True, + help="model config file.") + + parser.add_argument("--output", + type=str, + required=True, + help="export onnx model path.") + + args = parser.parse_args() + return args + +def main(): + args = parse_args() + + deploy_cfg = 'deploy_default.py' + model_cfg = args.cfg + model_checkpoint = args.weight + + deploy_cfg, model_cfg = load_config(deploy_cfg, model_cfg) + + task_processor = build_task_processor(model_cfg, deploy_cfg, device='cpu') + + model = task_processor.build_pytorch_model(model_checkpoint) + + input_names = ['input'] + dynamic_axes = {'input': {0: '-1'}} + dummy_input = torch.randn(1, 3, 800, 800) + + torch.onnx.export( + model, + dummy_input, + args.output, + input_names = input_names, + dynamic_axes = dynamic_axes, + opset_version=13 + ) + + print("Export onnx model successfully! ") + +if __name__ == '__main__': + main() diff --git a/models/cv/object_detection/gfl/igie/gfl_r50_fpn_1x_coco.py b/models/cv/object_detection/gfl/igie/gfl_r50_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..cac53a3a8060ce0af1872b8996e454fbdc9b4424 --- /dev/null +++ b/models/cv/object_detection/gfl/igie/gfl_r50_fpn_1x_coco.py @@ -0,0 +1,270 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +auto_scale_lr = dict(base_batch_size=16, enable=False) +backend_args = None +data_root = 'data/coco/' +dataset_type = 'CocoDataset' +default_hooks = dict( + checkpoint=dict(interval=1, type='CheckpointHook'), + logger=dict(interval=50, type='LoggerHook'), + param_scheduler=dict(type='ParamSchedulerHook'), + sampler_seed=dict(type='DistSamplerSeedHook'), + timer=dict(type='IterTimerHook'), + visualization=dict(type='DetVisualizationHook')) +default_scope = 'mmdet' +env_cfg = dict( + cudnn_benchmark=False, + dist_cfg=dict(backend='nccl'), + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0)) +load_from = None +log_level = 'ERROR' +log_processor = dict(by_epoch=True, type='LogProcessor', window_size=50) +model = dict( + backbone=dict( + depth=50, + frozen_stages=1, + init_cfg=dict(checkpoint='torchvision://resnet50', type='Pretrained'), + norm_cfg=dict(requires_grad=True, type='BN'), + norm_eval=True, + num_stages=4, + out_indices=( + 0, + 1, + 2, + 3, + ), + style='pytorch', + type='ResNet'), + bbox_head=dict( + anchor_generator=dict( + octave_base_scale=8, + ratios=[ + 1.0, + ], + scales_per_octave=1, + strides=[ + 8, + 16, + 32, + 64, + 128, + ], + type='AnchorGenerator'), + feat_channels=256, + in_channels=256, + loss_bbox=dict(loss_weight=2.0, type='GIoULoss'), + loss_cls=dict( + beta=2.0, + loss_weight=1.0, + type='QualityFocalLoss', + use_sigmoid=True), + loss_dfl=dict(loss_weight=0.25, type='DistributionFocalLoss'), + num_classes=80, + reg_max=16, + stacked_convs=4, + type='GFLHead'), + data_preprocessor=dict( + bgr_to_rgb=True, + mean=[ + 123.675, + 116.28, + 103.53, + ], + pad_size_divisor=32, + std=[ + 58.395, + 57.12, + 57.375, + ], + type='DetDataPreprocessor'), + neck=dict( + add_extra_convs='on_output', + in_channels=[ + 256, + 512, + 1024, + 2048, + ], + num_outs=5, + out_channels=256, + start_level=1, + type='FPN'), + test_cfg=dict( + max_per_img=100, + min_bbox_size=0, + nms=dict(iou_threshold=0.6, type='nms'), + nms_pre=1000, + score_thr=0.05), + train_cfg=dict( + allowed_border=-1, + assigner=dict(topk=9, type='ATSSAssigner'), + debug=False, + pos_weight=-1), + type='GFL') +optim_wrapper = dict( + optimizer=dict(lr=0.01, momentum=0.9, type='SGD', weight_decay=0.0001), + type='OptimWrapper') +param_scheduler = [ + dict( + begin=0, by_epoch=False, end=500, start_factor=0.001, type='LinearLR'), + dict( + begin=0, + by_epoch=True, + end=12, + gamma=0.1, + milestones=[ + 8, + 11, + ], + type='MultiStepLR'), +] +resume = False +test_cfg = dict(type='TestLoop') +test_dataloader = dict( + batch_size=32, + dataset=dict( + ann_file='annotations/instances_val2017.json', + backend_args=None, + data_prefix=dict(img='images/val2017/'), + data_root='data/coco/', + pipeline=[ + dict(backend_args=None, type='LoadImageFromFile'), + dict(keep_ratio=True, scale=( + 800, + 800, + ), type='Resize'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + meta_keys=( + 'img_id', + 'img_path', + 'ori_shape', + 'img_shape', + 'scale_factor', + ), + type='PackDetInputs'), + ], + test_mode=True, + type='CocoDataset'), + drop_last=False, + num_workers=2, + persistent_workers=True, 
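+    # note: batch_size, data_root and the image prefix above are overridden at runtime by inference.py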
+ sampler=dict(shuffle=False, type='DefaultSampler')) +test_evaluator = dict( + ann_file= + 'data/coco/annotations/instances_val2017.json', + backend_args=None, + format_only=False, + metric='bbox', + type='CocoMetric') +test_pipeline = [ + dict(backend_args=None, type='LoadImageFromFile'), + dict(keep_ratio=True, scale=( + 800, + 800, + ), type='Resize'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + meta_keys=( + 'img_id', + 'img_path', + 'ori_shape', + 'img_shape', + 'scale_factor', + ), + type='PackDetInputs'), +] +train_cfg = dict(max_epochs=12, type='EpochBasedTrainLoop', val_interval=1) +train_dataloader = dict( + batch_sampler=dict(type='AspectRatioBatchSampler'), + batch_size=2, + dataset=dict( + ann_file='annotations/instances_train2017.json', + backend_args=None, + data_prefix=dict(img='train2017/'), + data_root='data/coco/', + filter_cfg=dict(filter_empty_gt=True, min_size=32), + pipeline=[ + dict(backend_args=None, type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(keep_ratio=True, scale=( + 800, + 800, + ), type='Resize'), + dict(prob=0.5, type='RandomFlip'), + dict(type='PackDetInputs'), + ], + type='CocoDataset'), + num_workers=2, + persistent_workers=True, + sampler=dict(shuffle=True, type='DefaultSampler')) +train_pipeline = [ + dict(backend_args=None, type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(keep_ratio=True, scale=( + 800, + 800, + ), type='Resize'), + dict(prob=0.5, type='RandomFlip'), + dict(type='PackDetInputs'), +] +val_cfg = dict(type='ValLoop') +val_dataloader = dict( + batch_size=1, + dataset=dict( + ann_file='annotations/instances_val2017.json', + backend_args=None, + data_prefix=dict(img='val2017/'), + data_root='data/coco/', + pipeline=[ + dict(backend_args=None, type='LoadImageFromFile'), + dict(keep_ratio=True, scale=( + 800, + 800, + ), type='Resize'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + meta_keys=( + 'img_id', + 'img_path', + 'ori_shape', + 'img_shape', + 'scale_factor', + ), + type='PackDetInputs'), + ], + test_mode=True, + type='CocoDataset'), + drop_last=False, + num_workers=2, + persistent_workers=True, + sampler=dict(shuffle=False, type='DefaultSampler')) +val_evaluator = dict( + ann_file='data/coco/annotations/instances_val2017.json', + backend_args=None, + format_only=False, + metric='bbox', + type='CocoMetric') +vis_backends = [ + dict(type='LocalVisBackend'), +] +visualizer = dict( + name='visualizer', + type='DetLocalVisualizer', + vis_backends=[ + dict(type='LocalVisBackend'), + ]) +work_dir = './workspace' diff --git a/models/cv/object_detection/gfl/igie/inference.py b/models/cv/object_detection/gfl/igie/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..cb0c30d4d398ab46add2cf4a083023197dd466a3 --- /dev/null +++ b/models/cv/object_detection/gfl/igie/inference.py @@ -0,0 +1,157 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import argparse +import tvm +import torch +import numpy as np +from tvm import relay +from tqdm import tqdm +from mmdet.registry import RUNNERS +from mmengine.config import Config + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--engine", + type=str, + required=True, + help="igie engine path.") + + parser.add_argument("--batchsize", + type=int, + required=True, + help="inference batch size.") + + parser.add_argument("--datasets", + type=str, + required=True, + help="datasets path.") + + parser.add_argument("--input_name", + type=str, + required=True, + help="input name of the model.") + + parser.add_argument("--warmup", + type=int, + default=3, + help="number of warmup before test.") + + parser.add_argument("--acc_target", + type=float, + default=None, + help="Model inference Accuracy target.") + + parser.add_argument("--fps_target", + type=float, + default=None, + help="Model inference FPS target.") + + parser.add_argument("--perf_only", + type=bool, + default=False, + help="Run performance test only") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + batch_size = args.batchsize + + # create iluvatar target & device + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + device = tvm.device(target.kind.name, 0) + + # load engine + lib = tvm.runtime.load_module(args.engine) + + # create runtime from engine + module = tvm.contrib.graph_executor.GraphModule(lib["default"](device)) + + # just run perf test + if args.perf_only: + ftimer = module.module.time_evaluator("run", device, number=100, repeat=1) + prof_res = np.array(ftimer().results) * 1000 + fps = batch_size * 1000 / np.mean(prof_res) + print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}") + else: + # warm up + for _ in range(args.warmup): + module.run() + + # runner config + cfg = Config.fromfile("gfl_r50_fpn_1x_coco.py") + + cfg.work_dir = "./workspace" + cfg['test_dataloader']['batch_size'] = batch_size + cfg['test_dataloader']['dataset']['data_root'] = args.datasets + cfg['test_dataloader']['dataset']['data_prefix']['img'] = 'images/val2017/' + cfg['test_evaluator']['ann_file'] = os.path.join(args.datasets, 'annotations/instances_val2017.json') + cfg['log_level'] = 'ERROR' + + # build runner + runner = RUNNERS.build(cfg) + + for data in tqdm(runner.test_dataloader): + cls_score = [] + box_reg = [] + + input_data = runner.model.data_preprocessor(data, False) + image = input_data['inputs'].cpu() + pad_batch = len(image) != batch_size + + if pad_batch: + origin_size = len(image) + image = np.resize(image, (batch_size, *image.shape[1:])) + + module.set_input("input", tvm.nd.array(image, device)) + + module.run() + + for i in range(module.get_num_outputs()): + output = module.get_output(i).asnumpy() + + if pad_batch: + output = output[:origin_size] + + output = torch.from_numpy(output) + + if output.shape[1] == 80: + cls_score.append(output) + else: + box_reg.append(output) + + batch_img_metas = [ + data_samples.metainfo for data_samples in data['data_samples'] + ] + + preds = runner.model.bbox_head.predict_by_feat( + cls_score, box_reg, batch_img_metas=batch_img_metas, rescale=True + ) + + batch_data_samples = runner.model.add_pred_to_datasample(input_data['data_samples'], preds) + + runner.test_evaluator.process(data_samples=batch_data_samples, data_batch=data) + + metrics = 
runner.test_evaluator.evaluate(len(runner.test_dataloader.dataset))
+        print(metrics)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/models/cv/object_detection/gfl/igie/requirements.txt b/models/cv/object_detection/gfl/igie/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b6b3fff4aa66c7401a67b2874e40c3caf154a34d
--- /dev/null
+++ b/models/cv/object_detection/gfl/igie/requirements.txt
@@ -0,0 +1,6 @@
+onnx
+tqdm
+onnxsim
+mmdet==3.3.0
+mmdeploy==1.3.1
+mmengine==0.10.4
diff --git a/models/cv/object_detection/gfl/igie/scripts/infer_gfl_fp16_accuracy.sh b/models/cv/object_detection/gfl/igie/scripts/infer_gfl_fp16_accuracy.sh
new file mode 100644
index 0000000000000000000000000000000000000000..63f8d669ca4276b1890066487a11876bcfa8466d
--- /dev/null
+++ b/models/cv/object_detection/gfl/igie/scripts/infer_gfl_fp16_accuracy.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+batchsize=32
+model_path="gfl_opt.onnx"
+datasets_path=${DATASETS_DIR}
+
+# build engine
+python3 build_engine.py \
+    --model_path ${model_path} \
+    --input input:${batchsize},3,800,800 \
+    --precision fp16 \
+    --engine_path gfl_opt_bs_${batchsize}_fp16.so
+
+
+# inference
+python3 inference.py \
+    --engine gfl_opt_bs_${batchsize}_fp16.so \
+    --batchsize ${batchsize} \
+    --input_name input \
+    --datasets ${datasets_path}
diff --git a/models/cv/object_detection/gfl/igie/scripts/infer_gfl_fp16_performance.sh b/models/cv/object_detection/gfl/igie/scripts/infer_gfl_fp16_performance.sh
new file mode 100644
index 0000000000000000000000000000000000000000..2cf32f975430772948fc317b655959862bf79203
--- /dev/null
+++ b/models/cv/object_detection/gfl/igie/scripts/infer_gfl_fp16_performance.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
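+# Performance-only variant of the accuracy script: it passes --perf_only to
+# inference.py, which reports mean latency and FPS from TVM's time_evaluator
+# instead of running COCO evaluation.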
+ +batchsize=32 +model_path="gfl_opt.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,800,800 \ + --precision fp16 \ + --engine_path gfl_opt_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine gfl_opt_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ + --perf_only True diff --git a/models/cv/semantic_segmentation/unet/igie/README.md b/models/cv/semantic_segmentation/unet/igie/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a5745b0e7e432140ef9af6cc7efef94feb087913 --- /dev/null +++ b/models/cv/semantic_segmentation/unet/igie/README.md @@ -0,0 +1,67 @@ +# UNet (IGIE) + +## Model Description + +UNet is a convolutional neural network architecture for image segmentation, featuring a symmetric encoder-decoder structure. The encoder gradually extracts features and reduces spatial dimensions, while the decoder restores resolution through upsampling. Key skip connections allow high-resolution features to be directly passed to the decoder, enhancing detail retention and segmentation accuracy. + +## Supported Environments + +| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release | +|--------|-----------|---------| +| MR-V100 | 4.2.0 | 25.06 | + +## Model Preparation + +### Prepare Resources + +Pretrained model: + +Dataset: to download the dataset. + +### Install Dependencies + +```bash +# Install libGL +## CentOS +yum install -y mesa-libGL +## Ubuntu +apt install -y libgl1-mesa-glx + +pip3 install -r requirements.txt +``` + +### Model Conversion + +```bash +# export onnx model + +python3 export.py --weight fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes_20211210_145204-6860854e.pth --cfg fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes.py --output unet.onnx + +# use onnxsim optimize onnx model +onnxsim unet.onnx unet_opt.onnx +``` + +## Model Inference + +```bash +export DATASETS_DIR=/Path/to/cityscapes/ +``` + +### FP16 + +```bash +# Accuracy +bash scripts/infer_unet_fp16_accuracy.sh +# Performance +bash scripts/infer_unet_fp16_performance.sh +``` + +## Model Results + +| Model | BatchSize | Precision | FPS | mIoU | +|-------|-----------|-----------|--------|---------| +| UNet | 16 | FP16 | 66.265 | 69.48 | + +## References + +- [mmsegmentation](https://github.com/open-mmlab/mmsegmentation) diff --git a/models/cv/semantic_segmentation/unet/igie/build_engine.py b/models/cv/semantic_segmentation/unet/igie/build_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..54aa8847e5ceafba3f40d44f0e1e280d4740d870 --- /dev/null +++ b/models/cv/semantic_segmentation/unet/igie/build_engine.py @@ -0,0 +1,73 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
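+# Example invocation (matches scripts/infer_unet_fp16_accuracy.sh):
+#   python3 build_engine.py --model_path unet_opt.onnx \
+#       --input input:16,3,512,1024 --precision fp16 \
+#       --engine_path unet_opt_bs_16_fp16.so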
+
+import tvm
+import argparse
+from tvm import relay
+from tvm.relay.import_model import import_model_to_igie
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument("--model_path",
+                        type=str,
+                        required=True,
+                        help="original model path.")
+
+    parser.add_argument("--engine_path",
+                        type=str,
+                        required=True,
+                        help="igie export engine path.")
+
+    parser.add_argument("--input",
+                        type=str,
+                        required=True,
+                        help="""
+                        input info of the model, format should be:
+                        input_name:input_shape
+                        eg: --input input:1,3,224,224.
+                        """)
+
+    parser.add_argument("--precision",
+                        type=str,
+                        choices=["fp32", "fp16", "int8"],
+                        required=True,
+                        help="model inference precision.")
+
+    args = parser.parse_args()
+
+    return args
+
+def main():
+    args = parse_args()
+
+    # parse the input name and shape from the --input argument
+    input_name, input_shape = args.input.split(":")
+    shape = tuple([int(s) for s in input_shape.split(",")])
+    input_dict = {input_name: shape}
+
+    target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer")
+
+    mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie")
+
+    # build engine
+    lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision)
+
+    # export engine
+    lib.export_library(args.engine_path)
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/models/cv/semantic_segmentation/unet/igie/ci/prepare.sh b/models/cv/semantic_segmentation/unet/igie/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..5746110336d30c1b9a2b5a325e09526f3b0f030a
--- /dev/null
+++ b/models/cv/semantic_segmentation/unet/igie/ci/prepare.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+    apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+    yum install -y mesa-libGL
+else
+    echo "Unsupported OS: ${ID}"
+fi
+
+pip3 install -r requirements.txt
+
+# export onnx model
+python3 export.py --weight fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes_20211210_145204-6860854e.pth --cfg fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes.py --output unet.onnx
+
+# use onnxsim to optimize the onnx model
+onnxsim unet.onnx unet_opt.onnx
diff --git a/models/cv/semantic_segmentation/unet/igie/deploy_default.py b/models/cv/semantic_segmentation/unet/igie/deploy_default.py
new file mode 100644
index 0000000000000000000000000000000000000000..33515ff19245cd98d86fa0937c4cd22e773e71cc
--- /dev/null
+++ b/models/cv/semantic_segmentation/unet/igie/deploy_default.py
@@ -0,0 +1,29 @@
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +onnx_config = dict( + type='onnx', + export_params=True, + keep_initializers_as_inputs=False, + opset_version=11, + save_file='end2end.onnx', + input_names=['input'], + output_names=['output'], + input_shape=None, + optimize=True) + +codebase_config = dict(type='mmseg', task='Segmentation', with_argmax=True) + +backend_config = dict(type='onnxruntime') \ No newline at end of file diff --git a/models/cv/semantic_segmentation/unet/igie/export.py b/models/cv/semantic_segmentation/unet/igie/export.py new file mode 100644 index 0000000000000000000000000000000000000000..d460a75966d147e19a0927b477f13ea1a1ded9d8 --- /dev/null +++ b/models/cv/semantic_segmentation/unet/igie/export.py @@ -0,0 +1,73 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import argparse + +import torch +from mmdeploy.utils import load_config +from mmdeploy.apis import build_task_processor + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--weight", + type=str, + required=True, + help="pytorch model weight.") + + parser.add_argument("--cfg", + type=str, + required=True, + help="model config file.") + + parser.add_argument("--output", + type=str, + required=True, + help="export onnx model path.") + + args = parser.parse_args() + return args + +def main(): + args = parse_args() + + deploy_cfg = 'deploy_default.py' + model_cfg = args.cfg + model_checkpoint = args.weight + + deploy_cfg, model_cfg = load_config(deploy_cfg, model_cfg) + + task_processor = build_task_processor(model_cfg, deploy_cfg, device='cpu') + + model = task_processor.build_pytorch_model(model_checkpoint) + + input_names = ['input'] + dynamic_axes = {'input': {0: '-1'}} + + dummy_input = torch.randn(1, 3, 512, 1024) + + torch.onnx.export( + model, + dummy_input, + args.output, + input_names = input_names, + dynamic_axes = dynamic_axes, + opset_version=13 + ) + + print("Export onnx model successfully! ") + +if __name__ == '__main__': + main() diff --git a/models/cv/semantic_segmentation/unet/igie/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes.py b/models/cv/semantic_segmentation/unet/igie/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..b0f3cb1f4e799f31fdd742a199d3e8b809010297 --- /dev/null +++ b/models/cv/semantic_segmentation/unet/igie/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes.py @@ -0,0 +1,331 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +crop_size = ( + 512, + 1024, +) +data_preprocessor = dict( + bgr_to_rgb=True, + mean=[ + 123.675, + 116.28, + 103.53, + ], + pad_val=0, + seg_pad_val=255, + size=( + 512, + 1024, + ), + std=[ + 58.395, + 57.12, + 57.375, + ], + type='SegDataPreProcessor') +data_root = 'data/cityscapes/' +dataset_type = 'CityscapesDataset' +default_hooks = dict( + checkpoint=dict(by_epoch=False, interval=16000, type='CheckpointHook'), + logger=dict(interval=50, log_metric_by_epoch=False, type='LoggerHook'), + param_scheduler=dict(type='ParamSchedulerHook'), + sampler_seed=dict(type='DistSamplerSeedHook'), + timer=dict(type='IterTimerHook'), + visualization=dict(type='SegVisualizationHook')) +default_scope = 'mmseg' +env_cfg = dict( + cudnn_benchmark=True, + dist_cfg=dict(backend='nccl'), + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0)) +img_ratios = [ + 0.5, + 0.75, + 1.0, + 1.25, + 1.5, + 1.75, +] +launcher = 'none' +load_from = '../fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes_20211210_145204-6860854e.pth' +log_level = 'ERROR' +log_processor = dict(by_epoch=False) +model = dict( + auxiliary_head=dict( + align_corners=False, + channels=64, + concat_input=False, + dropout_ratio=0.1, + in_channels=128, + in_index=3, + loss_decode=dict( + loss_weight=0.4, type='CrossEntropyLoss', use_sigmoid=False), + norm_cfg=dict(requires_grad=True, type='SyncBN'), + num_classes=19, + num_convs=1, + type='FCNHead'), + backbone=dict( + act_cfg=dict(type='ReLU'), + base_channels=64, + conv_cfg=None, + dec_dilations=( + 1, + 1, + 1, + 1, + ), + dec_num_convs=( + 2, + 2, + 2, + 2, + ), + downsamples=( + True, + True, + True, + True, + ), + enc_dilations=( + 1, + 1, + 1, + 1, + 1, + ), + enc_num_convs=( + 2, + 2, + 2, + 2, + 2, + ), + in_channels=3, + norm_cfg=dict(requires_grad=True, type='SyncBN'), + norm_eval=False, + num_stages=5, + strides=( + 1, + 1, + 1, + 1, + 1, + ), + type='UNet', + upsample_cfg=dict(type='InterpConv'), + with_cp=False), + data_preprocessor=dict( + bgr_to_rgb=True, + mean=[ + 123.675, + 116.28, + 103.53, + ], + pad_val=0, + seg_pad_val=255, + size=( + 512, + 1024, + ), + std=[ + 58.395, + 57.12, + 57.375, + ], + type='SegDataPreProcessor'), + decode_head=dict( + align_corners=False, + channels=64, + concat_input=False, + dropout_ratio=0.1, + in_channels=64, + in_index=4, + loss_decode=dict( + loss_weight=1.0, type='CrossEntropyLoss', use_sigmoid=False), + norm_cfg=dict(requires_grad=True, type='SyncBN'), + num_classes=19, + num_convs=1, + type='FCNHead'), + pretrained=None, + test_cfg=dict(crop_size=256, mode='whole', stride=170), + train_cfg=dict(), + type='EncoderDecoder') +norm_cfg = dict(requires_grad=True, type='SyncBN') +optim_wrapper = dict( + clip_grad=None, + optimizer=dict(lr=0.01, momentum=0.9, type='SGD', weight_decay=0.0005), + type='OptimWrapper') +optimizer = dict(lr=0.01, momentum=0.9, type='SGD', weight_decay=0.0005) +param_scheduler = [ + dict( + begin=0, + by_epoch=False, + end=160000, + eta_min=0.0001, + power=0.9, + 
type='PolyLR'), +] +resume = False +test_cfg = dict(type='TestLoop') +test_dataloader = dict( + batch_size=2, + dataset=dict( + data_prefix=dict( + img_path='leftImg8bit/val', seg_map_path='gtFine/val'), + data_root='data/cityscapes', + pipeline=[ + dict(type='LoadImageFromFile'), + dict(keep_ratio=True, scale=( + 1024, + 512, + ), type='Resize'), + dict(type='LoadAnnotations'), + dict(type='PackSegInputs'), + ], + type='CityscapesDataset'), + num_workers=4, + persistent_workers=True, + sampler=dict(shuffle=False, type='DefaultSampler')) +test_evaluator = dict( + iou_metrics=[ + 'mIoU', + ], + type='IoUMetric') +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(keep_ratio=True, scale=( + 1024, + 512, + ), type='Resize'), + dict(type='LoadAnnotations'), + dict(type='PackSegInputs'), +] +train_cfg = dict( + max_iters=160000, type='IterBasedTrainLoop', val_interval=16000) +train_dataloader = dict( + batch_size=4, + dataset=dict( + data_prefix=dict( + img_path='leftImg8bit/train', seg_map_path='gtFine/train'), + data_root='data/cityscapes/', + pipeline=[ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict( + keep_ratio=True, + ratio_range=( + 0.5, + 2.0, + ), + scale=( + 2048, + 1024, + ), + type='RandomResize'), + dict( + cat_max_ratio=0.75, crop_size=( + 512, + 1024, + ), type='RandomCrop'), + dict(prob=0.5, type='RandomFlip'), + dict(type='PhotoMetricDistortion'), + dict(type='PackSegInputs'), + ], + type='CityscapesDataset'), + num_workers=4, + persistent_workers=True, + sampler=dict(shuffle=True, type='InfiniteSampler')) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict( + keep_ratio=True, + ratio_range=( + 0.5, + 2.0, + ), + scale=( + 2048, + 1024, + ), + type='RandomResize'), + dict(cat_max_ratio=0.75, crop_size=( + 512, + 1024, + ), type='RandomCrop'), + dict(prob=0.5, type='RandomFlip'), + dict(type='PhotoMetricDistortion'), + dict(type='PackSegInputs'), +] +tta_model = dict(type='SegTTAModel') +tta_pipeline = [ + dict(backend_args=None, type='LoadImageFromFile'), + dict( + transforms=[ + [ + dict(keep_ratio=True, scale_factor=0.5, type='Resize'), + dict(keep_ratio=True, scale_factor=0.75, type='Resize'), + dict(keep_ratio=True, scale_factor=1.0, type='Resize'), + dict(keep_ratio=True, scale_factor=1.25, type='Resize'), + dict(keep_ratio=True, scale_factor=1.5, type='Resize'), + dict(keep_ratio=True, scale_factor=1.75, type='Resize'), + ], + [ + dict(direction='horizontal', prob=0.0, type='RandomFlip'), + dict(direction='horizontal', prob=1.0, type='RandomFlip'), + ], + [ + dict(type='LoadAnnotations'), + ], + [ + dict(type='PackSegInputs'), + ], + ], + type='TestTimeAug'), +] +val_cfg = dict(type='ValLoop') +val_dataloader = dict( + batch_size=1, + dataset=dict( + data_prefix=dict( + img_path='leftImg8bit/val', seg_map_path='gtFine/val'), + data_root='data/cityscapes/', + pipeline=[ + dict(type='LoadImageFromFile'), + dict(keep_ratio=True, scale=( + 2048, + 1024, + ), type='Resize'), + dict(type='LoadAnnotations'), + dict(type='PackSegInputs'), + ], + type='CityscapesDataset'), + num_workers=4, + persistent_workers=True, + sampler=dict(shuffle=False, type='DefaultSampler')) +val_evaluator = dict( + iou_metrics=[ + 'mIoU', + ], type='IoUMetric') +vis_backends = [ + dict(type='LocalVisBackend'), +] +visualizer = dict( + name='visualizer', + type='SegLocalVisualizer', + vis_backends=[ + dict(type='LocalVisBackend'), + ]) +work_dir = './workspace' diff --git a/models/cv/semantic_segmentation/unet/igie/inference.py 
b/models/cv/semantic_segmentation/unet/igie/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..489c012e272ae6759a5598eae5f8d6cf37b85228 --- /dev/null +++ b/models/cv/semantic_segmentation/unet/igie/inference.py @@ -0,0 +1,140 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import argparse +import tvm +import torch +import numpy as np +from tvm import relay +from tqdm import tqdm +from mmseg.registry import RUNNERS +from mmengine.config import Config + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--engine", + type=str, + required=True, + help="igie engine path.") + + parser.add_argument("--batchsize", + type=int, + required=True, + help="inference batch size.") + + parser.add_argument("--datasets", + type=str, + required=True, + help="datasets path.") + + parser.add_argument("--input_name", + type=str, + required=True, + help="input name of the model.") + + parser.add_argument("--warmup", + type=int, + default=3, + help="number of warmup before test.") + + parser.add_argument("--acc_target", + type=float, + default=None, + help="Model inference Accuracy target.") + + parser.add_argument("--fps_target", + type=float, + default=None, + help="Model inference FPS target.") + + parser.add_argument("--perf_only", + type=bool, + default=False, + help="Run performance test only") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + batch_size = args.batchsize + + # create iluvatar target & device + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + device = tvm.device(target.kind.name, 0) + + # load engine + lib = tvm.runtime.load_module(args.engine) + + # create runtime from engine + module = tvm.contrib.graph_executor.GraphModule(lib["default"](device)) + + # just run perf test + if args.perf_only: + ftimer = module.module.time_evaluator("run", device, number=100, repeat=1) + prof_res = np.array(ftimer().results) * 1000 + fps = batch_size * 1000 / np.mean(prof_res) + print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}") + else: + # warm up + for _ in range(args.warmup): + module.run() + + # runner config + cfg = Config.fromfile("fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes.py") + + cfg.work_dir = "./workspace" + cfg['test_dataloader']['batch_size'] = batch_size + cfg['test_dataloader']['dataset']['data_root'] = args.datasets + cfg['log_level'] = 'ERROR' + + # build runner + runner = RUNNERS.build(cfg) + + for data in tqdm(runner.test_dataloader): + + input_data = runner.model.data_preprocessor(data, False) + image = input_data['inputs'].cpu() + pad_batch = len(image) != batch_size + + if pad_batch: + origin_size = len(image) + image = np.resize(image, (batch_size, *image.shape[1:])) + + module.set_input("input", tvm.nd.array(image, device)) + + module.run() + + output = module.get_output(0).asnumpy() + + if pad_batch: + output = 
output[:origin_size] + + output = torch.from_numpy(output) + + outputs = runner.model.postprocess_result(output, data_samples=data['data_samples']) + + runner.test_evaluator.process(data_samples=outputs, data_batch=data) + + metrics = runner.test_evaluator.evaluate(len(runner.test_dataloader.dataset)) + + print(metrics) + +if __name__ == "__main__": + main() diff --git a/models/cv/semantic_segmentation/unet/igie/requirements.txt b/models/cv/semantic_segmentation/unet/igie/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..3f7e885b94016ed3ebe34e3abd4ed41d7e88f610 --- /dev/null +++ b/models/cv/semantic_segmentation/unet/igie/requirements.txt @@ -0,0 +1,7 @@ +onnx +tqdm +onnxsim +mmsegmentation==1.2.2 +mmengine==0.10.7 +mmdeploy +ftfy \ No newline at end of file diff --git a/models/cv/semantic_segmentation/unet/igie/scripts/infer_unet_fp16_accuracy.sh b/models/cv/semantic_segmentation/unet/igie/scripts/infer_unet_fp16_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..61cc2b27c13284856089c9d606eac11398e2dc50 --- /dev/null +++ b/models/cv/semantic_segmentation/unet/igie/scripts/infer_unet_fp16_accuracy.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=16 +model_path="unet_opt.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,512,1024 \ + --precision fp16 \ + --engine_path unet_opt_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine unet_opt_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} diff --git a/models/cv/semantic_segmentation/unet/igie/scripts/infer_unet_fp16_performance.sh b/models/cv/semantic_segmentation/unet/igie/scripts/infer_unet_fp16_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..6c7a3b859894c60c09418e68a8da058d83966186 --- /dev/null +++ b/models/cv/semantic_segmentation/unet/igie/scripts/infer_unet_fp16_performance.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
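+# Performance-only variant: --perf_only makes inference.py skip the Cityscapes
+# evaluation and report mean latency / FPS measured by TVM's time_evaluator.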
+ +batchsize=16 +model_path="unet_opt.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,512,1024 \ + --precision fp16 \ + --engine_path unet_opt_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine unet_opt_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ + --perf_only True diff --git a/tests/run_igie.py b/tests/run_igie.py index 3350605cdbdee170c664f8a269cc682923e959b0..cc8683b993030c1679d1c11af7386216ba92f188 100644 --- a/tests/run_igie.py +++ b/tests/run_igie.py @@ -55,7 +55,7 @@ def main(): sys.exit(-1) result = {} - if model["category"] == "cv/classification": + if model["category"] in ["cv/classification", "cv/semantic_segmentation"]: logging.info(f"Start running {model['model_name']} test case:\n{json.dumps(model, indent=4)}") d_url = model["download_url"] if d_url is not None: @@ -142,6 +142,7 @@ def run_clf_testcase(model): } d_url = model["download_url"] checkpoint_n = d_url.split("/")[-1] + dataset_n = model["datasets"].split("/")[-1] prepare_script = f""" cd ../{model['model_path']} ln -s /mnt/deepspark/data/checkpoints/{checkpoint_n} ./ @@ -159,7 +160,7 @@ def run_clf_testcase(model): for prec in model["precisions"]: logging.info(f"Start running {model_name} {prec} test case") script = f""" - export DATASETS_DIR=/mnt/deepspark/data/datasets/imagenet-val + export DATASETS_DIR=/mnt/deepspark/data/datasets/{dataset_n} cd ../{model['model_path']} bash scripts/infer_{model_name}_{prec}_accuracy.sh bash scripts/infer_{model_name}_{prec}_performance.sh @@ -176,8 +177,22 @@ def run_clf_testcase(model): except ValueError: print("The string cannot be converted to a float.") result["result"][prec] = result["result"][prec] | {m[0]: m[1], m[2]: m[3]} - if matchs and len(matchs) == 2: - result["result"][prec]["status"] = "PASS" + if matchs: + if len(matchs) == 2: + result["result"][prec]["status"] = "PASS" + else: + # Define regex pattern to match key-value pairs inside curly braces + kv_pattern = r"'(\w+)'\s*:\s*([\d.]+)" + # Find all matches + kv_matches = re.findall(kv_pattern, sout) + for key, value in kv_matches: + result["result"][prec]["status"] = "PASS" + try: + result["result"][prec][key] = float(value) + except ValueError: + print("The string cannot be converted to a float.") + result["result"][prec][key] = value + result["result"][prec]["Cost time (s)"] = t logging.debug(f"matchs:\n{matchs}") return result