From 00fe9fcdba28b41c99698ffb25808733c08e1e95 Mon Sep 17 00:00:00 2001
From: YoungPeng
Date: Wed, 16 Apr 2025 17:02:48 +0800
Subject: [PATCH 1/4] Add: gfl inference script.

---
 models/cv/object_detection/gfl/igie/README.md      |  66 +++++
 .../object_detection/gfl/igie/build_engine.py      |  73 +++++
 .../object_detection/gfl/igie/ci/prepare.sh        |  34 +++
 .../gfl/igie/deploy_default.py                     |  41 +++
 models/cv/object_detection/gfl/igie/export.py      |  72 +++++
 .../gfl/igie/gfl_r50_fpn_1x_coco.py                | 270 ++++++++++++++++++
 .../cv/object_detection/gfl/igie/inference.py      | 157 ++++++++++
 .../gfl/igie/requirements.txt                      |   6 +
 .../igie/scripts/infer_gfl_fp16_accuracy.sh        |  35 +++
 .../scripts/infer_gfl_fp16_performance.sh          |  36 +++
 10 files changed, 790 insertions(+)
 create mode 100644 models/cv/object_detection/gfl/igie/README.md
 create mode 100644 models/cv/object_detection/gfl/igie/build_engine.py
 create mode 100644 models/cv/object_detection/gfl/igie/ci/prepare.sh
 create mode 100644 models/cv/object_detection/gfl/igie/deploy_default.py
 create mode 100644 models/cv/object_detection/gfl/igie/export.py
 create mode 100644 models/cv/object_detection/gfl/igie/gfl_r50_fpn_1x_coco.py
 create mode 100644 models/cv/object_detection/gfl/igie/inference.py
 create mode 100644 models/cv/object_detection/gfl/igie/requirements.txt
 create mode 100644 models/cv/object_detection/gfl/igie/scripts/infer_gfl_fp16_accuracy.sh
 create mode 100644 models/cv/object_detection/gfl/igie/scripts/infer_gfl_fp16_performance.sh

diff --git a/models/cv/object_detection/gfl/igie/README.md b/models/cv/object_detection/gfl/igie/README.md
new file mode 100644
index 00000000..6fc8553f
--- /dev/null
+++ b/models/cv/object_detection/gfl/igie/README.md
@@ -0,0 +1,66 @@
# GFL (IGIE)

## Model Description

GFL (Generalized Focal Loss) is an object detection model that uses an improved focal loss to address class imbalance, strengthening classification and improving both the detection accuracy of multi-scale objects and the precision of bounding-box predictions. It is suitable for object detection tasks in complex scenes.

## Supported Environments

| GPU     | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release |
|---------|-----------|---------|
| MR-V100 | 4.2.0     | 25.06   |

## Model Preparation

### Prepare Resources

Pretrained model: the `gfl_r50_fpn_1x_coco_20200629_121244-25944287.pth` checkpoint (referenced by the export step below).

Dataset: download the COCO2017 validation dataset (images and `instances_val2017.json` annotations).
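
The bundled config expects the standard COCO layout under the dataset root. The following minimal sketch (path names taken from `gfl_r50_fpn_1x_coco.py`; `DATASETS_DIR` is the same variable exported in the Model Inference step below) is one way to sanity-check the layout before running the scripts:

```python
import os
import sys

# Expected COCO layout, as referenced by gfl_r50_fpn_1x_coco.py:
#   <DATASETS_DIR>/images/val2017/                     -> validation images
#   <DATASETS_DIR>/annotations/instances_val2017.json  -> validation annotations
root = os.environ.get("DATASETS_DIR", "data/coco")
required = [
    os.path.join(root, "images", "val2017"),
    os.path.join(root, "annotations", "instances_val2017.json"),
]
missing = [path for path in required if not os.path.exists(path)]
if missing:
    sys.exit(f"Missing dataset paths: {missing}")
print("COCO validation layout looks OK.")
```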
### Install Dependencies

```bash
# Install libGL
## CentOS
yum install -y mesa-libGL
## Ubuntu
apt install -y libgl1-mesa-glx

pip3 install -r requirements.txt
```

### Model Conversion

```bash
# export onnx model
python3 export.py --weight gfl_r50_fpn_1x_coco_20200629_121244-25944287.pth --cfg gfl_r50_fpn_1x_coco.py --output gfl.onnx

# use onnxsim to optimize the onnx model
onnxsim gfl.onnx gfl_opt.onnx
```

## Model Inference

```bash
export DATASETS_DIR=/Path/to/coco/
```

### FP16

```bash
# Accuracy
bash scripts/infer_gfl_fp16_accuracy.sh
# Performance
bash scripts/infer_gfl_fp16_performance.sh
```

## Model Results

| Model | BatchSize | Precision | FPS    | IOU@0.5 | IOU@0.5:0.95 |
|-------|-----------|-----------|--------|---------|--------------|
| GFL   | 32        | FP16      | 139.78 | 0.552   | 0.378        |

## References

- [mmdetection](https://github.com/open-mmlab/mmdetection.git)

diff --git a/models/cv/object_detection/gfl/igie/build_engine.py b/models/cv/object_detection/gfl/igie/build_engine.py
new file mode 100644
index 00000000..54aa8847
--- /dev/null
+++ b/models/cv/object_detection/gfl/igie/build_engine.py
@@ -0,0 +1,73 @@
# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

import tvm
import argparse
from tvm import relay
from tvm.relay.import_model import import_model_to_igie

def parse_args():
    parser = argparse.ArgumentParser()

    parser.add_argument("--model_path",
                        type=str,
                        required=True,
                        help="original model path.")

    parser.add_argument("--engine_path",
                        type=str,
                        required=True,
                        help="igie export engine path.")

    parser.add_argument("--input",
                        type=str,
                        required=True,
                        help="""
                        input info of the model, format should be:
                        input_name:input_shape
                        e.g.: --input input:1,3,224,224.
                        """)

    parser.add_argument("--precision",
                        type=str,
                        choices=["fp32", "fp16", "int8"],
                        required=True,
                        help="model inference precision.")

    args = parser.parse_args()

    return args

def main():
    args = parse_args()

    # get input value info
    input_name, input_shape = args.input.split(":")
    shape = tuple([int(s) for s in input_shape.split(",")])
    input_dict = {input_name: shape}

    target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer")

    mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie")

    # build engine
    lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision)

    # export engine
    lib.export_library(args.engine_path)


if __name__ == "__main__":
    main()
\ No newline at end of file
diff --git a/models/cv/object_detection/gfl/igie/ci/prepare.sh b/models/cv/object_detection/gfl/igie/ci/prepare.sh
new file mode 100644
index 00000000..a488e8e0
--- /dev/null
+++ b/models/cv/object_detection/gfl/igie/ci/prepare.sh
@@ -0,0 +1,34 @@
#!/bin/bash
# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
# All Rights Reserved.
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip3 install -r requirements.txt + +# export onnx model +python3 export.py --weight gfl_r50_fpn_1x_coco_20200629_121244-25944287.pth --cfg gfl_r50_fpn_1x_coco.py --output gfl.onnx + +# use onnxsim optimize onnx model +onnxsim gfl.onnx gfl_opt.onnx \ No newline at end of file diff --git a/models/cv/object_detection/gfl/igie/deploy_default.py b/models/cv/object_detection/gfl/igie/deploy_default.py new file mode 100644 index 00000000..e6c4d46a --- /dev/null +++ b/models/cv/object_detection/gfl/igie/deploy_default.py @@ -0,0 +1,41 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +onnx_config = dict( + type='onnx', + export_params=True, + keep_initializers_as_inputs=False, + opset_version=11, + save_file='end2end.onnx', + input_names=['input'], + output_names=['output'], + input_shape=None, + optimize=True) + +codebase_config = dict( + type='mmdet', + task='ObjectDetection', + model_type='end2end', + post_processing=dict( + score_threshold=0.05, + confidence_threshold=0.005, + iou_threshold=0.5, + max_output_boxes_per_class=200, + pre_top_k=5000, + keep_top_k=100, + background_label_id=-1, + )) + +backend_config = dict(type='onnxruntime') \ No newline at end of file diff --git a/models/cv/object_detection/gfl/igie/export.py b/models/cv/object_detection/gfl/igie/export.py new file mode 100644 index 00000000..eed56e0c --- /dev/null +++ b/models/cv/object_detection/gfl/igie/export.py @@ -0,0 +1,72 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
import argparse

import torch
from mmdeploy.utils import load_config
from mmdeploy.apis import build_task_processor

def parse_args():
    parser = argparse.ArgumentParser()

    parser.add_argument("--weight",
                        type=str,
                        required=True,
                        help="pytorch model weight.")

    parser.add_argument("--cfg",
                        type=str,
                        required=True,
                        help="model config file.")

    parser.add_argument("--output",
                        type=str,
                        required=True,
                        help="export onnx model path.")

    args = parser.parse_args()
    return args

def main():
    args = parse_args()

    deploy_cfg = 'deploy_default.py'
    model_cfg = args.cfg
    model_checkpoint = args.weight

    deploy_cfg, model_cfg = load_config(deploy_cfg, model_cfg)

    task_processor = build_task_processor(model_cfg, deploy_cfg, device='cpu')

    model = task_processor.build_pytorch_model(model_checkpoint)

    input_names = ['input']
    dynamic_axes = {'input': {0: '-1'}}
    dummy_input = torch.randn(1, 3, 800, 800)

    torch.onnx.export(
        model,
        dummy_input,
        args.output,
        input_names=input_names,
        dynamic_axes=dynamic_axes,
        opset_version=13
    )

    print("Export ONNX model successfully!")

if __name__ == '__main__':
    main()
diff --git a/models/cv/object_detection/gfl/igie/gfl_r50_fpn_1x_coco.py b/models/cv/object_detection/gfl/igie/gfl_r50_fpn_1x_coco.py
new file mode 100644
index 00000000..cac53a3a
--- /dev/null
+++ b/models/cv/object_detection/gfl/igie/gfl_r50_fpn_1x_coco.py
@@ -0,0 +1,270 @@
# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
auto_scale_lr = dict(base_batch_size=16, enable=False)
backend_args = None
data_root = 'data/coco/'
dataset_type = 'CocoDataset'
default_hooks = dict(
    checkpoint=dict(interval=1, type='CheckpointHook'),
    logger=dict(interval=50, type='LoggerHook'),
    param_scheduler=dict(type='ParamSchedulerHook'),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    timer=dict(type='IterTimerHook'),
    visualization=dict(type='DetVisualizationHook'))
default_scope = 'mmdet'
env_cfg = dict(
    cudnn_benchmark=False,
    dist_cfg=dict(backend='nccl'),
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
load_from = None
log_level = 'ERROR'
log_processor = dict(by_epoch=True, type='LogProcessor', window_size=50)
model = dict(
    backbone=dict(
        depth=50,
        frozen_stages=1,
        init_cfg=dict(checkpoint='torchvision://resnet50', type='Pretrained'),
        norm_cfg=dict(requires_grad=True, type='BN'),
        norm_eval=True,
        num_stages=4,
        out_indices=(
            0,
            1,
            2,
            3,
        ),
        style='pytorch',
        type='ResNet'),
    bbox_head=dict(
        anchor_generator=dict(
            octave_base_scale=8,
            ratios=[
                1.0,
            ],
            scales_per_octave=1,
            strides=[
                8,
                16,
                32,
                64,
                128,
            ],
            type='AnchorGenerator'),
        feat_channels=256,
        in_channels=256,
        loss_bbox=dict(loss_weight=2.0, type='GIoULoss'),
        loss_cls=dict(
            beta=2.0,
            loss_weight=1.0,
            type='QualityFocalLoss',
            use_sigmoid=True),
        loss_dfl=dict(loss_weight=0.25, type='DistributionFocalLoss'),
        num_classes=80,
        reg_max=16,
        stacked_convs=4,
        type='GFLHead'),
    data_preprocessor=dict(
        bgr_to_rgb=True,
        mean=[
            123.675,
            116.28,
            103.53,
        ],
        pad_size_divisor=32,
        std=[
            58.395,
            57.12,
            57.375,
        ],
        type='DetDataPreprocessor'),
    neck=dict(
        add_extra_convs='on_output',
        in_channels=[
            256,
            512,
            1024,
            2048,
        ],
        num_outs=5,
        out_channels=256,
        start_level=1,
        type='FPN'),
    test_cfg=dict(
        max_per_img=100,
        min_bbox_size=0,
        nms=dict(iou_threshold=0.6, type='nms'),
        nms_pre=1000,
        score_thr=0.05),
    train_cfg=dict(
        allowed_border=-1,
        assigner=dict(topk=9, type='ATSSAssigner'),
        debug=False,
        pos_weight=-1),
    type='GFL')
optim_wrapper = dict(
    optimizer=dict(lr=0.01, momentum=0.9, type='SGD', weight_decay=0.0001),
    type='OptimWrapper')
param_scheduler = [
    dict(
        begin=0, by_epoch=False, end=500, start_factor=0.001, type='LinearLR'),
    dict(
        begin=0,
        by_epoch=True,
        end=12,
        gamma=0.1,
        milestones=[
            8,
            11,
        ],
        type='MultiStepLR'),
]
resume = False
test_cfg = dict(type='TestLoop')
test_dataloader = dict(
    batch_size=32,
    dataset=dict(
        ann_file='annotations/instances_val2017.json',
        backend_args=None,
        data_prefix=dict(img='images/val2017/'),
        data_root='data/coco/',
        pipeline=[
            dict(backend_args=None, type='LoadImageFromFile'),
            dict(keep_ratio=True, scale=(
                800,
                800,
            ), type='Resize'),
            dict(type='LoadAnnotations', with_bbox=True),
            dict(
                meta_keys=(
                    'img_id',
                    'img_path',
                    'ori_shape',
                    'img_shape',
                    'scale_factor',
                ),
                type='PackDetInputs'),
        ],
        test_mode=True,
        type='CocoDataset'),
    drop_last=False,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(shuffle=False, type='DefaultSampler'))
test_evaluator = dict(
    ann_file='data/coco/annotations/instances_val2017.json',
    backend_args=None,
    format_only=False,
    metric='bbox',
    type='CocoMetric')
test_pipeline = [
    dict(backend_args=None, type='LoadImageFromFile'),
    dict(keep_ratio=True, scale=(
        800,
        800,
    ), type='Resize'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        meta_keys=(
            'img_id',
            'img_path',
            'ori_shape',
            'img_shape',
            'scale_factor',
        ),
        type='PackDetInputs'),
]
train_cfg = dict(max_epochs=12, type='EpochBasedTrainLoop', val_interval=1)
train_dataloader = dict(
    batch_sampler=dict(type='AspectRatioBatchSampler'),
    batch_size=2,
    dataset=dict(
        ann_file='annotations/instances_train2017.json',
        backend_args=None,
        data_prefix=dict(img='train2017/'),
        data_root='data/coco/',
        filter_cfg=dict(filter_empty_gt=True, min_size=32),
        pipeline=[
            dict(backend_args=None, type='LoadImageFromFile'),
            dict(type='LoadAnnotations', with_bbox=True),
            dict(keep_ratio=True, scale=(
                800,
                800,
            ), type='Resize'),
            dict(prob=0.5, type='RandomFlip'),
            dict(type='PackDetInputs'),
        ],
        type='CocoDataset'),
    num_workers=2,
    persistent_workers=True,
    sampler=dict(shuffle=True, type='DefaultSampler'))
train_pipeline = [
    dict(backend_args=None, type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(keep_ratio=True, scale=(
        800,
        800,
    ), type='Resize'),
    dict(prob=0.5, type='RandomFlip'),
    dict(type='PackDetInputs'),
]
val_cfg = dict(type='ValLoop')
val_dataloader = dict(
    batch_size=1,
    dataset=dict(
        ann_file='annotations/instances_val2017.json',
        backend_args=None,
        data_prefix=dict(img='val2017/'),
        data_root='data/coco/',
        pipeline=[
            dict(backend_args=None, type='LoadImageFromFile'),
            dict(keep_ratio=True, scale=(
                800,
                800,
            ), type='Resize'),
            dict(type='LoadAnnotations', with_bbox=True),
            dict(
                meta_keys=(
                    'img_id',
                    'img_path',
                    'ori_shape',
                    'img_shape',
                    'scale_factor',
                ),
                type='PackDetInputs'),
        ],
        test_mode=True,
        type='CocoDataset'),
    drop_last=False,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(shuffle=False, type='DefaultSampler'))
val_evaluator = dict(
    ann_file='data/coco/annotations/instances_val2017.json',
    backend_args=None,
    format_only=False,
    metric='bbox',
    type='CocoMetric')
vis_backends = [
    dict(type='LocalVisBackend'),
]
visualizer = dict(
    name='visualizer',
    type='DetLocalVisualizer',
    vis_backends=[
        dict(type='LocalVisBackend'),
    ])
work_dir = './workspace'
diff --git a/models/cv/object_detection/gfl/igie/inference.py b/models/cv/object_detection/gfl/igie/inference.py
new file mode 100644
index 00000000..cb0c30d4
--- /dev/null
+++ b/models/cv/object_detection/gfl/igie/inference.py
@@ -0,0 +1,157 @@
# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import os
import argparse
import tvm
import torch
import numpy as np
from tvm import relay
from tqdm import tqdm
from mmdet.registry import RUNNERS
from mmengine.config import Config

def parse_args():
    parser = argparse.ArgumentParser()

    parser.add_argument("--engine",
                        type=str,
                        required=True,
                        help="igie engine path.")

    parser.add_argument("--batchsize",
                        type=int,
                        required=True,
                        help="inference batch size.")

    parser.add_argument("--datasets",
                        type=str,
                        required=True,
                        help="datasets path.")

    parser.add_argument("--input_name",
                        type=str,
                        required=True,
                        help="input name of the model.")

    parser.add_argument("--warmup",
                        type=int,
                        default=3,
                        help="number of warmup before test.")

    parser.add_argument("--acc_target",
                        type=float,
                        default=None,
                        help="Model inference Accuracy target.")

    parser.add_argument("--fps_target",
                        type=float,
                        default=None,
                        help="Model inference FPS target.")

    parser.add_argument("--perf_only",
                        type=bool,
                        default=False,
                        help="Run performance test only")

    args = parser.parse_args()

    return args

def main():
    args = parse_args()

    batch_size = args.batchsize

    # create iluvatar target & device
    target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer")
    device = tvm.device(target.kind.name, 0)

    # load engine
    lib = tvm.runtime.load_module(args.engine)

    # create runtime from engine
    module = tvm.contrib.graph_executor.GraphModule(lib["default"](device))

    # just run perf test
    if args.perf_only:
        ftimer = module.module.time_evaluator("run", device, number=100, repeat=1)
        prof_res = np.array(ftimer().results) * 1000
        fps = batch_size * 1000 / np.mean(prof_res)
        print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}")
    else:
        # warm up
        for _ in range(args.warmup):
            module.run()

        # runner config
        cfg = Config.fromfile("gfl_r50_fpn_1x_coco.py")

        cfg.work_dir = "./workspace"
        cfg['test_dataloader']['batch_size'] = batch_size
        cfg['test_dataloader']['dataset']['data_root'] = args.datasets
        cfg['test_dataloader']['dataset']['data_prefix']['img'] = 'images/val2017/'
        cfg['test_evaluator']['ann_file'] = os.path.join(args.datasets, 'annotations/instances_val2017.json')
        cfg['log_level'] = 'ERROR'

        # build runner
        runner = RUNNERS.build(cfg)

        for data in tqdm(runner.test_dataloader):
            cls_score = []
            box_reg = []

            input_data = runner.model.data_preprocessor(data, False)
            image = input_data['inputs'].cpu()
            pad_batch = len(image) != batch_size

            if pad_batch:
                origin_size = len(image)
                image = np.resize(image, (batch_size, *image.shape[1:]))

            module.set_input("input", tvm.nd.array(image, device))

            module.run()

            for i in range(module.get_num_outputs()):
                output = module.get_output(i).asnumpy()

                if pad_batch:
                    output = output[:origin_size]

                output = torch.from_numpy(output)

                if output.shape[1] == 80:
                    cls_score.append(output)
                else:
                    box_reg.append(output)

            batch_img_metas = [
                data_samples.metainfo for data_samples in data['data_samples']
            ]

            preds = runner.model.bbox_head.predict_by_feat(
                cls_score, box_reg, batch_img_metas=batch_img_metas, rescale=True
            )

            batch_data_samples = runner.model.add_pred_to_datasample(input_data['data_samples'], preds)

            runner.test_evaluator.process(data_samples=batch_data_samples, data_batch=data)

        metrics = runner.test_evaluator.evaluate(len(runner.test_dataloader.dataset))


if __name__ == "__main__":
    main()
diff --git a/models/cv/object_detection/gfl/igie/requirements.txt b/models/cv/object_detection/gfl/igie/requirements.txt
new file mode 100644
index 00000000..b6b3fff4
--- /dev/null
+++ b/models/cv/object_detection/gfl/igie/requirements.txt
@@ -0,0 +1,6 @@
onnx
tqdm
onnxsim
mmdet==3.3.0
mmdeploy==1.3.1
mmengine==0.10.4
diff --git a/models/cv/object_detection/gfl/igie/scripts/infer_gfl_fp16_accuracy.sh b/models/cv/object_detection/gfl/igie/scripts/infer_gfl_fp16_accuracy.sh
new file mode 100644
index 00000000..63f8d669
--- /dev/null
+++ b/models/cv/object_detection/gfl/igie/scripts/infer_gfl_fp16_accuracy.sh
@@ -0,0 +1,35 @@
#!/bin/bash

# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

batchsize=32
model_path="gfl_opt.onnx"
datasets_path=${DATASETS_DIR}

# build engine
python3 build_engine.py \
    --model_path ${model_path} \
    --input input:${batchsize},3,800,800 \
    --precision fp16 \
    --engine_path gfl_opt_bs_${batchsize}_fp16.so


# inference
python3 inference.py \
    --engine gfl_opt_bs_${batchsize}_fp16.so \
    --batchsize ${batchsize} \
    --input_name input \
    --datasets ${datasets_path}
diff --git a/models/cv/object_detection/gfl/igie/scripts/infer_gfl_fp16_performance.sh b/models/cv/object_detection/gfl/igie/scripts/infer_gfl_fp16_performance.sh
new file mode 100644
index 00000000..2cf32f97
--- /dev/null
+++ b/models/cv/object_detection/gfl/igie/scripts/infer_gfl_fp16_performance.sh
@@ -0,0 +1,36 @@
#!/bin/bash

# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

batchsize=32
model_path="gfl_opt.onnx"
datasets_path=${DATASETS_DIR}

# build engine
python3 build_engine.py \
    --model_path ${model_path} \
    --input input:${batchsize},3,800,800 \
    --precision fp16 \
    --engine_path gfl_opt_bs_${batchsize}_fp16.so


# inference
python3 inference.py \
    --engine gfl_opt_bs_${batchsize}_fp16.so \
    --batchsize ${batchsize} \
    --input_name input \
    --datasets ${datasets_path} \
    --perf_only True
--
Gitee

From 749f319e067887c943c36af232c1b69a1a10060f Mon Sep 17 00:00:00 2001
From: YoungPeng
Date: Thu, 17 Apr 2025 18:47:27 +0800
Subject: [PATCH 2/4] Add: UNet inference script.
---
 .../cv/semantic_segmentation/igie/README.md        |  67 ++++
 .../igie/build_engine.py                           |  73 ++++
 .../semantic_segmentation/igie/ci/prepare.sh       |  34 ++
 .../igie/deploy_default.py                         |  29 ++
 .../cv/semantic_segmentation/igie/export.py        |  73 ++++
 ...net_s5-d16_4x4_512x1024_160k_cityscapes.py      | 331 ++++++++++++++++++
 .../semantic_segmentation/igie/inference.py        | 140 ++++++++
 .../igie/requirements.txt                          |   5 +
 .../igie/scripts/infer_unet_fp16_accuracy.sh       |  35 ++
 .../scripts/infer_unet_fp16_performance.sh         |  36 ++
 10 files changed, 823 insertions(+)
 create mode 100644 models/cv/semantic_segmentation/igie/README.md
 create mode 100644 models/cv/semantic_segmentation/igie/build_engine.py
 create mode 100644 models/cv/semantic_segmentation/igie/ci/prepare.sh
 create mode 100644 models/cv/semantic_segmentation/igie/deploy_default.py
 create mode 100644 models/cv/semantic_segmentation/igie/export.py
 create mode 100644 models/cv/semantic_segmentation/igie/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes.py
 create mode 100644 models/cv/semantic_segmentation/igie/inference.py
 create mode 100644 models/cv/semantic_segmentation/igie/requirements.txt
 create mode 100644 models/cv/semantic_segmentation/igie/scripts/infer_unet_fp16_accuracy.sh
 create mode 100644 models/cv/semantic_segmentation/igie/scripts/infer_unet_fp16_performance.sh

diff --git a/models/cv/semantic_segmentation/igie/README.md b/models/cv/semantic_segmentation/igie/README.md
new file mode 100644
index 00000000..a5745b0e
--- /dev/null
+++ b/models/cv/semantic_segmentation/igie/README.md
@@ -0,0 +1,67 @@
# UNet (IGIE)

## Model Description

UNet is a convolutional neural network architecture for image segmentation, featuring a symmetric encoder-decoder structure. The encoder gradually extracts features and reduces spatial dimensions, while the decoder restores resolution through upsampling. Skip connections pass high-resolution encoder features directly to the decoder, enhancing detail retention and segmentation accuracy.

## Supported Environments

| GPU     | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release |
|---------|-----------|---------|
| MR-V100 | 4.2.0     | 25.06   |

## Model Preparation

### Prepare Resources

Pretrained model: the `fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes_20211210_145204-6860854e.pth` checkpoint (referenced by the export step below).

Dataset: download the Cityscapes dataset (`leftImg8bit` images and `gtFine` annotations).
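
The evaluation config reads the standard Cityscapes directory layout. As a quick pre-flight check, a minimal sketch (path names taken from `fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes.py`; `DATASETS_DIR` is the variable exported in the Model Inference step below) might look like:

```python
import os
import sys

# Expected Cityscapes layout, as referenced by the bundled config:
#   <DATASETS_DIR>/leftImg8bit/val/  -> validation images
#   <DATASETS_DIR>/gtFine/val/       -> fine annotations for validation
root = os.environ.get("DATASETS_DIR", "data/cityscapes")
required = [
    os.path.join(root, "leftImg8bit", "val"),
    os.path.join(root, "gtFine", "val"),
]
missing = [path for path in required if not os.path.isdir(path)]
if missing:
    sys.exit(f"Missing dataset directories: {missing}")
print("Cityscapes validation layout looks OK.")
```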
### Install Dependencies

```bash
# Install libGL
## CentOS
yum install -y mesa-libGL
## Ubuntu
apt install -y libgl1-mesa-glx

pip3 install -r requirements.txt
```

### Model Conversion

```bash
# export onnx model
python3 export.py --weight fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes_20211210_145204-6860854e.pth --cfg fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes.py --output unet.onnx

# use onnxsim to optimize the onnx model
onnxsim unet.onnx unet_opt.onnx
```

## Model Inference

```bash
export DATASETS_DIR=/Path/to/cityscapes/
```

### FP16

```bash
# Accuracy
bash scripts/infer_unet_fp16_accuracy.sh
# Performance
bash scripts/infer_unet_fp16_performance.sh
```

## Model Results

| Model | BatchSize | Precision | FPS    | mIoU  |
|-------|-----------|-----------|--------|-------|
| UNet  | 16        | FP16      | 66.265 | 69.48 |

## References

- [mmsegmentation](https://github.com/open-mmlab/mmsegmentation)

diff --git a/models/cv/semantic_segmentation/igie/build_engine.py b/models/cv/semantic_segmentation/igie/build_engine.py
new file mode 100644
index 00000000..54aa8847
--- /dev/null
+++ b/models/cv/semantic_segmentation/igie/build_engine.py
@@ -0,0 +1,73 @@
# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

import tvm
import argparse
from tvm import relay
from tvm.relay.import_model import import_model_to_igie

def parse_args():
    parser = argparse.ArgumentParser()

    parser.add_argument("--model_path",
                        type=str,
                        required=True,
                        help="original model path.")

    parser.add_argument("--engine_path",
                        type=str,
                        required=True,
                        help="igie export engine path.")

    parser.add_argument("--input",
                        type=str,
                        required=True,
                        help="""
                        input info of the model, format should be:
                        input_name:input_shape
                        e.g.: --input input:1,3,224,224.
                        """)

    parser.add_argument("--precision",
                        type=str,
                        choices=["fp32", "fp16", "int8"],
                        required=True,
                        help="model inference precision.")

    args = parser.parse_args()

    return args

def main():
    args = parse_args()

    # get input value info
    input_name, input_shape = args.input.split(":")
    shape = tuple([int(s) for s in input_shape.split(",")])
    input_dict = {input_name: shape}

    target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer")

    mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie")

    # build engine
    lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision)

    # export engine
    lib.export_library(args.engine_path)


if __name__ == "__main__":
    main()
\ No newline at end of file
diff --git a/models/cv/semantic_segmentation/igie/ci/prepare.sh b/models/cv/semantic_segmentation/igie/ci/prepare.sh
new file mode 100644
index 00000000..57461103
--- /dev/null
+++ b/models/cv/semantic_segmentation/igie/ci/prepare.sh
@@ -0,0 +1,34 @@
#!/bin/bash
# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -x

ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
if [[ ${ID} == "ubuntu" ]]; then
    apt install -y libgl1-mesa-glx
elif [[ ${ID} == "centos" ]]; then
    yum install -y mesa-libGL
else
    echo "OS not supported"
fi

pip3 install -r requirements.txt

# export onnx model
python3 export.py --weight fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes_20211210_145204-6860854e.pth --cfg fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes.py --output unet.onnx

# use onnxsim to optimize the onnx model
onnxsim unet.onnx unet_opt.onnx
diff --git a/models/cv/semantic_segmentation/igie/deploy_default.py b/models/cv/semantic_segmentation/igie/deploy_default.py
new file mode 100644
index 00000000..33515ff1
--- /dev/null
+++ b/models/cv/semantic_segmentation/igie/deploy_default.py
@@ -0,0 +1,29 @@
# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

onnx_config = dict(
    type='onnx',
    export_params=True,
    keep_initializers_as_inputs=False,
    opset_version=11,
    save_file='end2end.onnx',
    input_names=['input'],
    output_names=['output'],
    input_shape=None,
    optimize=True)

codebase_config = dict(type='mmseg', task='Segmentation', with_argmax=True)

backend_config = dict(type='onnxruntime')
\ No newline at end of file
diff --git a/models/cv/semantic_segmentation/igie/export.py b/models/cv/semantic_segmentation/igie/export.py
new file mode 100644
index 00000000..d460a759
--- /dev/null
+++ b/models/cv/semantic_segmentation/igie/export.py
@@ -0,0 +1,73 @@
# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import argparse

import torch
from mmdeploy.utils import load_config
from mmdeploy.apis import build_task_processor

def parse_args():
    parser = argparse.ArgumentParser()

    parser.add_argument("--weight",
                        type=str,
                        required=True,
                        help="pytorch model weight.")

    parser.add_argument("--cfg",
                        type=str,
                        required=True,
                        help="model config file.")

    parser.add_argument("--output",
                        type=str,
                        required=True,
                        help="export onnx model path.")

    args = parser.parse_args()
    return args

def main():
    args = parse_args()

    deploy_cfg = 'deploy_default.py'
    model_cfg = args.cfg
    model_checkpoint = args.weight

    deploy_cfg, model_cfg = load_config(deploy_cfg, model_cfg)

    task_processor = build_task_processor(model_cfg, deploy_cfg, device='cpu')

    model = task_processor.build_pytorch_model(model_checkpoint)

    input_names = ['input']
    dynamic_axes = {'input': {0: '-1'}}

    dummy_input = torch.randn(1, 3, 512, 1024)

    torch.onnx.export(
        model,
        dummy_input,
        args.output,
        input_names=input_names,
        dynamic_axes=dynamic_axes,
        opset_version=13
    )

    print("Export ONNX model successfully!")

if __name__ == '__main__':
    main()
diff --git a/models/cv/semantic_segmentation/igie/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes.py b/models/cv/semantic_segmentation/igie/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes.py
new file mode 100644
index 00000000..b0f3cb1f
--- /dev/null
+++ b/models/cv/semantic_segmentation/igie/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes.py
@@ -0,0 +1,331 @@
# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
crop_size = (
    512,
    1024,
)
data_preprocessor = dict(
    bgr_to_rgb=True,
    mean=[
        123.675,
        116.28,
        103.53,
    ],
    pad_val=0,
    seg_pad_val=255,
    size=(
        512,
        1024,
    ),
    std=[
        58.395,
        57.12,
        57.375,
    ],
    type='SegDataPreProcessor')
data_root = 'data/cityscapes/'
dataset_type = 'CityscapesDataset'
default_hooks = dict(
    checkpoint=dict(by_epoch=False, interval=16000, type='CheckpointHook'),
    logger=dict(interval=50, log_metric_by_epoch=False, type='LoggerHook'),
    param_scheduler=dict(type='ParamSchedulerHook'),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    timer=dict(type='IterTimerHook'),
    visualization=dict(type='SegVisualizationHook'))
default_scope = 'mmseg'
env_cfg = dict(
    cudnn_benchmark=True,
    dist_cfg=dict(backend='nccl'),
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
img_ratios = [
    0.5,
    0.75,
    1.0,
    1.25,
    1.5,
    1.75,
]
launcher = 'none'
load_from = '../fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes_20211210_145204-6860854e.pth'
log_level = 'ERROR'
log_processor = dict(by_epoch=False)
model = dict(
    auxiliary_head=dict(
        align_corners=False,
        channels=64,
        concat_input=False,
        dropout_ratio=0.1,
        in_channels=128,
        in_index=3,
        loss_decode=dict(
            loss_weight=0.4, type='CrossEntropyLoss', use_sigmoid=False),
        norm_cfg=dict(requires_grad=True, type='SyncBN'),
        num_classes=19,
        num_convs=1,
        type='FCNHead'),
    backbone=dict(
        act_cfg=dict(type='ReLU'),
        base_channels=64,
        conv_cfg=None,
        dec_dilations=(
            1,
            1,
            1,
            1,
        ),
        dec_num_convs=(
            2,
            2,
            2,
            2,
        ),
        downsamples=(
            True,
            True,
            True,
            True,
        ),
        enc_dilations=(
            1,
            1,
            1,
            1,
            1,
        ),
        enc_num_convs=(
            2,
            2,
            2,
            2,
            2,
        ),
        in_channels=3,
        norm_cfg=dict(requires_grad=True, type='SyncBN'),
        norm_eval=False,
        num_stages=5,
        strides=(
            1,
            1,
            1,
            1,
            1,
        ),
        type='UNet',
        upsample_cfg=dict(type='InterpConv'),
        with_cp=False),
    data_preprocessor=dict(
        bgr_to_rgb=True,
        mean=[
            123.675,
            116.28,
            103.53,
        ],
        pad_val=0,
        seg_pad_val=255,
        size=(
            512,
            1024,
        ),
        std=[
            58.395,
            57.12,
            57.375,
        ],
        type='SegDataPreProcessor'),
    decode_head=dict(
        align_corners=False,
        channels=64,
        concat_input=False,
        dropout_ratio=0.1,
        in_channels=64,
        in_index=4,
        loss_decode=dict(
            loss_weight=1.0, type='CrossEntropyLoss', use_sigmoid=False),
        norm_cfg=dict(requires_grad=True, type='SyncBN'),
        num_classes=19,
        num_convs=1,
        type='FCNHead'),
    pretrained=None,
    test_cfg=dict(crop_size=256, mode='whole', stride=170),
    train_cfg=dict(),
    type='EncoderDecoder')
norm_cfg = dict(requires_grad=True, type='SyncBN')
optim_wrapper = dict(
    clip_grad=None,
    optimizer=dict(lr=0.01, momentum=0.9, type='SGD', weight_decay=0.0005),
    type='OptimWrapper')
optimizer = dict(lr=0.01, momentum=0.9, type='SGD', weight_decay=0.0005)
param_scheduler = [
    dict(
        begin=0,
        by_epoch=False,
        end=160000,
        eta_min=0.0001,
        power=0.9,
        type='PolyLR'),
]
resume = False
test_cfg = dict(type='TestLoop')
test_dataloader = dict(
    batch_size=2,
    dataset=dict(
        data_prefix=dict(
            img_path='leftImg8bit/val', seg_map_path='gtFine/val'),
        data_root='data/cityscapes',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(keep_ratio=True, scale=(
                1024,
                512,
            ), type='Resize'),
            dict(type='LoadAnnotations'),
            dict(type='PackSegInputs'),
        ],
        type='CityscapesDataset'),
    num_workers=4,
    persistent_workers=True,
    sampler=dict(shuffle=False, type='DefaultSampler'))
test_evaluator = dict(
    iou_metrics=[
        'mIoU',
    ],
    type='IoUMetric')
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(keep_ratio=True, scale=(
        1024,
        512,
    ), type='Resize'),
    dict(type='LoadAnnotations'),
    dict(type='PackSegInputs'),
]
train_cfg = dict(
    max_iters=160000, type='IterBasedTrainLoop', val_interval=16000)
train_dataloader = dict(
    batch_size=4,
    dataset=dict(
        data_prefix=dict(
            img_path='leftImg8bit/train', seg_map_path='gtFine/train'),
        data_root='data/cityscapes/',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(type='LoadAnnotations'),
            dict(
                keep_ratio=True,
                ratio_range=(
                    0.5,
                    2.0,
                ),
                scale=(
                    2048,
                    1024,
                ),
                type='RandomResize'),
            dict(
                cat_max_ratio=0.75, crop_size=(
                    512,
                    1024,
                ), type='RandomCrop'),
            dict(prob=0.5, type='RandomFlip'),
            dict(type='PhotoMetricDistortion'),
            dict(type='PackSegInputs'),
        ],
        type='CityscapesDataset'),
    num_workers=4,
    persistent_workers=True,
    sampler=dict(shuffle=True, type='InfiniteSampler'))
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(
        keep_ratio=True,
        ratio_range=(
            0.5,
            2.0,
        ),
        scale=(
            2048,
            1024,
        ),
        type='RandomResize'),
    dict(cat_max_ratio=0.75, crop_size=(
        512,
        1024,
    ), type='RandomCrop'),
    dict(prob=0.5, type='RandomFlip'),
    dict(type='PhotoMetricDistortion'),
    dict(type='PackSegInputs'),
]
tta_model = dict(type='SegTTAModel')
tta_pipeline = [
    dict(backend_args=None, type='LoadImageFromFile'),
    dict(
        transforms=[
            [
                dict(keep_ratio=True, scale_factor=0.5, type='Resize'),
                dict(keep_ratio=True, scale_factor=0.75, type='Resize'),
                dict(keep_ratio=True, scale_factor=1.0, type='Resize'),
                dict(keep_ratio=True, scale_factor=1.25, type='Resize'),
                dict(keep_ratio=True, scale_factor=1.5, type='Resize'),
                dict(keep_ratio=True, scale_factor=1.75, type='Resize'),
            ],
            [
                dict(direction='horizontal', prob=0.0, type='RandomFlip'),
                dict(direction='horizontal', prob=1.0, type='RandomFlip'),
            ],
            [
                dict(type='LoadAnnotations'),
            ],
            [
                dict(type='PackSegInputs'),
            ],
        ],
        type='TestTimeAug'),
]
val_cfg = dict(type='ValLoop')
val_dataloader = dict(
    batch_size=1,
    dataset=dict(
        data_prefix=dict(
            img_path='leftImg8bit/val', seg_map_path='gtFine/val'),
        data_root='data/cityscapes/',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(keep_ratio=True, scale=(
                2048,
                1024,
            ), type='Resize'),
            dict(type='LoadAnnotations'),
            dict(type='PackSegInputs'),
        ],
        type='CityscapesDataset'),
    num_workers=4,
    persistent_workers=True,
    sampler=dict(shuffle=False, type='DefaultSampler'))
val_evaluator = dict(
    iou_metrics=[
        'mIoU',
    ], type='IoUMetric')
vis_backends = [
    dict(type='LocalVisBackend'),
]
visualizer = dict(
    name='visualizer',
    type='SegLocalVisualizer',
    vis_backends=[
        dict(type='LocalVisBackend'),
    ])
work_dir = './workspace'
diff --git a/models/cv/semantic_segmentation/igie/inference.py b/models/cv/semantic_segmentation/igie/inference.py
new file mode 100644
index 00000000..489c012e
--- /dev/null
+++ b/models/cv/semantic_segmentation/igie/inference.py
@@ -0,0 +1,140 @@
# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

import os
import argparse
import tvm
import torch
import numpy as np
from tvm import relay
from tqdm import tqdm
from mmseg.registry import RUNNERS
from mmengine.config import Config

def parse_args():
    parser = argparse.ArgumentParser()

    parser.add_argument("--engine",
                        type=str,
                        required=True,
                        help="igie engine path.")

    parser.add_argument("--batchsize",
                        type=int,
                        required=True,
                        help="inference batch size.")

    parser.add_argument("--datasets",
                        type=str,
                        required=True,
                        help="datasets path.")

    parser.add_argument("--input_name",
                        type=str,
                        required=True,
                        help="input name of the model.")

    parser.add_argument("--warmup",
                        type=int,
                        default=3,
                        help="number of warmup before test.")

    parser.add_argument("--acc_target",
                        type=float,
                        default=None,
                        help="Model inference Accuracy target.")

    parser.add_argument("--fps_target",
                        type=float,
                        default=None,
                        help="Model inference FPS target.")

    parser.add_argument("--perf_only",
                        type=bool,
                        default=False,
                        help="Run performance test only")

    args = parser.parse_args()

    return args

def main():
    args = parse_args()

    batch_size = args.batchsize

    # create iluvatar target & device
    target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer")
    device = tvm.device(target.kind.name, 0)

    # load engine
    lib = tvm.runtime.load_module(args.engine)

    # create runtime from engine
    module = tvm.contrib.graph_executor.GraphModule(lib["default"](device))

    # just run perf test
    if args.perf_only:
        ftimer = module.module.time_evaluator("run", device, number=100, repeat=1)
        prof_res = np.array(ftimer().results) * 1000
        fps = batch_size * 1000 / np.mean(prof_res)
        print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}")
    else:
        # warm up
        for _ in range(args.warmup):
            module.run()

        # runner config
        cfg = Config.fromfile("fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes.py")

        cfg.work_dir = "./workspace"
        cfg['test_dataloader']['batch_size'] = batch_size
        cfg['test_dataloader']['dataset']['data_root'] = args.datasets
        cfg['log_level'] = 'ERROR'

        # build runner
        runner = RUNNERS.build(cfg)

        for data in tqdm(runner.test_dataloader):

            input_data = runner.model.data_preprocessor(data, False)
            image = input_data['inputs'].cpu()
            pad_batch = len(image) != batch_size

            if pad_batch:
                origin_size = len(image)
                image = np.resize(image, (batch_size, *image.shape[1:]))

            module.set_input("input", tvm.nd.array(image, device))

            module.run()

            output = module.get_output(0).asnumpy()

            if pad_batch:
                output = output[:origin_size]

            output = torch.from_numpy(output)

            outputs = runner.model.postprocess_result(output, data_samples=data['data_samples'])

            runner.test_evaluator.process(data_samples=outputs, data_batch=data)

        metrics = runner.test_evaluator.evaluate(len(runner.test_dataloader.dataset))

        print(metrics)

if __name__ == "__main__":
    main()
diff --git a/models/cv/semantic_segmentation/igie/requirements.txt b/models/cv/semantic_segmentation/igie/requirements.txt
new file mode 100644
index 00000000..cf8f15ec
--- /dev/null
+++ b/models/cv/semantic_segmentation/igie/requirements.txt
@@ -0,0 +1,5 @@
onnx
tqdm
onnxsim
mmsegmentation==1.2.2
mmengine==0.10.4
diff --git a/models/cv/semantic_segmentation/igie/scripts/infer_unet_fp16_accuracy.sh b/models/cv/semantic_segmentation/igie/scripts/infer_unet_fp16_accuracy.sh
new file mode 100644
index 00000000..61cc2b27
--- /dev/null
+++ b/models/cv/semantic_segmentation/igie/scripts/infer_unet_fp16_accuracy.sh
@@ -0,0 +1,35 @@
#!/bin/bash

# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

batchsize=16
model_path="unet_opt.onnx"
datasets_path=${DATASETS_DIR}

# build engine
python3 build_engine.py \
    --model_path ${model_path} \
    --input input:${batchsize},3,512,1024 \
    --precision fp16 \
    --engine_path unet_opt_bs_${batchsize}_fp16.so


# inference
python3 inference.py \
    --engine unet_opt_bs_${batchsize}_fp16.so \
    --batchsize ${batchsize} \
    --input_name input \
    --datasets ${datasets_path}
diff --git a/models/cv/semantic_segmentation/igie/scripts/infer_unet_fp16_performance.sh b/models/cv/semantic_segmentation/igie/scripts/infer_unet_fp16_performance.sh
new file mode 100644
index 00000000..6c7a3b85
--- /dev/null
+++ b/models/cv/semantic_segmentation/igie/scripts/infer_unet_fp16_performance.sh
@@ -0,0 +1,36 @@
#!/bin/bash

# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

batchsize=16
model_path="unet_opt.onnx"
datasets_path=${DATASETS_DIR}

# build engine
python3 build_engine.py \
    --model_path ${model_path} \
    --input input:${batchsize},3,512,1024 \
    --precision fp16 \
    --engine_path unet_opt_bs_${batchsize}_fp16.so


# inference
python3 inference.py \
    --engine unet_opt_bs_${batchsize}_fp16.so \
    --batchsize ${batchsize} \
    --input_name input \
    --datasets ${datasets_path} \
    --perf_only True
--
Gitee

From bec3c7257ff79517c5d27027b23a46b49f432af7 Mon Sep 17 00:00:00 2001
From: YoungPeng
Date: Tue, 22 Apr 2025 10:02:13 +0800
Subject: [PATCH 3/4] Update: modify UNet model directory.
---
 models/cv/semantic_segmentation/{ => unet}/igie/README.md         | 0
 models/cv/semantic_segmentation/{ => unet}/igie/build_engine.py   | 0
 models/cv/semantic_segmentation/{ => unet}/igie/ci/prepare.sh     | 0
 models/cv/semantic_segmentation/{ => unet}/igie/deploy_default.py | 0
 models/cv/semantic_segmentation/{ => unet}/igie/export.py         | 0
 .../igie/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes.py          | 0
 models/cv/semantic_segmentation/{ => unet}/igie/inference.py      | 0
 models/cv/semantic_segmentation/{ => unet}/igie/requirements.txt  | 0
 .../{ => unet}/igie/scripts/infer_unet_fp16_accuracy.sh           | 0
 .../{ => unet}/igie/scripts/infer_unet_fp16_performance.sh        | 0
 10 files changed, 0 insertions(+), 0 deletions(-)
 rename models/cv/semantic_segmentation/{ => unet}/igie/README.md (100%)
 rename models/cv/semantic_segmentation/{ => unet}/igie/build_engine.py (100%)
 rename models/cv/semantic_segmentation/{ => unet}/igie/ci/prepare.sh (100%)
 rename models/cv/semantic_segmentation/{ => unet}/igie/deploy_default.py (100%)
 rename models/cv/semantic_segmentation/{ => unet}/igie/export.py (100%)
 rename models/cv/semantic_segmentation/{ => unet}/igie/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes.py (100%)
 rename models/cv/semantic_segmentation/{ => unet}/igie/inference.py (100%)
 rename models/cv/semantic_segmentation/{ => unet}/igie/requirements.txt (100%)
 rename models/cv/semantic_segmentation/{ => unet}/igie/scripts/infer_unet_fp16_accuracy.sh (100%)
 rename models/cv/semantic_segmentation/{ => unet}/igie/scripts/infer_unet_fp16_performance.sh (100%)

diff --git a/models/cv/semantic_segmentation/igie/README.md b/models/cv/semantic_segmentation/unet/igie/README.md
similarity index 100%
rename from models/cv/semantic_segmentation/igie/README.md
rename to models/cv/semantic_segmentation/unet/igie/README.md
diff --git a/models/cv/semantic_segmentation/igie/build_engine.py b/models/cv/semantic_segmentation/unet/igie/build_engine.py
similarity index 100%
rename from models/cv/semantic_segmentation/igie/build_engine.py
rename to models/cv/semantic_segmentation/unet/igie/build_engine.py
diff --git a/models/cv/semantic_segmentation/igie/ci/prepare.sh b/models/cv/semantic_segmentation/unet/igie/ci/prepare.sh
similarity index 100%
rename from models/cv/semantic_segmentation/igie/ci/prepare.sh
rename to models/cv/semantic_segmentation/unet/igie/ci/prepare.sh
diff --git a/models/cv/semantic_segmentation/igie/deploy_default.py b/models/cv/semantic_segmentation/unet/igie/deploy_default.py
similarity index 100%
rename from models/cv/semantic_segmentation/igie/deploy_default.py
rename to models/cv/semantic_segmentation/unet/igie/deploy_default.py
diff --git a/models/cv/semantic_segmentation/igie/export.py b/models/cv/semantic_segmentation/unet/igie/export.py
similarity index 100%
rename from models/cv/semantic_segmentation/igie/export.py
rename to models/cv/semantic_segmentation/unet/igie/export.py
diff --git a/models/cv/semantic_segmentation/igie/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes.py b/models/cv/semantic_segmentation/unet/igie/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes.py
similarity index 100%
rename from models/cv/semantic_segmentation/igie/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes.py
rename to models/cv/semantic_segmentation/unet/igie/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes.py
diff --git a/models/cv/semantic_segmentation/igie/inference.py b/models/cv/semantic_segmentation/unet/igie/inference.py
similarity index 100%
rename from models/cv/semantic_segmentation/igie/inference.py
rename to models/cv/semantic_segmentation/unet/igie/inference.py
diff --git a/models/cv/semantic_segmentation/igie/requirements.txt b/models/cv/semantic_segmentation/unet/igie/requirements.txt
similarity index 100%
rename from models/cv/semantic_segmentation/igie/requirements.txt
rename to models/cv/semantic_segmentation/unet/igie/requirements.txt
diff --git a/models/cv/semantic_segmentation/igie/scripts/infer_unet_fp16_accuracy.sh b/models/cv/semantic_segmentation/unet/igie/scripts/infer_unet_fp16_accuracy.sh
similarity index 100%
rename from models/cv/semantic_segmentation/igie/scripts/infer_unet_fp16_accuracy.sh
rename to models/cv/semantic_segmentation/unet/igie/scripts/infer_unet_fp16_accuracy.sh
diff --git a/models/cv/semantic_segmentation/igie/scripts/infer_unet_fp16_performance.sh b/models/cv/semantic_segmentation/unet/igie/scripts/infer_unet_fp16_performance.sh
similarity index 100%
rename from models/cv/semantic_segmentation/igie/scripts/infer_unet_fp16_performance.sh
rename to models/cv/semantic_segmentation/unet/igie/scripts/infer_unet_fp16_performance.sh
--
Gitee

From 61fc61d0b2092a08dfaf3030e94cc275f3971e42 Mon Sep 17 00:00:00 2001
From: "hongliang.yuan"
Date: Tue, 22 Apr 2025 10:55:45 +0800
Subject: [PATCH 4/4] fix unet requirements

---
 .../unet/igie/requirements.txt |  2 ++
 tests/run_igie.py              | 23 +++++++++++++++----
 2 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/models/cv/semantic_segmentation/unet/igie/requirements.txt b/models/cv/semantic_segmentation/unet/igie/requirements.txt
index cf8f15ec..3f7e885b 100644
--- a/models/cv/semantic_segmentation/unet/igie/requirements.txt
+++ b/models/cv/semantic_segmentation/unet/igie/requirements.txt
@@ -3,3 +3,5 @@ tqdm
 onnxsim
 mmsegmentation==1.2.2
 mmengine==0.10.7
+mmdeploy
+ftfy
\ No newline at end of file
diff --git a/tests/run_igie.py b/tests/run_igie.py
index 3350605c..cc8683b9 100644
--- a/tests/run_igie.py
+++ b/tests/run_igie.py
@@ -55,7 +55,7 @@ def main():
         sys.exit(-1)
 
     result = {}
-    if model["category"] == "cv/classification":
+    if model["category"] in ["cv/classification", "cv/semantic_segmentation"]:
         logging.info(f"Start running {model['model_name']} test case:\n{json.dumps(model, indent=4)}")
         d_url = model["download_url"]
         if d_url is not None:
@@ -142,6 +142,7 @@ def run_clf_testcase(model):
     }
     d_url = model["download_url"]
     checkpoint_n = d_url.split("/")[-1]
+    dataset_n = model["datasets"].split("/")[-1]
     prepare_script = f"""
     cd ../{model['model_path']}
     ln -s /mnt/deepspark/data/checkpoints/{checkpoint_n} ./
@@ -159,7 +160,7 @@ def run_clf_testcase(model):
     for prec in model["precisions"]:
         logging.info(f"Start running {model_name} {prec} test case")
         script = f"""
-        export DATASETS_DIR=/mnt/deepspark/data/datasets/imagenet-val
+        export DATASETS_DIR=/mnt/deepspark/data/datasets/{dataset_n}
        cd ../{model['model_path']}
        bash scripts/infer_{model_name}_{prec}_accuracy.sh
        bash scripts/infer_{model_name}_{prec}_performance.sh
@@ -176,8 +177,22 @@ def run_clf_testcase(model):
             except ValueError:
                 print("The string cannot be converted to a float.")
             result["result"][prec] = result["result"][prec] | {m[0]: m[1], m[2]: m[3]}
-        if matchs and len(matchs) == 2:
-            result["result"][prec]["status"] = "PASS"
+        if matchs:
+            if len(matchs) == 2:
+                result["result"][prec]["status"] = "PASS"
+            else:
+                # Define regex pattern to match key-value pairs inside curly braces
+                kv_pattern = r"'(\w+)'\s*:\s*([\d.]+)"
+                # Find all matches
+                kv_matches = re.findall(kv_pattern, sout)
+                for key, value in kv_matches:
+                    result["result"][prec]["status"] = "PASS"
+                    try:
+                        result["result"][prec][key] = float(value)
+                    except ValueError:
+                        print("The string cannot be converted to a float.")
+                        result["result"][prec][key] = value
+
+        result["result"][prec]["Cost time (s)"] = t
         logging.debug(f"matchs:\n{matchs}")
     return result
--
Gitee