From 3997961b3fdc7850a3706a71478c3891545769e7 Mon Sep 17 00:00:00 2001 From: "xinchi.tian" Date: Mon, 6 May 2024 15:32:31 +0800 Subject: [PATCH] Add Detr model in IXRT link #I9FP0V Add Detr model in IXRT Signed-off-by: xinchi.tian --- models/cv/detection/detr/ixrt/README.md | 55 ++++ models/cv/detection/detr/ixrt/build_engine.py | 59 ++++ .../detr/ixrt/calibration_dataset.py | 43 +++ models/cv/detection/detr/ixrt/coco_labels.py | 104 +++++++ models/cv/detection/detr/ixrt/common.py | 116 +++++++ .../cv/detection/detr/ixrt/config/DETR_CONFIG | 44 +++ .../detection/detr/ixrt/datasets/__init__.py | 14 + .../cv/detection/detr/ixrt/datasets/coco.py | 131 ++++++++ .../cv/detection/detr/ixrt/datasets/common.py | 83 +++++ .../detr/ixrt/datasets/post_process.py | 173 +++++++++++ .../detr/ixrt/datasets/pre_process.py | 91 ++++++ .../cv/detection/detr/ixrt/datasets/vision.py | 151 +++++++++ models/cv/detection/detr/ixrt/deploy.py | 123 ++++++++ models/cv/detection/detr/ixrt/export_model.py | 121 ++++++++ .../detr/ixrt/extract_graph_weight.py | 139 +++++++++ models/cv/detection/detr/ixrt/inference.py | 239 ++++++++++++++ .../detection/detr/ixrt/load_ixrt_plugin.py | 27 ++ .../detection/detr/ixrt/modify_batchsize.py | 179 +++++++++++ models/cv/detection/detr/ixrt/refine_model.py | 291 ++++++++++++++++++ .../detr/ixrt/refine_utils/__init__.py | 14 + .../detr/ixrt/refine_utils/common.py | 37 +++ .../detr/ixrt/refine_utils/linear_pass.py | 114 +++++++ .../ixrt/refine_utils/matmul_to_gemm_pass.py | 55 ++++ .../ixrt/scripts/infer_detr_fp16_accuracy.sh | 142 +++++++++ .../scripts/infer_detr_fp16_performance.sh | 142 +++++++++ .../cv/detection/detr/ixrt/simplify_model.py | 36 +++ 26 files changed, 2723 insertions(+) create mode 100755 models/cv/detection/detr/ixrt/README.md create mode 100644 models/cv/detection/detr/ixrt/build_engine.py create mode 100644 models/cv/detection/detr/ixrt/calibration_dataset.py create mode 100644 models/cv/detection/detr/ixrt/coco_labels.py create mode 100644 models/cv/detection/detr/ixrt/common.py create mode 100644 models/cv/detection/detr/ixrt/config/DETR_CONFIG create mode 100644 models/cv/detection/detr/ixrt/datasets/__init__.py create mode 100644 models/cv/detection/detr/ixrt/datasets/coco.py create mode 100644 models/cv/detection/detr/ixrt/datasets/common.py create mode 100644 models/cv/detection/detr/ixrt/datasets/post_process.py create mode 100644 models/cv/detection/detr/ixrt/datasets/pre_process.py create mode 100755 models/cv/detection/detr/ixrt/datasets/vision.py create mode 100644 models/cv/detection/detr/ixrt/deploy.py create mode 100644 models/cv/detection/detr/ixrt/export_model.py create mode 100644 models/cv/detection/detr/ixrt/extract_graph_weight.py create mode 100755 models/cv/detection/detr/ixrt/inference.py create mode 100644 models/cv/detection/detr/ixrt/load_ixrt_plugin.py create mode 100644 models/cv/detection/detr/ixrt/modify_batchsize.py create mode 100644 models/cv/detection/detr/ixrt/refine_model.py create mode 100644 models/cv/detection/detr/ixrt/refine_utils/__init__.py create mode 100644 models/cv/detection/detr/ixrt/refine_utils/common.py create mode 100644 models/cv/detection/detr/ixrt/refine_utils/linear_pass.py create mode 100644 models/cv/detection/detr/ixrt/refine_utils/matmul_to_gemm_pass.py create mode 100755 models/cv/detection/detr/ixrt/scripts/infer_detr_fp16_accuracy.sh create mode 100755 models/cv/detection/detr/ixrt/scripts/infer_detr_fp16_performance.sh create mode 100644 models/cv/detection/detr/ixrt/simplify_model.py diff --git 
a/models/cv/detection/detr/ixrt/README.md b/models/cv/detection/detr/ixrt/README.md
new file mode 100755
index 00000000..e1b76b56
--- /dev/null
+++ b/models/cv/detection/detr/ixrt/README.md
@@ -0,0 +1,55 @@
+# Detr
+
+## Description
+DETR (DEtection TRansformer) is a novel approach that views object detection as a direct set prediction problem. This method streamlines the detection process, eliminating the need for many hand-designed components like non-maximum suppression procedures or anchor generation, which are typically used to explicitly encode prior knowledge about the task.
+
+## Setup
+
+### Install
+```bash
+yum install mesa-libGL
+pip3 install tqdm
+pip3 install pycuda
+pip3 install onnx
+pip3 install onnxsim
+pip3 install tabulate
+pip3 install onnxruntime
+pip3 install pycocotools
+pip3 install opencv-python==4.6.0.66
+```
+
+### Download
+Pretrained model:
+
+Dataset: to download the validation dataset.
+
+### Model Conversion
+```bash
+mkdir checkpoints
+python3 export_model.py --torch_file /path/to/detr_r50_8xb2-150e_coco_20221023_153551-436d03e8.pth --onnx_file checkpoints/detr_res50.onnx --bsz 1
+```
+
+## Inference
+```bash
+export PROJ_DIR=./
+export DATASETS_DIR=/path/to/coco2017/
+export CHECKPOINTS_DIR=./checkpoints
+export COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json
+export EVAL_DIR=${DATASETS_DIR}/val2017
+export RUN_DIR=./
+export CONFIG_DIR=config/DETR_CONFIG
+```
+### FP16
+
+```bash
+# Accuracy
+bash scripts/infer_detr_fp16_accuracy.sh
+# Performance
+bash scripts/infer_detr_fp16_performance.sh
+```
+
+## Results
+
+Model |BatchSize |Precision |FPS |MAP@0.5 |MAP@0.5:0.95
+--------|-----------|----------|----------|----------|------------
+Detr | 1 | FP16 | 65.84 | 0.370 | 0.198
\ No newline at end of file
diff --git a/models/cv/detection/detr/ixrt/build_engine.py b/models/cv/detection/detr/ixrt/build_engine.py
new file mode 100644
index 00000000..3a00b513
--- /dev/null
+++ b/models/cv/detection/detr/ixrt/build_engine.py
@@ -0,0 +1,59 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
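For reference, an engine serialized by `build_engine.py` below can be loaded back with the helpers this patch adds in `common.py`, mirroring what `inference.py` does; a minimal sketch (the engine filename is a placeholder):

```python
# Hedged sketch: load a serialized engine and inspect its I/O bindings.
import pycuda.autoinit          # creates a CUDA context for the allocations below
import tensorrt

from load_ixrt_plugin import load_ixrt_plugin
from common import create_engine_context, get_io_bindings

load_ixrt_plugin()              # register IXRT plugins before deserialization
logger = tensorrt.Logger(tensorrt.Logger.ERROR)
engine, context = create_engine_context("checkpoints/detr_res50_fp16.engine", logger)
inputs, outputs, allocations = get_io_bindings(engine)
print(inputs[0]["shape"], [o["shape"] for o in outputs])   # e.g. [1, 3, 800, 800] and the two DETR heads
```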
+ +import os +import cv2 +import argparse +import numpy as np + +import torch +import tensorrt + +def main(config): + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + # parser.parse_from_file(config.model) + + precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 + if precision == tensorrt.BuilderFlag.INT8: + parser.parse_from_files(config.model, config.quant_file) + else: + parser.parse_from_file(config.model) + + # print("precision : ", precision) + build_config.set_flag(precision) + + plan = builder.build_serialized_network(network, build_config) + engine_file_path = config.engine + with open(engine_file_path, "wb") as f: + f.write(plan) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str) + parser.add_argument("--quant_file", type=str) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", + help="The precision of datatype") + parser.add_argument("--engine", type=str, default=None) + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = parse_args() + main(args) \ No newline at end of file diff --git a/models/cv/detection/detr/ixrt/calibration_dataset.py b/models/cv/detection/detr/ixrt/calibration_dataset.py new file mode 100644 index 00000000..de37775a --- /dev/null +++ b/models/cv/detection/detr/ixrt/calibration_dataset.py @@ -0,0 +1,43 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import torch +import torchvision.datasets +from torch.utils.data import DataLoader +from datasets.coco import CocoDetection + +def create_dataloaders(data_path, annFile, img_sz=640, batch_size=32, step=32, workers=2, data_process_type="yolov5"): + dataset = CocoDetection( + root=data_path, + annFile=annFile, + img_size=img_sz, + data_process_type=data_process_type + ) + calibration_dataset = dataset + num_samples = min(5000, batch_size * step) + if num_samples > 0: + calibration_dataset = torch.utils.data.Subset( + dataset, indices=range(num_samples) + ) + + calibration_dataloader = DataLoader( + calibration_dataset, + shuffle=False, + batch_size=batch_size, + drop_last=False, + num_workers=workers, + ) + return calibration_dataloader \ No newline at end of file diff --git a/models/cv/detection/detr/ixrt/coco_labels.py b/models/cv/detection/detr/ixrt/coco_labels.py new file mode 100644 index 00000000..43f5bd82 --- /dev/null +++ b/models/cv/detection/detr/ixrt/coco_labels.py @@ -0,0 +1,104 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +labels = [ + "person", + "bicycle", + "car", + "motorcycle", + "airplane", + "bus", + "train", + "truck", + "boat", + "traffic light", + "fire hydrant", + "stop sign", + "parking meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "backpack", + "umbrella", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports ball", + "kite", + "baseball bat", + "baseball glove", + "skateboard", + "surfboard", + "tennis racket", + "bottle", + "wine glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot dog", + "pizza", + "donut", + "cake", + "chair", + "couch", + "potted plant", + "bed", + "dining table", + "toilet", + "tv", + "laptop", + "mouse", + "remote", + "keyboard", + "cell phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "book", + "clock", + "vase", + "scissors", + "teddy bear", + "hair drier", + "toothbrush", +] +def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper) + return [ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] + +__all__ = ["labels"] diff --git a/models/cv/detection/detr/ixrt/common.py b/models/cv/detection/detr/ixrt/common.py new file mode 100644 index 00000000..c8d4a7b9 --- /dev/null +++ b/models/cv/detection/detr/ixrt/common.py @@ -0,0 +1,116 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
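A quick note on the helper above: `coco80_to_coco91_class()` maps a contiguous 80-class index (the order of `labels`) to the sparse category ids used in the COCO annotation files, for example:

```python
# Hedged example of the 80-class -> 91-class category-id mapping defined above.
from coco_labels import labels, coco80_to_coco91_class

class_map = coco80_to_coco91_class()
idx = labels.index("dog")      # 16 in the contiguous 80-class list
print(idx, class_map[idx])     # 16 18 -> "dog" has category_id 18 in the COCO json
```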
+ +import numpy as np +from tqdm import tqdm + +import tensorrt +import pycuda.driver as cuda + +# input : [bsz, box_num, 5(cx, cy, w, h, conf) + class_num(prob[0], prob[1], ...)] +# output : [bsz, box_num, 6(left_top_x, left_top_y, right_bottom_x, right_bottom_y, class_id, max_prob*conf)] +def box_class85to6(input): + center_x_y = input[:, :2] + side = input[:, 2:4] + conf = input[:, 4:5] + class_id = np.argmax(input[:, 5:], axis = -1) + class_id = class_id.astype(np.float32).reshape(-1, 1) + 1 + max_prob = np.max(input[:, 5:], axis = -1).reshape(-1, 1) + x1_y1 = center_x_y - 0.5 * side + x2_y2 = center_x_y + 0.5 * side + nms_input = np.concatenate([x1_y1, x2_y2, class_id, max_prob*conf], axis = -1) + return nms_input + +def save2json(batch_img_id, pred_boxes, json_result): + for i, boxes in enumerate(pred_boxes): + image_id = int(batch_img_id) + if boxes is not None: + x, y, w, h, c, p = boxes + if image_id!=-1: + + x, y, w, h, p = float(x), float(y), float(w), float(h), float(p) + c = int(c) + json_result.append( + { + "image_id": image_id, + "category_id": c, + "bbox": [x, y, w, h], + "score": p, + } + ) + +# def save2json(batch_img_id, pred_boxes, json_result, class_trans): +# for i, boxes in enumerate(pred_boxes): +# if boxes is not None: +# image_id = int(batch_img_id[i]) +# # have no target +# if image_id == -1: +# continue +# for x, y, w, h, c, p in boxes: +# x, y, w, h, p = float(x), float(y), float(w), float(h), float(p) +# c = int(c) +# json_result.append( +# { +# "image_id": image_id, +# "category_id": class_trans[c - 1], +# "bbox": [x, y, w, h], +# "score": p, +# } +# ) + +def create_engine_context(engine_path, logger): + with open(engine_path, "rb") as f: + runtime = tensorrt.Runtime(logger) + assert runtime + engine = runtime.deserialize_cuda_engine(f.read()) + assert engine + context = engine.create_execution_context() + assert context + + return engine, context + +def get_io_bindings(engine): + # Setup I/O bindings + inputs = [] + outputs = [] + allocations = [] + + for i in range(engine.num_bindings): + is_input = False + if engine.binding_is_input(i): + is_input = True + name = engine.get_binding_name(i) + dtype = engine.get_binding_dtype(i) + shape = engine.get_binding_shape(i) + if is_input: + batch_size = shape[0] + size = np.dtype(tensorrt.nptype(dtype)).itemsize + for s in shape: + size *= s + allocation = cuda.mem_alloc(size) + binding = { + "index": i, + "name": name, + "dtype": np.dtype(tensorrt.nptype(dtype)), + "shape": list(shape), + "allocation": allocation, + } + print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") + allocations.append(allocation) + if engine.binding_is_input(i): + inputs.append(binding) + else: + outputs.append(binding) + return inputs, outputs, allocations \ No newline at end of file diff --git a/models/cv/detection/detr/ixrt/config/DETR_CONFIG b/models/cv/detection/detr/ixrt/config/DETR_CONFIG new file mode 100644 index 00000000..ec9562e1 --- /dev/null +++ b/models/cv/detection/detr/ixrt/config/DETR_CONFIG @@ -0,0 +1,44 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+# BSZ : batch size used for engine build and inference
+# IMGSIZE : model input height/width
+# RUN_MODE : [FPS, MAP]
+# PRECISION : [float16, int8]
+# MODEL_NAME : basename used for the generated onnx/engine files
+# ORIGINE_MODEL : original onnx file
+# COCO_GT : COCO ground-truth annotation file used by COCOeval
+# DATASET_DIR : dataset path used for quantization/inference
+# CHECKPOINTS_DIR : directory holding the generated onnx/engine files
+# LAYER_FUSION : run the decoder with fused operators; 0 = no fusion, 1 = fusion
+# DECODER_FASTER : two fused implementations exist; the faster one is quicker and can feed GPU NMS directly, the other keeps its output identical to the onnx. 1 = faster
+IMGSIZE=800
+MODEL_NAME=detr
+ORIGINE_MODEL=detr_res50.onnx
+DATA_PROCESS_TYPE=detr
+MODEL_INPUT_NAMES=(inputs)
+
+# QUANT CONFIG (only takes effect when PRECISION is int8)
+    # QUANT_OBSERVER : quantization observer, one of [hist_percentile, percentile, minmax, entropy, ema]
+    # QUANT_BATCHSIZE : dataloader batch size during quantization; keep it identical to the onnx batch size if possible, otherwise some ops (e.g. Reshape) may infer wrong shapes
+    # QUANT_STEP : number of quantization steps
+    # QUANT_SEED : random seed, to make the quantization result reproducible
+    # QUANT_EXIST_ONNX : set this if a quantized onnx from another source should be used
+QUANT_OBSERVER=hist_percentile
+QUANT_BATCHSIZE=1
+QUANT_STEP=32
+QUANT_SEED=42
+DISABLE_QUANT_LIST=()
+QUANT_EXIST_ONNX=
\ No newline at end of file
diff --git a/models/cv/detection/detr/ixrt/datasets/__init__.py b/models/cv/detection/detr/ixrt/datasets/__init__.py
new file mode 100644
index 00000000..162e24b4
--- /dev/null
+++ b/models/cv/detection/detr/ixrt/datasets/__init__.py
@@ -0,0 +1,14 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
\ No newline at end of file
diff --git a/models/cv/detection/detr/ixrt/datasets/coco.py b/models/cv/detection/detr/ixrt/datasets/coco.py
new file mode 100644
index 00000000..73c5df54
--- /dev/null
+++ b/models/cv/detection/detr/ixrt/datasets/coco.py
@@ -0,0 +1,131 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import os.path
+from typing import Any, Callable, List, Optional, Tuple
+
+import cv2
+
+from .vision import VisionDataset
+from .pre_process import get_post_process
+class CocoDetection(VisionDataset):
+    """`MS Coco Detection `_ Dataset.
+
+    It requires the `COCO API to be installed `_.
+ + Args: + root (string): Root directory where images are downloaded to. + annFile (string): Path to json annotation file. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.PILToTensor`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + transforms (callable, optional): A function/transform that takes input sample and its target as entry + and returns a transformed version. + """ + + def __init__( + self, + root: str, + annFile: str, + img_size: int, + data_process_type: str, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + transforms: Optional[Callable] = None, + + ) -> None: + super().__init__(root, transforms, transform, target_transform) + from pycocotools.coco import COCO + + self.coco = COCO(annFile) + self.ids = list(sorted(self.coco.imgs.keys())) + self.img_size = img_size + + self.transforms = get_post_process(data_process_type) + + def _load_image(self, id: int): + path = self.coco.loadImgs(id)[0]["file_name"] + data = cv2.imread(os.path.join(self.root, path)) + return data + + def _load_target(self, id: int) -> List[Any]: + return self.coco.loadAnns(self.coco.getAnnIds(id)) + + def __getitem__(self, index: int) -> Tuple[Any, Any]: + id = self.ids[index] + image = self._load_image(id) + target = self._load_target(id) + origin_shape = image.shape[:2] + + if self.transforms is not None: + image = self.transforms(image, self.img_size) + + if len(target) > 0: + image_id = target[0]["image_id"] + else: + # have no target + image_id = -1 + return image, origin_shape, image_id + + def __len__(self) -> int: + return len(self.ids) + + +class CocoCaptions(CocoDetection): + """`MS Coco Captions `_ Dataset. + + It requires the `COCO API to be installed `_. + + Args: + root (string): Root directory where images are downloaded to. + annFile (string): Path to json annotation file. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.PILToTensor`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + transforms (callable, optional): A function/transform that takes input sample and its target as entry + and returns a transformed version. + + Example: + + .. 
code:: python + + import torchvision.datasets as dset + import torchvision.transforms as transforms + cap = dset.CocoCaptions(root = 'dir where images are', + annFile = 'json annotation file', + transform=transforms.PILToTensor()) + + print('Number of samples: ', len(cap)) + img, target = cap[3] # load 4th sample + + print("Image Size: ", img.size()) + print(target) + + Output: :: + + Number of samples: 82783 + Image Size: (3L, 427L, 640L) + [u'A plane emitting smoke stream flying over a mountain.', + u'A plane darts across a bright blue sky behind a mountain covered in snow', + u'A plane leaves a contrail above the snowy mountain top.', + u'A mountain that has a plane flying overheard in the distance.', + u'A mountain view with a plume of smoke in the background'] + + """ + + def _load_target(self, id: int) -> List[str]: + return [ann["caption"] for ann in super()._load_target(id)] diff --git a/models/cv/detection/detr/ixrt/datasets/common.py b/models/cv/detection/detr/ixrt/datasets/common.py new file mode 100644 index 00000000..febaf0ea --- /dev/null +++ b/models/cv/detection/detr/ixrt/datasets/common.py @@ -0,0 +1,83 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
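As a usage sketch for the `CocoDetection` dataset defined above (dataset paths are placeholders): with `data_process_type="detr"` each item is the preprocessed CHW tensor, the original image height/width, and the COCO image id:

```python
# Hedged usage sketch for CocoDetection; the COCO paths are placeholders.
from datasets.coco import CocoDetection

ds = CocoDetection(root="/path/to/coco2017/val2017",
                   annFile="/path/to/coco2017/annotations/instances_val2017.json",
                   img_size=800,
                   data_process_type="detr")
img, (h, w), image_id = ds[0]
print(img.shape, (h, w), image_id)   # (3, 800, 800), original (height, width), COCO image id
```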
+ +import cv2 +import math +import numpy as np + +def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32): + # Resize and pad image while meeting stride-multiple constraints + shape = im.shape[:2] # current shape [height, width] + if isinstance(new_shape, int): + new_shape = (new_shape, new_shape) + + # Scale ratio (new / old) + r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) + if not scaleup: # only scale down, do not scale up (for better val mAP) + r = min(r, 1.0) + + # Compute padding + ratio = r, r # width, height ratios + new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) + dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding + if auto: # minimum rectangle + dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding + elif scaleFill: # stretch + dw, dh = 0.0, 0.0 + new_unpad = (new_shape[1], new_shape[0]) + ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios + + dw /= 2 # divide padding into 2 sides + dh /= 2 + + if shape[::-1] != new_unpad: # resize + im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR) + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border + return im, ratio, (dw, dh) + +def scale_boxes(net_shape, boxes, ori_shape, use_letterbox=False): + # Rescale boxes (xyxy) from net_shape to ori_shape + + if use_letterbox: + + gain = min( + net_shape[0] / ori_shape[0], net_shape[1] / ori_shape[1] + ) # gain = new / old + pad = (net_shape[1] - ori_shape[1] * gain) / 2, ( + net_shape[0] - ori_shape[0] * gain + ) / 2.0 + + boxes[:, [0, 2]] -= pad[0] # x padding + boxes[:, [1, 3]] -= pad[1] # y padding + boxes[:, :4] /= gain + else: + x_scale, y_scale = net_shape[1] / ori_shape[1], net_shape[0] / ori_shape[0] + + boxes[:, 0] /= x_scale + boxes[:, 1] /= y_scale + boxes[:, 2] /= x_scale + boxes[:, 3] /= y_scale + + clip_boxes(boxes, ori_shape) + return boxes + +def clip_boxes(boxes, shape): + + boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2 + boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2 + + return boxes \ No newline at end of file diff --git a/models/cv/detection/detr/ixrt/datasets/post_process.py b/models/cv/detection/detr/ixrt/datasets/post_process.py new file mode 100644 index 00000000..91afc4b6 --- /dev/null +++ b/models/cv/detection/detr/ixrt/datasets/post_process.py @@ -0,0 +1,173 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
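To illustrate the helpers above with arbitrary values: `letterbox` pads an image to the requested shape, and `scale_boxes` maps xyxy boxes from network space back to the original image:

```python
# Hedged example for letterbox/scale_boxes with made-up shapes and boxes.
import numpy as np
from datasets.common import letterbox, scale_boxes

img = np.zeros((480, 640, 3), dtype=np.uint8)            # pretend original frame (h, w, c)
padded, ratio, (dw, dh) = letterbox(img, new_shape=640, auto=False)
print(padded.shape, ratio, (dw, dh))                      # (640, 640, 3) (1.0, 1.0) (0.0, 80.0)

boxes = np.array([[100.0, 120.0, 300.0, 360.0]])          # xyxy in 640x640 network space
boxes = scale_boxes((640, 640), boxes, (480, 640), use_letterbox=True)
print(boxes)                                              # [[100.  40. 300. 280.]] in the 480x640 image
```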
+ +import cv2 +import math +import numpy as np +import torch +import torch.nn.functional as F + +from .common import letterbox, scale_boxes, clip_boxes + +def get_post_process(data_process_type): + if data_process_type == "yolov5": + return Yolov5Postprocess + elif data_process_type == "yolov3": + return Yolov3Postprocess + elif data_process_type == "yolox": + return YoloxPostprocess + elif data_process_type == "detr": + return DetrPostprocess + return None + +def Yolov3Postprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + cur_box = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + boxes = scale_boxes( + (imgsz[0], imgsz[1]), + cur_box, + (ori_img_shape[0][i], ori_img_shape[1][i]), + use_letterbox=False + ) + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box + +def Yolov5Postprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + cur_box = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + boxes = scale_boxes( + (imgsz[0], imgsz[1]), + cur_box, + (ori_img_shape[0][i], ori_img_shape[1][i]), + use_letterbox=True + ) + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box + +def YoloxPostprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + boxes = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + r = min(imgsz[0]/ori_img_shape[0][i], imgsz[1]/ori_img_shape[1][i]) + boxes[:, :4] /= r + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + clip_boxes(boxes, (ori_img_shape[0][i], ori_img_shape[1][i])) + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box + +def box_cxcywh_to_xyxy(x): + x_c, y_c, w, h = x.unbind(-1) + b = [(x_c - 0.5 * w), (y_c - 0.5 * h), + (x_c + 0.5 * w), (y_c + 0.5 * h)] + return torch.stack(b, dim=-1) + + +def convert_to_xywh(boxes): + xmin, ymin, xmax, ymax = boxes.unbind(-1) + return torch.stack((xmin, ymin, xmax - xmin, ymax - ymin), dim=1) + +def DetrPostprocess(pred_logits, pred_boxes, target_sizes): + + out_logits = torch.from_numpy(pred_logits) + out_bbox = torch.from_numpy(pred_boxes) + assert len(target_sizes) == 2 + + prob = F.softmax(out_logits, -1) + scores, labels = prob[..., :-1].max(-1) + + # convert to [x0, y0, x1, y1] format + boxes = box_cxcywh_to_xyxy(out_bbox) + # and from relative [0, 1] to absolute [0, height] coordinates + img_w, img_h = target_sizes + scale_fct = torch.tensor([img_w, img_h, img_w, img_h]) + boxes = boxes * scale_fct + + + boxes = clip_boxes(boxes, target_sizes) + boxes = convert_to_xywh(boxes) + + labels = labels.unsqueeze(1) + scores =scores.unsqueeze(1) + pred_boxes = torch.cat([ + boxes, + labels, + scores], dim=1).numpy().tolist() + return pred_boxes 
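A small synthetic check of `DetrPostprocess` above (shapes follow the DETR-R50 head: 100 queries, 91 classes plus a trailing no-object logit; the numbers are made up):

```python
# Hedged sanity check for DetrPostprocess with synthetic model outputs.
import numpy as np
from datasets.post_process import DetrPostprocess

logits = np.zeros((100, 92), dtype=np.float32)    # [num_queries, 91 classes + no-object]
logits[:, 91] = 10.0                              # push every query towards "no object"
logits[0, 1] = 12.0                               # query 0: confident category_id 1 (person)
boxes = np.full((100, 4), 0.5, dtype=np.float32)  # normalized (cx, cy, w, h)
boxes[0] = [0.5, 0.5, 0.2, 0.4]

dets = DetrPostprocess(logits, boxes, [640, 480])  # target size is (img_w, img_h)
print(dets[0])   # ~[256.0, 144.0, 128.0, 192.0, 1.0, 0.88] -> x, y, w, h, label, score
```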
\ No newline at end of file
diff --git a/models/cv/detection/detr/ixrt/datasets/pre_process.py b/models/cv/detection/detr/ixrt/datasets/pre_process.py
new file mode 100644
index 00000000..c7f490df
--- /dev/null
+++ b/models/cv/detection/detr/ixrt/datasets/pre_process.py
@@ -0,0 +1,91 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import cv2
+import math
+import numpy as np
+
+from .common import letterbox
+
+# NOTE: despite the name, this returns the *pre*-processing function for each pipeline (name kept for existing callers).
+def get_post_process(data_process_type):
+    if data_process_type == "yolov5":
+        return Yolov5Preprocess
+    elif data_process_type == "yolov3":
+        return Yolov3Preprocess
+    elif data_process_type == "yolox":
+        return YoloxPreprocess
+    elif data_process_type == "detr":
+        return DetrPreprocess
+    return None
+
+def Yolov3Preprocess(image, img_size):
+
+    h0, w0 = image.shape[:2]  # orig hw
+    r = img_size / max(h0, w0)  # ratio
+
+    image = cv2.resize(image, (img_size, img_size))
+    image = image.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
+    image = np.ascontiguousarray(image).astype(np.float32) / 255.0  # 0~1 np array
+    return image
+
+def Yolov5Preprocess(image, img_size, augment=False):
+
+    h0, w0 = image.shape[:2]  # orig hw
+    r = img_size / max(h0, w0)  # ratio
+
+    if r != 1:  # if sizes are not equal
+        interp = cv2.INTER_LINEAR if (augment or r > 1) else cv2.INTER_AREA
+        image = cv2.resize(image, (math.ceil(w0 * r), math.ceil(h0 * r)), interpolation=interp)
+
+    # shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size  rect == True
+
+    image, ratio, dwdh = letterbox(image, new_shape=img_size, auto=False, scaleup=False)
+    image = image.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
+    image = np.ascontiguousarray(image).astype(np.float32) / 255.0  # 0~1 np array
+    return image
+
+def YoloxPreprocess(img, img_size, swap=(2,0,1)):
+
+    padded_img = np.ones((img_size, img_size, 3), dtype=np.uint8) * 114
+    r = min(img_size / img.shape[0], img_size / img.shape[1])
+    resized_img = cv2.resize(
+        img,
+        (int(img.shape[1] * r), int(img.shape[0] * r)),
+        interpolation=cv2.INTER_LINEAR,
+    ).astype(np.uint8)
+
+    padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img
+    padded_img = padded_img.transpose(swap)
+    padded_img = np.ascontiguousarray(padded_img, dtype=np.float32)
+
+    return padded_img
+
+def DetrPreprocess(image, img_size):
+    # img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
+    # img = img.resize((img_size, img_size))
+
+    # ImageNet normalization constants
+    mean = [0.485, 0.456, 0.406]
+    std = [0.229, 0.224, 0.225]
+
+    image = cv2.resize(image, (img_size, img_size))
+    image = image.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
+    image = np.ascontiguousarray(image).astype(np.float32) / 255.0  # 0~1 np array
+
+    image[0,:,:] = (image[0,:,:] - mean[0]) / std[0]
+    image[1,:,:] = (image[1,:,:] - mean[1]) / std[1]
+    image[2,:,:] = (image[2,:,:] - mean[2]) / std[2]
+
+    return image
+
\ No newline at end of file
diff --git a/models/cv/detection/detr/ixrt/datasets/vision.py
b/models/cv/detection/detr/ixrt/datasets/vision.py new file mode 100755 index 00000000..eadefb2c --- /dev/null +++ b/models/cv/detection/detr/ixrt/datasets/vision.py @@ -0,0 +1,151 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +from typing import Any, Callable, List, Optional, Tuple + +import torch +import torch.utils.data as data + +from types import FunctionType + +def _log_api_usage_once(obj: Any) -> None: + + """ + Logs API usage(module and name) within an organization. + In a large ecosystem, it's often useful to track the PyTorch and + TorchVision APIs usage. This API provides the similar functionality to the + logging module in the Python stdlib. It can be used for debugging purpose + to log which methods are used and by default it is inactive, unless the user + manually subscribes a logger via the `SetAPIUsageLogger method `_. + Please note it is triggered only once for the same API call within a process. + It does not collect any data from open-source users since it is no-op by default. + For more information, please refer to + * PyTorch note: https://pytorch.org/docs/stable/notes/large_scale_deployments.html#api-usage-logging; + * Logging policy: https://github.com/pytorch/vision/issues/5052; + + Args: + obj (class instance or method): an object to extract info from. + """ + module = obj.__module__ + if not module.startswith("torchvision"): + module = f"torchvision.internal.{module}" + name = obj.__class__.__name__ + if isinstance(obj, FunctionType): + name = obj.__name__ + torch._C._log_api_usage_once(f"{module}.{name}") + +class VisionDataset(data.Dataset): + """ + Base Class For making datasets which are compatible with torchvision. + It is necessary to override the ``__getitem__`` and ``__len__`` method. + + Args: + root (string): Root directory of dataset. + transforms (callable, optional): A function/transforms that takes in + an image and a label and returns the transformed versions of both. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + + .. note:: + + :attr:`transforms` and the combination of :attr:`transform` and :attr:`target_transform` are mutually exclusive. 
+ """ + + _repr_indent = 4 + + def __init__( + self, + root: str, + transforms: Optional[Callable] = None, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + ) -> None: + _log_api_usage_once(self) + if isinstance(root, str): + root = os.path.expanduser(root) + self.root = root + + has_transforms = transforms is not None + has_separate_transform = transform is not None or target_transform is not None + if has_transforms and has_separate_transform: + raise ValueError("Only transforms or transform/target_transform can be passed as argument") + + # for backwards-compatibility + self.transform = transform + self.target_transform = target_transform + + if has_separate_transform: + transforms = StandardTransform(transform, target_transform) + self.transforms = transforms + + def __getitem__(self, index: int) -> Any: + """ + Args: + index (int): Index + + Returns: + (Any): Sample and meta data, optionally transformed by the respective transforms. + """ + raise NotImplementedError + + def __len__(self) -> int: + raise NotImplementedError + + def __repr__(self) -> str: + head = "Dataset " + self.__class__.__name__ + body = [f"Number of datapoints: {self.__len__()}"] + if self.root is not None: + body.append(f"Root location: {self.root}") + body += self.extra_repr().splitlines() + if hasattr(self, "transforms") and self.transforms is not None: + body += [repr(self.transforms)] + lines = [head] + [" " * self._repr_indent + line for line in body] + return "\n".join(lines) + + def _format_transform_repr(self, transform: Callable, head: str) -> List[str]: + lines = transform.__repr__().splitlines() + return [f"{head}{lines[0]}"] + ["{}{}".format(" " * len(head), line) for line in lines[1:]] + + def extra_repr(self) -> str: + return "" + + +class StandardTransform: + def __init__(self, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None) -> None: + self.transform = transform + self.target_transform = target_transform + + def __call__(self, input: Any, target: Any) -> Tuple[Any, Any]: + if self.transform is not None: + input = self.transform(input) + if self.target_transform is not None: + target = self.target_transform(target) + return input, target + + def _format_transform_repr(self, transform: Callable, head: str) -> List[str]: + lines = transform.__repr__().splitlines() + return [f"{head}{lines[0]}"] + ["{}{}".format(" " * len(head), line) for line in lines[1:]] + + def __repr__(self) -> str: + body = [self.__class__.__name__] + if self.transform is not None: + body += self._format_transform_repr(self.transform, "Transform: ") + if self.target_transform is not None: + body += self._format_transform_repr(self.target_transform, "Target transform: ") + + return "\n".join(body) diff --git a/models/cv/detection/detr/ixrt/deploy.py b/models/cv/detection/detr/ixrt/deploy.py new file mode 100644 index 00000000..d1052d2b --- /dev/null +++ b/models/cv/detection/detr/ixrt/deploy.py @@ -0,0 +1,123 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +# License for the specific language governing permissions and limitations +# under the License. + +# !/usr/bin/env python +# -*- coding: utf-8 -*- +import argparse +from tensorrt.deploy.api import GraphTransform, create_source, create_target + +class Transform: + def __init__(self, graph): + self.t = GraphTransform(graph) + self.graph = graph + + def ReplaceFocus(self, input_edge, outputs, to_op): + input_var = self.graph.get_variable(input_edge) + op = self.graph.get_operator(to_op) + self.t.delete_operators_between_var_op( + from_var=input_var, to_op=op + ) + self.t.make_operator( + "Focus", inputs=input_edge, outputs=outputs + ) + return self.graph + + def AddYoloDecoderOp(self, inputs: list, outputs: list, op_type, **attributes): + if attributes["anchor"] is None: + del attributes["anchor"] + self.t.make_operator( + op_type, inputs=inputs, outputs=outputs, **attributes + ) + return self.graph + + def AddConcatOp(self, inputs: list, outputs, **attributes): + self.t.make_operator( + "Concat", inputs=inputs, outputs=outputs, **attributes + ) + return self.graph + +def customize_ops(graph, args): + t = Transform(graph) + fuse_focus = args.focus_input is not None and args.focus_output is not None and args.focus_last_node is not None + if fuse_focus: + graph = t.ReplaceFocus( + input_edge=args.focus_input, + outputs=args.focus_output, + to_op=args.focus_last_node + ) + decoder_input = args.decoder_input_names + num = len(decoder_input) // 3 + graph = t.AddYoloDecoderOp( + inputs=decoder_input[:num], + outputs=["decoder_8"], + op_type=args.decoder_type, + anchor=args.decoder8_anchor, + num_class=args.num_class, + stride=8, + faster_impl=args.faster + ) + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num:num*2], + outputs=["decoder_16"], + op_type=args.decoder_type, + anchor=args.decoder16_anchor, + num_class=args.num_class, + stride=16, + faster_impl=args.faster + ) + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num*2:], + outputs=["decoder_32"], + op_type=args.decoder_type, + anchor=args.decoder32_anchor, + num_class=args.num_class, + stride=32, + faster_impl=args.faster + ) + graph = t.AddConcatOp( + inputs=["decoder_32", "decoder_16", "decoder_8"], + outputs=["output"], + axis=1 + ) + + graph.outputs.clear() + graph.add_output("output") + graph.outputs["output"].dtype = "FLOAT" + return graph + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--src", type=str) + parser.add_argument("--dst", type=str) + parser.add_argument("--decoder_type", type=str, choices=["YoloV3Decoder", "YoloV5Decoder", "YoloV7Decoder", "YoloxDecoder"]) + parser.add_argument("--decoder_input_names", nargs='+', type=str) + parser.add_argument("--decoder8_anchor", nargs='*', type=int) + parser.add_argument("--decoder16_anchor", nargs='*', type=int) + parser.add_argument("--decoder32_anchor", nargs='*', type=int) + parser.add_argument("--num_class", type=int, default=80) + parser.add_argument("--faster", type=int, default=1) + parser.add_argument("--focus_input", type=str, default=None) + parser.add_argument("--focus_output", type=str, default=None) + parser.add_argument("--focus_last_node", type=str, default=None) + args = parser.parse_args() + return args + +if __name__ == "__main__": + + args = parse_args() + graph = create_source(args.src)() + graph = customize_ops(graph, args) + create_target(saved_path=args.dst).export(graph) + print("Surged onnx lies on", args.dst) \ No newline at end of file diff --git a/models/cv/detection/detr/ixrt/export_model.py 
b/models/cv/detection/detr/ixrt/export_model.py new file mode 100644 index 00000000..55385fb3 --- /dev/null +++ b/models/cv/detection/detr/ixrt/export_model.py @@ -0,0 +1,121 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import argparse + +import torch +import onnx +from onnx import shape_inference +from onnxsim import simplify + + +validate=True + +def stat_model(onnx_file): + onnx_model = onnx.load(onnx_file) + graph = onnx_model.graph + + op_types = list() + for node in graph.node: + op_types.append(node.op_type) + + print(set(op_types)) + +def ort_inference(onnx_file, input): + import onnxruntime as ort + + ort_session = ort.InferenceSession(onnx_file, + providers=['CPUExecutionProvider']) + in_name = ort_session.get_inputs()[0].name + + onnx_outputs = ort_session.get_outputs() + output_names = [] + for o in onnx_outputs: + output_names.append(o.name) + + input_np = input.clone().cpu().numpy() + out = ort_session.run(output_names, + input_feed={in_name: input_np} + ) + return out + +def convert_model(onnx_file, config): + model = torch.hub.load('facebookresearch/detr:main', 'detr_resnet50', pretrained=True) + model.eval() + + input = torch.randn([config.bsz, 3, config.img_H, config.img_W]) + out = model(input) + torch.onnx.export( + model, + input, + onnx_file, + verbose = False, + input_names = ["input"], + output_names = ["pred_logits","pred_boxes"], + opset_version = 11 + ) + + onnx_model = onnx.load(onnx_file) # load onnx model + model_simp, check = simplify(onnx_model) + assert check, "Simplified ONNX model could not be validated" + + onnx_model = shape_inference.infer_shapes(model_simp) + + onnx.save(onnx_model, onnx_file) + print('finished exporting onnx') + + # stat_model(onnx_file) + + if validate: + torch_out = model(input)["pred_logits"] + onnx_out = ort_inference(onnx_file, input)[0] + + import numpy as np + torch_out = torch_out.detach().numpy() + diff = np.abs(torch_out-onnx_out).max() + print(diff) + #sim = cosine_similarity(torch_out.reshape(1,-1), onnx_out.reshape(1, -1)) + #print(sim[0]) + + +def parse_config(): + parser = argparse.ArgumentParser() + + parser.add_argument("--torch_file", type=str, help="torch model") + parser.add_argument("--onnx_file", type=str, help="onnx model",default="") + parser.add_argument("--bsz", type=int, default=1, help="test batch size") + parser.add_argument( + "--img_H", + type=int, + default=800, + help="inference size h", + ) + parser.add_argument( + "--img_W", + type=int, + default=800, + help="inference size W", + ) + + + config = parser.parse_args() + return config + +if __name__ == "__main__": + + config = parse_config() + onnx_file = config.onnx_file + convert_model(onnx_file, config) \ No newline at end of file diff --git a/models/cv/detection/detr/ixrt/extract_graph_weight.py b/models/cv/detection/detr/ixrt/extract_graph_weight.py new file mode 100644 index 00000000..9094316e --- /dev/null +++ 
b/models/cv/detection/detr/ixrt/extract_graph_weight.py @@ -0,0 +1,139 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import argparse +import collections +import json +import os + +import numpy as np +import onnx + +def parse_onnx_model(onnx_model): + graph = onnx_model.graph + nodes = graph.node + initializer = graph.initializer + value_info = graph.value_info + model = {} + model["nodes"] = {} + model["tensors"] = {} + model["edges"] = {} + all_edge = [] + for i, item in enumerate(nodes): + node_name = item.name + input_edge_list = list(item.input) + output_edge_list = list(item.output) + all_edge.extend(input_edge_list) + all_edge.extend(output_edge_list) + node_dict = {"inputs": input_edge_list, "outputs": output_edge_list} + node_dict["op_type"] = item.op_type + attribute_dict = {} + for attr in item.attribute: + if attr.type == onnx.AttributeProto().AttributeType.FLOAT: + attribute_dict[attr.name] = attr.f + if attr.type == onnx.AttributeProto().AttributeType.FLOATS: + attribute_dict[attr.name] = [x for x in attr.floats] + if attr.type == onnx.AttributeProto().AttributeType.INT: + attribute_dict[attr.name] = attr.i + if attr.type == onnx.AttributeProto().AttributeType.INTS: + attribute_dict[attr.name] = [x for x in attr.ints] + if attr.type == onnx.AttributeProto().AttributeType.STRING: + attribute_dict[attr.name] = str(attr.s.decode("UTF-8")) + if attr.type == onnx.AttributeProto().AttributeType.STRINGS: + attribute_dict[attr.name] = [ + str(x.decode("UTF-8")) for x in attr.strings + ] + node_dict["attrbiute"] = attribute_dict + model["nodes"][node_name] = node_dict + + constant_edge = [] + for i, item in enumerate(initializer): + tensor_name = item.name + constant_edge.append(tensor_name) + if item.data_type == 1: + tensor_dict = {"data_type": "float32"} + elif item.data_type == 3: + tensor_dict = {"data_type": "int32"} + elif item.data_type == 7: + tensor_dict = {"data_type": "int64"} + tensor_dict["dims"] = list(item.dims) + + model["tensors"][tensor_name] = tensor_dict + + miss_edge = [] + for edge in all_edge: + if edge not in constant_edge: + miss_edge.append(edge) + + for info in value_info: + info_name = info.name + if info_name in miss_edge: + edge_dict = { + "dims": [int(x.dim_value) for x in info.type.tensor_type.shape.dim] + } + model["edges"][info_name] = edge_dict + + """ + Export weight + """ + var_dict = collections.OrderedDict() + for item in initializer: + tensor_name = item.name + tensor_shape = list(item.dims) + if len(tensor_shape) == 0: + continue + + if item.data_type == 1 and len(item.float_data): + np_data = np.array(list(item.float_data), dtype=np.float32) + np_data = np_data.reshape(tensor_shape) + var_dict[tensor_name] = np_data + elif item.data_type == 1 and len(item.raw_data): + np_data = np.frombuffer(item.raw_data, dtype=np.float32) + np_data = np_data.reshape(tensor_shape) + var_dict[tensor_name] = np_data + elif item.data_type == 3 and 
len(item.int32_data): + np_data = np.array(list(item.int32_data), dtype=np.int32) + np_data = np_data.reshape(tensor_shape) + var_dict[tensor_name] = np_data + elif item.data_type == 3 and len(item.raw_data): + np_data = np.frombuffer(item.raw_data, dtype=np.int32) + np_data.dtype = np.int32 + np_data = np_data.reshape(tensor_shape) + var_dict[tensor_name] = np_data + elif item.data_type == 7 and len(item.raw_data): + np_data = np.frombuffer(item.raw_data, dtype=np.int64) + np_data = np_data.reshape(tensor_shape) + var_dict[tensor_name] = np_data + elif item.data_type == 7 and len(item.int64_data): + temp = [] + for i in item.int64_data: + temp.append(i) + np_data = np.array(temp, dtype=np.int64) + np_data = np_data.reshape(tensor_shape) + var_dict[tensor_name] = np_data + else: + print( + "tensor name: ", + tensor_name, + ", type: ", + item.data_type, + ", len: ", + len(item.raw_data), + len(item.float_data), + len(item.int32_data), + len(item.int64_data), + ", will not save into weights file", + ) + return model, var_dict \ No newline at end of file diff --git a/models/cv/detection/detr/ixrt/inference.py b/models/cv/detection/detr/ixrt/inference.py new file mode 100755 index 00000000..eb33b614 --- /dev/null +++ b/models/cv/detection/detr/ixrt/inference.py @@ -0,0 +1,239 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
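The graph/weight extraction above is reused by `modify_batchsize.py`; a hedged sketch of using it to inspect an exported model (the ONNX path is a placeholder):

```python
# Hedged sketch: parse an ONNX file into the node/weight dictionaries built above.
import onnx
from extract_graph_weight import parse_onnx_model

onnx_model = onnx.load("checkpoints/detr_res50.onnx")     # placeholder path
graph_desc, weights = parse_onnx_model(onnx_model)
print(len(graph_desc["nodes"]), "nodes,", len(weights), "weight tensors")
for name, arr in list(weights.items())[:3]:
    print(name, arr.shape, arr.dtype)
```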
+ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import argparse +import glob +import json +import os +import time +import sys + +import torch +import numpy as np +import pycuda.autoinit +import pycuda.driver as cuda + +from coco_labels import coco80_to_coco91_class, labels +from common import save2json, box_class85to6 +from common import create_engine_context, get_io_bindings +from calibration_dataset import create_dataloaders +from datasets.post_process import get_post_process + +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval +from tqdm import tqdm +from tqdm.contrib import tzip + +import tensorrt +from load_ixrt_plugin import load_ixrt_plugin +load_ixrt_plugin() + +def main(config): + + # Load dataloader + dataloader = create_dataloaders( + data_path=config.eval_dir, + annFile=config.coco_gt, + img_sz=config.imgsz, + batch_size=config.bsz, + step=config.loop_count, + data_process_type=config.data_process_type, + workers=8 + ) + + # Load post process func + if config.test_mode == "MAP": + post_process_func = get_post_process(config.data_process_type) + + bsz = config.bsz + num_samples = 5000 + if config.loop_count > 0: + num_samples = bsz * config.loop_count + num_batch = len(dataloader) + print("=" * 30) + print(f"Test Mode : {'Asynchronous' if config.use_async else 'Synchronous'}") + print(f"Total sample : {num_samples}\nBatch_size : {bsz}\nRun Batch : {num_batch}") + print("=" * 30) + + json_result = [] + forward_time = 0.0 + class_map = coco80_to_coco91_class() + + host_mem = tensorrt.IHostMemory + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + + # Load Engine + engine, context = create_engine_context(config.model_engine, logger) + inputs, outputs, allocations = get_io_bindings(engine) + + # Warm up + if config.warm_up > 0: + print("\nWarm Start.") + for i in range(config.warm_up): + context.execute_v2(allocations) + print("Warm Done.") + + # Prepare the output data + batch_pred_logits = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) + batch_pred_boxes = np.zeros(outputs[1]["shape"], outputs[1]["dtype"]) + print(f"pred_logits shape : {batch_pred_logits.shape} pred_logits type : {batch_pred_logits.dtype}") + print(f"pred_boxes shape : {batch_pred_boxes.shape} pred_boxes type : {batch_pred_boxes.dtype}") + + for batch_data, batch_img_shape, batch_img_id in tqdm(dataloader): + batch_data = batch_data.numpy() + batch_img_shape = [batch_img_shape[0].numpy(), batch_img_shape[1].numpy()] + + cur_bsz_sample = batch_data.shape[0] + + # Set input + cuda.memcpy_htod(inputs[0]["allocation"], batch_data) + + # Forward + start_time = time.time() + context.execute_v2(allocations) + end_time = time.time() + forward_time += end_time - start_time + + if config.test_mode == "MAP": + # Fetch output + cuda.memcpy_dtoh(batch_pred_logits, outputs[0]["allocation"]) + cuda.memcpy_dtoh(batch_pred_boxes, outputs[1]["allocation"]) + + for (pred_logits, pred_boxes, img_h, img_w, img_id) in zip( + batch_pred_logits, + batch_pred_boxes, + batch_img_shape[0], + batch_img_shape[1], + batch_img_id): + pred_boxes = post_process_func(pred_logits, pred_boxes, [img_w, img_h]) + # print(img_id) + # print(img_w, img_h) + + # import ipdb + # ipdb.set_trace() + + save2json(img_id, pred_boxes, json_result) + + fps = num_samples / forward_time + + if config.test_mode == "FPS": + print("FPS : ", fps) + print(f"Performance Check : Test {fps} >= target {config.fps_target}") + if fps >= config.fps_target: + print("pass!") + exit() + else: + print("failed!") + exit(1) + + if config.test_mode 
== "MAP": + if len(json_result) == 0: + print("Predict zero box!") + exit(1) + + if not os.path.exists(config.pred_dir): + os.makedirs(config.pred_dir) + + pred_json = os.path.join( + config.pred_dir, f"{config.model_name}_{config.precision}_preds.json" + ) + with open(pred_json, "w") as f: + json.dump(json_result, f) + + anno_json = config.coco_gt + anno = COCO(anno_json) # init annotations api + pred = anno.loadRes(pred_json) # init predictions api + eval = COCOeval(anno, pred, "bbox") + + eval.evaluate() + eval.accumulate() + print( + f"==============================eval {config.model_name} {config.precision} coco map ==============================" + ) + eval.summarize() + + map, map50 = eval.stats[:2] + print("MAP@0.5 : ", map50) + print(f"Accuracy Check : Test {map50} >= target {config.map_target}") + if map50 >= config.map_target: + print("pass!") + exit() + else: + print("failed!") + exit(1) + +def parse_config(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--model_name", type=str, default="YOLOV5s", help="YOLOV3 YOLOV5 YOLOV7 YOLOX" + ) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", + help="The precision of datatype") + parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") + parser.add_argument( + "--model_engine", + type=str, + default="", + help="model engine path", + ) + parser.add_argument( + "--nms_engine", + type=str, + default="", + help="nms engine path", + ) + parser.add_argument( + "--coco_gt", + type=str, + default="data/datasets/cv/coco2017/annotations/instances_val2017.json", + help="coco instances_val2017.json", + ) + parser.add_argument("--warm_up", type=int, default=3, help="warm_up count") + parser.add_argument("--loop_count", type=int, default=-1, help="loop count") + parser.add_argument( + "--eval_dir", + type=str, + default="data/datasets/cv/coco2017/val2017", + help="coco image dir", + ) + parser.add_argument("--bsz", type=int, default=32, help="test batch size") + parser.add_argument( + "--imgsz", + "--img", + "--img-size", + type=int, + default=640, + help="inference size h,w", + ) + parser.add_argument("--max_det", type=int, default=1000, help="maximum detections per image") + parser.add_argument("--data_process_type", type=str, default="none") + parser.add_argument("--use_async", action="store_true") + parser.add_argument("--debug", action="store_true") + parser.add_argument("--pred_dir", type=str, default=".", help="pred save json dirs") + parser.add_argument("--map_target", type=float, default=0.56, help="target mAP") + parser.add_argument("--fps_target", type=float, default=-1.0, help="target fps") + parser.add_argument("--decoder_faster", type=int, default=0, help="decoder faster can use gpu nms directly") + parser.add_argument("--nms_type", type=str, default="GPU", help="GPU/CPU") + + config = parser.parse_args() + print("config:", config) + return config + +if __name__ == "__main__": + config = parse_config() + main(config) \ No newline at end of file diff --git a/models/cv/detection/detr/ixrt/load_ixrt_plugin.py b/models/cv/detection/detr/ixrt/load_ixrt_plugin.py new file mode 100644 index 00000000..ae47dc8e --- /dev/null +++ b/models/cv/detection/detr/ixrt/load_ixrt_plugin.py @@ -0,0 +1,27 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import ctypes
+import tensorrt
+from os.path import join, dirname, exists
+
+def load_ixrt_plugin(logger=tensorrt.Logger(tensorrt.Logger.INFO), namespace="", dynamic_path=""):
+    if not dynamic_path:
+        dynamic_path = join(dirname(tensorrt.__file__), "lib", "libixrt_plugin.so")
+    if not exists(dynamic_path):
+        raise FileNotFoundError(
+            f"The ixrt_plugin lib {dynamic_path} does not exist; please provide a valid plugin path!")
+    ctypes.CDLL(dynamic_path)
+    tensorrt.init_libnvinfer_plugins(logger, namespace)
+    print(f"Loaded plugin from {dynamic_path}")
\ No newline at end of file
diff --git a/models/cv/detection/detr/ixrt/modify_batchsize.py b/models/cv/detection/detr/ixrt/modify_batchsize.py
new file mode 100644
index 00000000..5c01e8b6
--- /dev/null
+++ b/models/cv/detection/detr/ixrt/modify_batchsize.py
@@ -0,0 +1,179 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
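+
+# Usage sketch (illustrative paths only; the infer_detr_fp16_*.sh scripts drive
+# this step with their own CHECKPOINTS_DIR/MODEL_NAME values):
+#
+#   python3 modify_batchsize.py \
+#       --batch_size 4 \
+#       --origin_model checkpoints/detr_refine.onnx \
+#       --output_model checkpoints/detr_float16_bs4.onnx
+#
+# The script rewrites the batch dimension of the graph inputs and patches the
+# Reshape, MatMul, Add and Concat initializers that still carry the batch size
+# baked in at export time.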
+ +import argparse +from copy import deepcopy +import numpy as np +import onnx +from onnx import numpy_helper + +from extract_graph_weight import parse_onnx_model + + +def modify_shape_dim(dim, bsz): + batch_size = bsz + # update dim to be a symbolic value + if isinstance(batch_size, str): + # set dynamic batch size + dim.dim_param = batch_size + elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): + # set given batch size + dim.dim_value = int(batch_size) + else: + # set batch size of 1 + dim.dim_value = 1 + +def change_input_dim(onnx_model, bsz): + inputs = onnx_model.graph.input + for input in inputs: + dim1 = input.type.tensor_type.shape.dim[0] + old_bsz = dim1.dim_value + modify_shape_dim(dim1, bsz) + return old_bsz + +# input[1] shape is initializer +def change_reshape_initializer(model, var_dict, old_bsz, bsz): + print("change_reshape_initializer") + modified_list = list() + for name, node_dict in model["nodes"].items(): + if node_dict["op_type"] != "Reshape": + continue + shape_name = node_dict["inputs"][1] + new_datas = deepcopy(var_dict[shape_name]) + done = False + if (len(new_datas) == 2): + if new_datas[0] == 625: + new_datas[0] = 625 * (bsz / old_bsz) + if new_datas[0] / old_bsz == 100: + new_datas[0] = 100 * bsz + elif (len(new_datas) == 3): + for i in range(len(new_datas)): + if new_datas[i] == old_bsz: + new_datas[i] = bsz + done = True + if done == False: + for i in range(len(new_datas)): + if new_datas[i] / old_bsz == 8: + new_datas[i] = (bsz / old_bsz) * 8 + done = True + + var_dict[shape_name] = new_datas + modified_list.append(shape_name) + return modified_list + +def change_matmul_initializer(model, var_dict, bsz): + print("change_matmul_initializer") + modified_list = list() + for name, node_dict in model["nodes"].items(): + if node_dict["op_type"] != "MatMul": + continue + for edge_name in node_dict["inputs"]: + if edge_name not in var_dict: + continue + if len(var_dict[edge_name].shape) != 3: + continue + data = deepcopy(var_dict[edge_name]) + + datas = list() + for _ in range(bsz): + datas.append(data) + new_datas = np.concatenate(datas, axis=0) + var_dict[edge_name] = new_datas + modified_list.append(edge_name) + return modified_list + +def change_add_initializer(model, var_dict, bsz): + print("change_add_initializer") + modified_list = list() + for name, node_dict in model["nodes"].items(): + if node_dict["op_type"] != "Add": + continue + for edge_name in node_dict["inputs"]: + if edge_name not in var_dict: + continue + if len(var_dict[edge_name].shape) != 3: + continue + data = deepcopy(var_dict[edge_name])[:, 0:1, ...] + + datas = list() + for _ in range(bsz): + datas.append(data) + new_datas = np.concatenate(datas, axis=1) + var_dict[edge_name] = new_datas + modified_list.append(edge_name) + return modified_list + +# A certain mode, input for Concat operator maybe constant. +def change_concat_initializer(model, var_dict, bsz): + print("change_concat_initializer") + modified_list = list() + for name, node_dict in model["nodes"].items(): + if node_dict["op_type"] != "Concat": + continue + for edge_name in node_dict["inputs"]: + if edge_name not in var_dict: + continue + data = deepcopy(var_dict[edge_name])[0:1, ...] 
+ + datas = list() + for _ in range(bsz): + datas.append(data) + new_datas = np.concatenate(datas, axis=0) + var_dict[edge_name] = new_datas + modified_list.append(edge_name) + return modified_list + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--batch_size", type=int) + parser.add_argument("--origin_model", type=str) + parser.add_argument("--output_model", type=str) + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = parse_args() + onnx_model = onnx.load(args.origin_model) + bsz = args.batch_size + old_bsz = change_input_dim(onnx_model, bsz) + if old_bsz == bsz: + print("Change batch size skipped") + onnx.save(onnx_model, args.output_model) + exit() + + model, weights = parse_onnx_model(onnx_model) + + modified_list = list() + reshape_modified = change_reshape_initializer(model, weights, old_bsz, bsz) + concat_modified = change_concat_initializer(model, weights, bsz) + matmul_modified = change_matmul_initializer(model, weights, bsz) + add_modified = change_add_initializer(model, weights, bsz) + modified_list.extend(reshape_modified) + modified_list.extend(concat_modified) + modified_list.extend(matmul_modified) + modified_list.extend(add_modified) + + # Remove the old initializer, and append new. + initializer = onnx_model.graph.initializer + for name in modified_list: + for item in initializer: + if name == item.name: + initializer.remove(item) + + data = weights[name] + new_params = numpy_helper.from_array(data, name=name) + initializer.append(new_params) + + onnx.save(onnx_model, args.output_model) \ No newline at end of file diff --git a/models/cv/detection/detr/ixrt/refine_model.py b/models/cv/detection/detr/ixrt/refine_model.py new file mode 100644 index 00000000..0483e0e9 --- /dev/null +++ b/models/cv/detection/detr/ixrt/refine_model.py @@ -0,0 +1,291 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
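+
+# This pass pipeline rewrites the simplified DETR ONNX graph before the batch
+# size is changed and the engine is built: it fuses LayerNorm and GELU
+# subgraphs into single operators, drops unused variables, and normalizes
+# LayerNormalization axis and Reshape shape attributes.
+#
+# Usage sketch (illustrative paths; the infer_detr_fp16_*.sh scripts supply
+# BSZ and IMGSIZE from config/DETR_CONFIG):
+#
+#   python3 refine_model.py \
+#       --onnx_path checkpoints/detr_sim.onnx \
+#       --dst_onnx_path checkpoints/detr_refine.onnx \
+#       --bsz ${BSZ} --imgsz ${IMGSIZE}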
+ +import os +import argparse +import dataclasses + +import torch +import onnx + +from refine_utils.matmul_to_gemm_pass import FusedGemmPass +from refine_utils.linear_pass import FusedLinearPass + +from refine_utils.common import * + +def get_constant_input_name_of_operator(graph: Graph, operator: Operator): + const = None + for input in operator.inputs: + if not graph.containe_var(input): + continue + + if not graph.is_leaf_variable(input): + continue + + input_var = graph.get_variable(input) + if input_var.value is not None: + const = input + return const + +class FuseLayerNormPass(BasePass): + + def process(self, graph: Graph) -> Graph: + self.transform = GraphTransform(graph) + find_sequence_subgraph( + graph, + [OP.REDUCE_MEAN, OP.SUB, OP.POW, OP.REDUCE_MEAN, OP.ADD, OP.SQRT, OP.DIV, OP.MUL, OP.ADD], + self.fuse_layer_norm, + strict=False + ) + return graph + + def fuse_layer_norm(self, graph: Graph, pattern: PatternGraph): + # 检查 REDUCE_MEAN 的输入是否和 SUB 的输入是一致的 + if pattern.nodes[0].operator.inputs[0] != pattern.nodes[1].operator.inputs[0]: + return + + # 检查 POW 的输入是否和 DIV 的输入是一致的 + if pattern.nodes[2].operator.inputs[0] != pattern.nodes[6].operator.inputs[0]: + return + + # 检查部分算子的输出是否被多个算子使用 + nodes = pattern.nodes + for node in [nodes[0]] + nodes[2:-1]: + next_ops = graph.get_next_operators(node.operator) + if len(next_ops) > 1: + return + + eps = None + for input in nodes[4].operator.inputs: + input_var = graph.get_variable(input) + if input_var.value is not None and graph.is_leaf_variable(input): + eps = to_py_type(input_var.value) + + scale = get_constant_input_name_of_operator(graph, nodes[-2].operator) + bias = get_constant_input_name_of_operator(graph, nodes[-1].operator) + + self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator) + + bias_var = graph.get_variable(bias) + print(bias_var) + + attributes = { + "axis": nodes[0].operator.attributes.axes, + "epsilon": eps, + } + + + layer_norm_op = self.transform.make_operator( + op_type="LayerNormalization", + inputs=[nodes[0].operator.inputs[0], scale, bias], + outputs=[nodes[-1].operator.outputs[0]], + **attributes + ) + + self.transform.add_operator(layer_norm_op) + +class FusedGeluPass(BasePass): + + def process(self, graph: Graph) -> Graph: + self.transform = GraphTransform(graph) + + find_sequence_subgraph( + graph, pattern=[OP.DIV, OP.ERF, OP.ADD, OP.MUL, OP.MUL], callback=self.fuse_gelu, strict=True + ) + return graph + + def fuse_gelu(self, graph: Graph, pattern: PatternGraph): + nodes = pattern.nodes + prev_op = self.transform.get_previous_operators(nodes[0].operator)[0] + next_ops = self.transform.get_next_operators(prev_op) + if len(next_ops) != 2: + return + + if nodes[0].operator not in next_ops or nodes[3].operator not in next_ops: + return + + gelu_op_input = None + for input in nodes[3].operator.inputs: + if input in nodes[0].operator.inputs: + gelu_op_input = input + break + + self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator) + + gelu_op = self.transform.make_operator( + op_type=OP.GELU, + inputs=[gelu_op_input], + outputs=[nodes[-1].operator.outputs[0]] + ) + self.transform.add_operator(gelu_op) + +@dataclasses.dataclass +class NormalizeAttr(BaseOperatorAttr): + p: float = 2.0 + epsilon: float = 1e-12 + axis: int = 1 + + +@registe_operator(OP.GELU) +class GeluOperator(BaseOperator): + + def call( + self, + executor, + operator: Operator, + inputs: List, + attr: NormalizeAttr, + ): + return F.gelu(inputs[0]) + + def convert_onnx_operator( + self, 
ir_graph: Graph, onnx_graph: onnx.GraphProto, node: onnx.NodeProto + ) -> Operator: + return default_converter(ir_graph, onnx_graph, node, attr_cls=attr.EmptyAttr) + + def quantize( + self, + graph: Graph, + op: Operator, + operator_observer_config: QuantOperatorObserverConfig, + quant_outputs: bool = False, + ): + return quant_single_input_operator(graph, op, operator_observer_config, quant_outputs=quant_outputs) + + + +class ClearUnsedVariables(BasePass): + + def process(self, graph: Graph) -> Graph: + vars = list(graph.variables) + + for var in vars: + if len(graph.get_dst_operators(var)) == 0 and graph.is_leaf_variable(var): + graph.delete_variable(var) + + quant_params = list(graph.quant_parameters.keys()) + for var in quant_params: + if not graph.containe_var(var): + graph.quant_parameters.pop(var) + + return graph + +class FormatLayerNorm(BasePass): + + def process(self, graph: Graph) -> Graph: + for op in graph.operators.values(): + if "LayerNormalization" in op.op_type: + self.format_layer_norm(graph, op) + return graph + + def format_layer_norm(self, graph, operator): + if not hasattr(operator.attributes, "axis"): + return + if isinstance(operator.attributes.axis, (tuple, list)): + operator.attributes.axis = operator.attributes.axis[0] + +class FormatReshape(BasePass): + + def process(self, graph: Graph) -> Graph: + for op in graph.operators.values(): + if op.op_type == "Reshape": + self.format_reshape(graph, op) + + return graph + + def format_reshape(self, graph, operator): + shape = graph.get_variable(operator.inputs[1]) + shape.value = torch.tensor(shape.value, dtype=torch.int64) + +class FormatScalar(BasePass): + + def process(self, graph: Graph): + for var in graph.variables.values(): + var: Variable + use_ops = graph.get_dst_operators(var) + + if len(use_ops) == 0: + continue + + if use_ops[0].op_type not in [OP.MUL, OP.ADD, OP.GATHER]: + continue + + if var.value is not None and var.value.ndim == 0: + var.value = var.value.reshape(1) + print(f"Reshape scalar to tensor for {var.name}.") + + return graph + +class RenamePass(BasePass): + + def process(self, graph:Graph): + + names = [name for name in graph.operators.keys()] + for old_name in names: + new_name = old_name.replace("/", "#") + + graph.rename_operator(old_name, new_name) + + names = [name for name in graph.variables.keys()] + for name in names: + new_name = name.replace("/", ".").replace("Output", "out").replace("output", "out") + + graph.rename_vaiable(name, new_name, + with_variables=True, + with_operator_outputs=True) + + return graph + +def create_pipeline(example_inputs): + return PassSequence( + FuseLayerNormPass(), + FusedGeluPass(), + + ClearUnsedVariables(), + FormatLayerNorm(), + FormatReshape(), + # FormatScalar(), + # RenamePass() + ) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--onnx_path", type=str) + parser.add_argument("--dst_onnx_path", type=str) + + parser.add_argument("--bsz", type=int, default=8, + help="Batch size") + parser.add_argument("--imgsz", type=int, default=224, + help="Image size") + + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_args() + + example_inputs = torch.randn(args.bsz, 3, args.imgsz, args.imgsz) + + refine_pipline = Pipeline( + create_source(f"{args.onnx_path}", example_inputs=example_inputs), + create_pipeline(example_inputs), + create_target( + f"{args.dst_onnx_path}", + example_inputs=example_inputs, + ) + ) + refine_pipline.run() + + print(f"refine the model, input shape={example_inputs.shape}") diff 
--git a/models/cv/detection/detr/ixrt/refine_utils/__init__.py b/models/cv/detection/detr/ixrt/refine_utils/__init__.py new file mode 100644 index 00000000..162e24b4 --- /dev/null +++ b/models/cv/detection/detr/ixrt/refine_utils/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. \ No newline at end of file diff --git a/models/cv/detection/detr/ixrt/refine_utils/common.py b/models/cv/detection/detr/ixrt/refine_utils/common.py new file mode 100644 index 00000000..b19dccfc --- /dev/null +++ b/models/cv/detection/detr/ixrt/refine_utils/common.py @@ -0,0 +1,37 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +from typing import Union, Callable, List + +from tensorrt.deploy.api import * +from tensorrt.deploy.backend.onnx.converter import default_converter +from tensorrt.deploy.backend.torch.executor.operators._operators import to_py_type +from tensorrt.deploy.ir.operator_attr import BaseOperatorAttr, EmptyAttr +from tensorrt.deploy.ir.operator_type import OperatorType as OP +from tensorrt.deploy.ir import operator_attr as attr, Operator, generate_operator_name +from tensorrt.deploy.fusion import BasePass, PatternGraph, build_sequence_graph, GraphMatcher, PassSequence +from tensorrt.deploy.ir import Graph +from tensorrt.deploy.quantizer.quant_operator.base import quant_single_input_operator +from tensorrt.deploy.backend.onnx.converter import convert_onnx_operator + +def find_sequence_subgraph(graph, + pattern: Union[List[str], PatternGraph], + callback: Callable[[Graph, PatternGraph], None], + strict=True): + if isinstance(pattern, List): + pattern = build_sequence_graph(pattern) + + matcher = GraphMatcher(pattern, strict=strict) + return matcher.findall(graph, callback) \ No newline at end of file diff --git a/models/cv/detection/detr/ixrt/refine_utils/linear_pass.py b/models/cv/detection/detr/ixrt/refine_utils/linear_pass.py new file mode 100644 index 00000000..bab7e575 --- /dev/null +++ b/models/cv/detection/detr/ixrt/refine_utils/linear_pass.py @@ -0,0 +1,114 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import dataclasses
+
+from refine_utils.common import *
+
+# A x B = C, only applied when B is an initializer
+
+class FusedLinearPass(BasePass):
+
+    def process(self, graph: Graph) -> Graph:
+        self.transform = GraphTransform(graph)
+
+        find_sequence_subgraph(
+            graph, pattern=[OP.MATMUL, OP.ADD], callback=self.to_linear_with_bias, strict=True
+        )
+        find_sequence_subgraph(
+            graph, pattern=[OP.MATMUL], callback=self.to_linear, strict=True
+        )
+        return graph
+
+    def to_linear_with_bias(self, graph, pattern: PatternGraph):
+        matmul = pattern.nodes[0]
+        add = pattern.nodes[1]
+        if len(add.operator.inputs) != 2:
+            return
+
+        b_var = graph.get_variable(matmul.operator.inputs[1])
+        if not graph.is_leaf_variable(b_var) or b_var.value is None:
+            return
+
+        if b_var.value.ndim != 2:
+            return
+
+        bias_var = None
+        for input in add.operator.inputs:
+            if input not in matmul.operator.outputs:
+                bias_var = input
+
+        inputs = matmul.operator.inputs
+        inputs.append(bias_var)
+        outputs = add.operator.outputs
+
+        b_var.value = b_var.value.transpose(1, 0)
+        b_var.shape[0], b_var.shape[1] = b_var.shape[1], b_var.shape[0]
+
+        hidden_size = b_var.shape[1]
+        linear_dim = b_var.shape[0]
+
+        attributes = {
+            "hidden_size": hidden_size,
+            "linear_dim": linear_dim,
+            "has_bias": 1,
+            "act_type": "none"
+        }
+
+        self.transform.make_operator(
+            "LinearFP16",
+            inputs=inputs,
+            outputs=outputs,
+            **attributes
+        )
+
+        self.transform.delete_operator(add.operator)
+        self.transform.delete_operator(matmul.operator)
+
+    def to_linear(self, graph, pattern: PatternGraph):
+        matmul = pattern.nodes[0]
+        if len(matmul.operator.inputs) != 2:
+            return
+
+        b_var = graph.get_variable(matmul.operator.inputs[1])
+        if not graph.is_leaf_variable(b_var) or b_var.value is None:
+            return
+
+        if b_var.value.ndim != 2:
+            return
+
+        b_var.value = b_var.value.transpose(1, 0)
+        b_var.shape[0], b_var.shape[1] = b_var.shape[1], b_var.shape[0]
+
+        # Compute hidden_size/linear_dim from the transposed weight before they
+        # are referenced when building the operator attributes.
+        hidden_size = b_var.shape[1]
+        linear_dim = b_var.shape[0]
+
+        attributes = {
+            "hidden_size": hidden_size,
+            "linear_dim": linear_dim,
+            "has_bias": 0,
+            "act_type": "none"
+        }
+
+        op = self.transform.make_operator(
+            op_type="LinearFP16",
+            inputs=pattern.nodes[0].operator.inputs,
+            outputs=[pattern.nodes[-1].operator.outputs[0]],
+            **attributes
+        )
+
+        self.transform.add_operator(op)
+
+        self.transform.delete_operator(matmul.operator)
\ No newline at end of file
diff --git a/models/cv/detection/detr/ixrt/refine_utils/matmul_to_gemm_pass.py b/models/cv/detection/detr/ixrt/refine_utils/matmul_to_gemm_pass.py
new file mode 100644
index 00000000..5823c4a5
--- /dev/null
+++ b/models/cv/detection/detr/ixrt/refine_utils/matmul_to_gemm_pass.py
@@ -0,0 +1,55 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +from refine_utils.common import * + +# +# Common pattern Matmul to Gemm +# +class FusedGemmPass(BasePass): + + def process(self, graph: Graph) -> Graph: + self.transform = GraphTransform(graph) + + find_sequence_subgraph( + graph, pattern=[OP.MATMUL], callback=self.to_gemm, strict=True + ) + return graph + + def to_gemm(self, graph, pattern: PatternGraph): + matmul_op = pattern.nodes[0] + inputs = matmul_op.operator.inputs + outputs = matmul_op.operator.outputs + + if len(inputs)!=2 and len(outputs)!=1: + return + + for input in inputs: + if self.transform.is_leaf_variable(input): + return + + print(f"{self.transform.get_variable(inputs[0]).shape} {self.transform.get_variable(inputs[1]).shape}") + self.transform.delete_operator(matmul_op.operator) + + op = self.transform.make_operator( + op_type = "Gemm", + inputs = inputs, + outputs = outputs, + alpha = 1, + beta = 1, + transB = 1 + ) + + self.transform.add_operator(op) \ No newline at end of file diff --git a/models/cv/detection/detr/ixrt/scripts/infer_detr_fp16_accuracy.sh b/models/cv/detection/detr/ixrt/scripts/infer_detr_fp16_accuracy.sh new file mode 100755 index 00000000..f81312a1 --- /dev/null +++ b/models/cv/detection/detr/ixrt/scripts/infer_detr_fp16_accuracy.sh @@ -0,0 +1,142 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
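+
+# Pipeline overview (accuracy run): simplify the exported ONNX model, apply the
+# refine passes, rewrite the batch size, build the engine, then evaluate COCO
+# mAP with inference.py. Illustrative invocation (the environment variables
+# from the README must be exported first; the target value is an example):
+#
+#   bash scripts/infer_detr_fp16_accuracy.sh --bs 1 --tgt 0.37
+#
+# --bs sets BSZ and --tgt sets the mAP@0.5 target passed to inference.py.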
+ +#!/bin/bash + +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=1 +WARM_UP=-1 +TGT=-1 +LOOP_COUNT=-1 +RUN_MODE=MAP +PRECISION=float16 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +step=0 +faster=0 +CURRENT_MODEL=${ORIGINE_MODEL} +if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then + faster=1 +fi + +# Simplify Model +let step++ +echo; +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model Skipped, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi + +# Refine Model +let step++ +echo; +echo [STEP ${step}] : Refine Model +REFINE_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_refine.onnx +if [ -f ${REFINE_MODEL} ];then + echo " "Refine Model Skipped, ${REFINE_MODEL} has been existed +else + python3 ${RUN_DIR}/refine_model.py \ + --onnx_path ${SIM_MODEL} \ + --dst_onnx_path ${REFINE_MODEL} \ + --bsz ${BSZ} \ + --imgsz ${IMGSIZE} +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skipped, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py \ + --batch_size ${BSZ} \ + --origin_model ${REFINE_MODEL} \ + --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${FINAL_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --model_engine=${ENGINE_FILE} \ + --coco_gt=${COCO_GT} \ + --eval_dir=${EVAL_DIR} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --model_name ${MODEL_NAME} \ + --precision ${PRECISION} \ + --pred_dir ${CHECKPOINTS_DIR} \ + --map_target ${TGT} \ + --bsz ${BSZ}; check_status +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/detection/detr/ixrt/scripts/infer_detr_fp16_performance.sh b/models/cv/detection/detr/ixrt/scripts/infer_detr_fp16_performance.sh new file mode 100755 index 00000000..a3881a3c --- /dev/null +++ b/models/cv/detection/detr/ixrt/scripts/infer_detr_fp16_performance.sh @@ -0,0 +1,142 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +#!/bin/bash + +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=1 +WARM_UP=3 +TGT=-1 +LOOP_COUNT=10 +RUN_MODE=FPS +PRECISION=float16 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +step=0 +faster=0 +CURRENT_MODEL=${ORIGINE_MODEL} +if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then + faster=1 +fi + +# Simplify Model +let step++ +echo; +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model Skipped, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi + +# Refine Model +let step++ +echo; +echo [STEP ${step}] : Refine Model +REFINE_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_refine.onnx +if [ -f ${REFINE_MODEL} ];then + echo " "Refine Model Skipped, ${REFINE_MODEL} has been existed +else + python3 ${RUN_DIR}/refine_model.py \ + --onnx_path ${SIM_MODEL} \ + --dst_onnx_path ${REFINE_MODEL} \ + --bsz ${BSZ} \ + --imgsz ${IMGSIZE} +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skipped, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py \ + --batch_size ${BSZ} \ + --origin_model ${REFINE_MODEL} \ + --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${FINAL_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --model_engine=${ENGINE_FILE} \ + --coco_gt=${COCO_GT} \ + --eval_dir=${EVAL_DIR} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --model_name ${MODEL_NAME} \ + --precision ${PRECISION} \ + --pred_dir ${CHECKPOINTS_DIR} \ + --fps_target ${TGT} \ + --bsz ${BSZ}; check_status +exit ${EXIT_STATUS} \ 
No newline at end of file diff --git a/models/cv/detection/detr/ixrt/simplify_model.py b/models/cv/detection/detr/ixrt/simplify_model.py new file mode 100644 index 00000000..1400fd81 --- /dev/null +++ b/models/cv/detection/detr/ixrt/simplify_model.py @@ -0,0 +1,36 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import onnx +import argparse +from onnxsim import simplify + +# Simplify +def simplify_model(args): + onnx_model = onnx.load(args.origin_model) + model_simp, check = simplify(onnx_model) + model_simp = onnx.shape_inference.infer_shapes(model_simp) + onnx.save(model_simp, args.output_model) + print(" Simplify onnx Done.") + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--origin_model", type=str) + parser.add_argument("--output_model", type=str) + args = parser.parse_args() + return args + +args = parse_args() +simplify_model(args) \ No newline at end of file -- Gitee