diff --git a/models/cv/multi_object_tracking/deepsort/ixrt/README.md b/models/cv/multi_object_tracking/deepsort/ixrt/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7e848db35c84d353a54b190c83415a2b69dd3c05 --- /dev/null +++ b/models/cv/multi_object_tracking/deepsort/ixrt/README.md @@ -0,0 +1,55 @@ +# DeepSort (ixRT) + +## Model Description + +DeepSort integrates deep neural networks with traditional tracking methods to achieve robust and accurate tracking of objects in video streams. The algorithm leverages a combination of a deep appearance feature extractor and the Hungarian algorithm for data association. + +## Supported Environments + +| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release | +| :----: | :----: | :----: | +| MR-V100 | 4.3.0 | 25.12 | + +## Model Preparation + +### Prepare Resources + +Pretrained model(ckpt.t7): + +Dataset: to download the market1501 dataset. + +### Install Dependencies + +```bash +pip3 install -r requirements.txt +``` + +### Model Conversion + +```bash +python3 export.py --weight ckpt.t7 --output deepsort.onnx + +# Use onnxsim optimize onnx model +onnxsim deepsort.onnx deepsort_opt.onnx +``` + +## Model Inference + +```bash +export DATASETS_DIR=/Path/to/market1501/ +``` + +### FP16 + +```bash +# Accuracy +bash scripts/infer_deepsort_fp16_accuracy.sh +# Performance +bash scripts/infer_deepsort_fp16_performance.sh +``` + +## Model Results + +| Model | BatchSize | Precision | FPS | Acc(%) | +| :----: | :----: | :----: | :----: | :----: | +| DeepSort | 32 | FP16 | 16991.95 | 99.32 | diff --git a/models/cv/multi_object_tracking/deepsort/ixrt/build_engine.py b/models/cv/multi_object_tracking/deepsort/ixrt/build_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..ff88a6c0b4757c990d7f43b77e1512eaaa12ee08 --- /dev/null +++ b/models/cv/multi_object_tracking/deepsort/ixrt/build_engine.py @@ -0,0 +1,61 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
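+
+# What this script does: parse the simplified DeepSort ONNX model with
+# tensorrt.OnnxParser, pin the optimization profile and every network input to a
+# static shape of [32, 3, 128, 64], set the FP16 builder flag (or INT8 when
+# --precision int8 is given), and serialize the resulting plan to --engine.
+# Typical usage (as in scripts/infer_deepsort_fp16_*.sh):
+#   python3 build_engine.py --model deepsort_opt.onnx --precision float16 --engine deepsort.engine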
+ +import os +import cv2 +import argparse +import numpy as np + +import torch +import tensorrt +from tensorrt import Dims + +def main(config): + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + profile = builder.create_optimization_profile() + profile.set_shape("input", Dims([32, 3, 128, 64]), Dims([32, 3, 128, 64]), Dims([32, 3, 128, 64])) + build_config.add_optimization_profile(profile) + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(config.model) + + precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 + build_config.set_flag(precision) + num_inputs = network.num_inputs + + for i in range(num_inputs): + input_tensor = network.get_input(i) + input_tensor.shape = Dims([32, 3, 128, 64]) + + plan = builder.build_serialized_network(network, build_config) + engine_file_path = config.engine + with open(engine_file_path, "wb") as f: + f.write(plan) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="float16", + help="The precision of datatype") + parser.add_argument("--engine", type=str, default=None) + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = parse_args() + main(args) \ No newline at end of file diff --git a/models/cv/multi_object_tracking/deepsort/ixrt/ci/prepare.sh b/models/cv/multi_object_tracking/deepsort/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..fc1dfb3fa0aaf1f1169a9331e1ccef9b95d8009c --- /dev/null +++ b/models/cv/multi_object_tracking/deepsort/ixrt/ci/prepare.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +pip3 install -r requirements.txt + +# export onnx model +python3 export.py --weight ckpt.t7 --output deepsort.onnx + +# Use onnxsim optimize onnx model +onnxsim deepsort.onnx deepsort_opt.onnx \ No newline at end of file diff --git a/models/cv/multi_object_tracking/deepsort/ixrt/export.py b/models/cv/multi_object_tracking/deepsort/ixrt/export.py new file mode 100644 index 0000000000000000000000000000000000000000..11e5a5f0f644c7bd97486e32e24a2c46592033a5 --- /dev/null +++ b/models/cv/multi_object_tracking/deepsort/ixrt/export.py @@ -0,0 +1,149 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import torch +import torchvision +import argparse + +import torch +import torch.nn as nn +import torch.nn.functional as F + +class BasicBlock(nn.Module): + def __init__(self, c_in, c_out,is_downsample=False): + super(BasicBlock,self).__init__() + self.is_downsample = is_downsample + if is_downsample: + self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=2, padding=1, bias=False) + else: + self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=1, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(c_out) + self.relu = nn.ReLU(True) + self.conv2 = nn.Conv2d(c_out,c_out,3,stride=1,padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(c_out) + if is_downsample: + self.downsample = nn.Sequential( + nn.Conv2d(c_in, c_out, 1, stride=2, bias=False), + nn.BatchNorm2d(c_out) + ) + elif c_in != c_out: + self.downsample = nn.Sequential( + nn.Conv2d(c_in, c_out, 1, stride=1, bias=False), + nn.BatchNorm2d(c_out) + ) + self.is_downsample = True + + def forward(self,x): + y = self.conv1(x) + y = self.bn1(y) + y = self.relu(y) + y = self.conv2(y) + y = self.bn2(y) + if self.is_downsample: + x = self.downsample(x) + return F.relu(x.add(y),True) + +def make_layers(c_in,c_out,repeat_times, is_downsample=False): + blocks = [] + for i in range(repeat_times): + if i ==0: + blocks += [BasicBlock(c_in,c_out, is_downsample=is_downsample),] + else: + blocks += [BasicBlock(c_out,c_out),] + return nn.Sequential(*blocks) + +class Net(nn.Module): + def __init__(self, num_classes=751 ,reid=False): + super(Net,self).__init__() + self.conv = nn.Sequential( + nn.Conv2d(3, 64, 3, stride=1, padding=1), + nn.BatchNorm2d(64), + nn.ReLU(inplace=True), + nn.MaxPool2d(3, 2, padding=1), + ) + self.layer1 = make_layers(64, 64, 2, False) + self.layer2 = make_layers(64, 128, 2, True) + self.layer3 = make_layers(128, 256, 2, True) + self.layer4 = make_layers(256, 512, 2, True) + self.avgpool = nn.AvgPool2d((8, 4),1) + self.reid = reid + self.classifier = nn.Sequential( + nn.Linear(512, 256), + nn.BatchNorm1d(256), + nn.ReLU(inplace=True), + nn.Dropout(), + nn.Linear(256, num_classes), + ) + + def forward(self, x): + x = self.conv(x) + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + x = self.avgpool(x) + x = x.view(x.size(0), -1) + # B x 128 + if self.reid: + x = x.div(x.norm(p=2, dim=1, keepdim=True)) + return x + # classifier + x = self.classifier(x) + return x + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--weight", + type=str, + required=True, + help="pytorch model weight.") + + parser.add_argument("--output", + type=str, + required=True, + help="export onnx model path.") + + args = parser.parse_args() + return args + +def main(): + args = parse_args() + + model = Net(reid=True) + checkpoint = torch.load(args.weight) + net_dict = checkpoint['net_dict'] + model.load_state_dict(net_dict, strict=False) + model.eval() + + input_names = ['input'] + output_names = ['output'] + dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}} + dummy_input = torch.randn(1, 3, 128, 64) + + torch.onnx.export( + model, + dummy_input, + args.output, + input_names = input_names, + 
dynamic_axes = dynamic_axes, + output_names = output_names, + opset_version=13 + ) + + print("Export onnx model successfully! ") + +if __name__ == "__main__": + main() diff --git a/models/cv/multi_object_tracking/deepsort/ixrt/inference.py b/models/cv/multi_object_tracking/deepsort/ixrt/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..47d49c3e4aab8158c0a46a306fe92a69f31efbde --- /dev/null +++ b/models/cv/multi_object_tracking/deepsort/ixrt/inference.py @@ -0,0 +1,223 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import time +import argparse +import tensorrt +import torch +import torchvision +import numpy as np +from tensorrt import Dims +from cuda import cuda, cudart +from tqdm import tqdm +from utils import ReidEvaluator, SmallVehicleID + +from common import create_engine_context, get_io_bindings + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--engine", + type=str, + required=True, + help="igie engine path.") + + parser.add_argument("--batchsize", + type=int, + required=True, + help="inference batch size.") + + parser.add_argument("--datasets", + type=str, + required=True, + help="datasets path.") + + parser.add_argument("--input_name", + type=str, + required=True, + help="input name of the model.") + + parser.add_argument("--warmup", + type=int, + default=3, + help="number of warmup before test.") + + parser.add_argument("--acc_target", + type=float, + default=None, + help="Model inference Accuracy target.") + + parser.add_argument("--fps_target", + type=float, + default=None, + help="Model inference FPS target.") + + parser.add_argument("--perf_only", + type=bool, + default=False, + help="Run performance test only") + + args = parser.parse_args() + + return args + +def get_dataloader(path, batch_size, num_workers): + # data loader + query_dir = os.path.join(path, "query") + gallery_dir = os.path.join(path, "gallery") + transform = torchvision.transforms.Compose([ + torchvision.transforms.Resize((128, 64)), + torchvision.transforms.ToTensor(), + torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) + ]) + queryloader = torch.utils.data.DataLoader( + torchvision.datasets.ImageFolder(query_dir, transform=transform), + batch_size, shuffle=False, num_workers=num_workers + ) + galleryloader = torch.utils.data.DataLoader( + torchvision.datasets.ImageFolder(gallery_dir, transform=transform), + batch_size, shuffle=True, num_workers=num_workers + ) + + return queryloader, galleryloader + +def main(): + args = parse_args() + + batch_size = args.batchsize + + host_mem = tensorrt.IHostMemory + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + + # Load Engine && I/O bindings + engine, context = create_engine_context(args.engine, logger) + inputs, outputs, allocations = get_io_bindings(engine) + + if args.warmup > 0: + print("\nWarm Start.") + for i in range(args.warmup): + 
context.execute_v2(allocations) + print("Warm Done.") + + # just run perf test + if args.perf_only: + torch.cuda.synchronize() + start_time = time.time() + + for i in range(10): + context.execute_v2(allocations) + + torch.cuda.synchronize() + end_time = time.time() + forward_time = end_time - start_time + num_samples = 10 * args.batchsize + fps = num_samples / forward_time + + print("FPS : ", fps) + print(f"Performance Check : Test {fps} >= target {args.fps_target}") + else: + # get dataloader + queryloader, galleryloader = get_dataloader(args.datasets, batch_size, 16) + + query_features = torch.tensor([]).float() + query_labels = torch.tensor([]).long() + gallery_features = torch.tensor([]).float() + gallery_labels = torch.tensor([]).long() + + # run queryloader + for input_data, labels in tqdm(queryloader): + # Pad the last batch + pad_batch = len(input_data) != batch_size + if pad_batch: + origin_size = len(input_data) + input_data = np.resize(input_data, (batch_size, *input_data.shape[1:])) + input_data = np.ascontiguousarray(input_data) + + (err,) = cudart.cudaMemcpy( + inputs[0]["allocation"], + input_data, + input_data.nbytes, + cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, + ) + assert err == cudart.cudaError_t.cudaSuccess + + context.execute_v2(allocations) + + output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) + (err,) = cudart.cudaMemcpy( + output, + outputs[0]["allocation"], + outputs[0]["nbytes"], + cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost, + ) + assert err == cudart.cudaError_t.cudaSuccess + + features = torch.from_numpy(output) + if pad_batch: + features = features[:origin_size] + + query_features = torch.cat((query_features, features), dim=0) + query_labels = torch.cat((query_labels, labels)) + + # run galleryloader + for input_data, labels in tqdm(galleryloader): + # Pad the last batch + pad_batch = len(input_data) != batch_size + if pad_batch: + origin_size = len(input_data) + input_data = np.resize(input_data, (batch_size, *input_data.shape[1:])) + input_data = np.ascontiguousarray(input_data) + + (err,) = cudart.cudaMemcpy( + inputs[0]["allocation"], + input_data, + input_data.nbytes, + cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, + ) + assert err == cudart.cudaError_t.cudaSuccess + + context.execute_v2(allocations) + + output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) + (err,) = cudart.cudaMemcpy( + output, + outputs[0]["allocation"], + outputs[0]["nbytes"], + cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost, + ) + assert err == cudart.cudaError_t.cudaSuccess + + features = torch.from_numpy(output) + if pad_batch: + features = features[:origin_size] + + gallery_features = torch.cat((gallery_features, features), dim=0) + gallery_labels = torch.cat((gallery_labels, labels)) + + qf = query_features + ql = query_labels + gf = gallery_features + gl = gallery_labels + scores = qf.mm(gf.t()) + res = scores.topk(1, dim=1)[1][:,0] + top1_correct = gl[res].eq(ql).sum().item() + top1_acc = round(top1_correct / ql.size(0) * 100.0, 2) + metricResult = {"metricResult": {"Acc": f"{top1_acc}%"}} + print(metricResult) + print(f"\n* Acc: {top1_acc} %") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/multi_object_tracking/deepsort/ixrt/requirements.txt b/models/cv/multi_object_tracking/deepsort/ixrt/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..0516c70db6c22f6bd9cad91f0f3ca73f51bb0bea --- /dev/null +++ b/models/cv/multi_object_tracking/deepsort/ixrt/requirements.txt @@ -0,0 +1,3 @@ +onnx 
+tqdm
+onnxsim
diff --git a/models/cv/multi_object_tracking/deepsort/ixrt/scripts/infer_deepsort_fp16_accuracy.sh b/models/cv/multi_object_tracking/deepsort/ixrt/scripts/infer_deepsort_fp16_accuracy.sh
new file mode 100644
index 0000000000000000000000000000000000000000..fa96bf2fe0cd4a79889d53eea4afe5802085b5b5
--- /dev/null
+++ b/models/cv/multi_object_tracking/deepsort/ixrt/scripts/infer_deepsort_fp16_accuracy.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+batchsize=32
+model_path="deepsort_opt.onnx"
+datasets_path=${DATASETS_DIR}
+
+# build engine
+python3 build_engine.py \
+    --model ${model_path} \
+    --precision float16 \
+    --engine deepsort.engine
+
+
+# inference
+python3 inference.py \
+    --engine deepsort.engine \
+    --batchsize ${batchsize} \
+    --input_name input \
+    --datasets ${datasets_path}
\ No newline at end of file
diff --git a/models/cv/multi_object_tracking/deepsort/ixrt/scripts/infer_deepsort_fp16_performance.sh b/models/cv/multi_object_tracking/deepsort/ixrt/scripts/infer_deepsort_fp16_performance.sh
new file mode 100644
index 0000000000000000000000000000000000000000..7f5bfcffcbe5782d2f4f0c053fe4086292c63a5a
--- /dev/null
+++ b/models/cv/multi_object_tracking/deepsort/ixrt/scripts/infer_deepsort_fp16_performance.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+batchsize=32
+model_path="deepsort_opt.onnx"
+datasets_path=${DATASETS_DIR}
+
+# build engine
+python3 build_engine.py \
+    --model ${model_path} \
+    --precision float16 \
+    --engine deepsort.engine
+
+
+# inference
+python3 inference.py \
+    --engine deepsort.engine \
+    --batchsize ${batchsize} \
+    --input_name input \
+    --datasets ${datasets_path} \
+    --perf_only True
\ No newline at end of file
diff --git a/models/cv/multi_object_tracking/fastreid/ixrt/README.md b/models/cv/multi_object_tracking/fastreid/ixrt/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..c48ed2751ef51dc282a1e4f1cd21c92a31f0ad1c
--- /dev/null
+++ b/models/cv/multi_object_tracking/fastreid/ixrt/README.md
@@ -0,0 +1,59 @@
+# FastReID (ixRT)
+
+## Model Description
+
+FastReID is a research platform that implements state-of-the-art re-identification algorithms.
+ +## Supported Environments + +| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release | +| :----: | :----: | :----: | +| MR-V100 | 4.3.0 | 25.12 | + +## Model Preparation + +### Prepare Resources + +Pretrained model: + +Dataset: to download the vehicleid dataset. + +### Install Dependencies + +```bash +pip3 install -r requirements.txt +``` + +### Model Conversion + +```bash +# install fast-reid +git clone https://github.com/JDAI-CV/fast-reid.git +cd fast-reid +pip3 install -r docs/requirements.txt + +# export onnx model +python3 tools/deploy/onnx_export.py --config-file configs/VehicleID/bagtricks_R50-ibn.yml --name fast_reid --output ../ --opts MODEL.WEIGHTS ../vehicleid_bot_R50-ibn.pth +cd .. +``` + +## Model Inference + +```bash +export DATASETS_DIR=/Path/to/VehicleID +``` + +### FP16 + +```bash +# Accuracy +bash scripts/infer_fastreid_fp16_accuracy.sh +# Performance +bash scripts/infer_fastreid_fp16_performance.sh +``` + +## Model Results + +| Model | BatchSize | Precision | FPS | Rank-1(%) | Rank-5(%) | mAP | +| :----: | :----: | :----: | :----: | :----: | :----: | :----: | +| FastReid | 32 | FP16 | 2372.42 | 0.808 | 2.160 | 2.11 | diff --git a/models/cv/multi_object_tracking/fastreid/ixrt/build_engine.py b/models/cv/multi_object_tracking/fastreid/ixrt/build_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..3a1e53dce77fb7b5d801d966c61df4aa547546b8 --- /dev/null +++ b/models/cv/multi_object_tracking/fastreid/ixrt/build_engine.py @@ -0,0 +1,61 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
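+
+# Same engine-build flow as the DeepSort variant: the optimization profile and all
+# network inputs are pinned to the FastReID input shape [32, 3, 256, 256], the FP16
+# (or INT8) builder flag is set, and the serialized plan is written to --engine
+# (fast_reid.engine in scripts/infer_fastreid_fp16_*.sh, built with --precision float16).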
+ +import os +import cv2 +import argparse +import numpy as np + +import torch +import tensorrt +from tensorrt import Dims + +def main(config): + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + profile = builder.create_optimization_profile() + profile.set_shape("input", Dims([32, 3, 256, 256]), Dims([32, 3, 256, 256]), Dims([32, 3, 256, 256])) + build_config.add_optimization_profile(profile) + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(config.model) + + precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 + build_config.set_flag(precision) + num_inputs = network.num_inputs + + for i in range(num_inputs): + input_tensor = network.get_input(i) + input_tensor.shape = Dims([32, 3, 256, 256]) + + plan = builder.build_serialized_network(network, build_config) + engine_file_path = config.engine + with open(engine_file_path, "wb") as f: + f.write(plan) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="float16", + help="The precision of datatype") + parser.add_argument("--engine", type=str, default=None) + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = parse_args() + main(args) \ No newline at end of file diff --git a/models/cv/multi_object_tracking/fastreid/ixrt/ci/prepare.sh b/models/cv/multi_object_tracking/fastreid/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..64a287b54fc981136e7377e8bc0a010b03bd2a6f --- /dev/null +++ b/models/cv/multi_object_tracking/fastreid/ixrt/ci/prepare.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +pip3 install -r requirements.txt + +# install fast-reid +git clone https://github.com/JDAI-CV/fast-reid.git --depth=1 +cd fast-reid +pip3 install -r docs/requirements.txt + +# export onnx model +python3 tools/deploy/onnx_export.py --config-file configs/VehicleID/bagtricks_R50-ibn.yml --name fast_reid --output ../ --opts MODEL.WEIGHTS ../vehicleid_bot_R50-ibn.pth + +cd .. \ No newline at end of file diff --git a/models/cv/multi_object_tracking/fastreid/ixrt/common.py b/models/cv/multi_object_tracking/fastreid/ixrt/common.py new file mode 100644 index 0000000000000000000000000000000000000000..ef92a6ba6291058d20f575edb09da35ebff3a937 --- /dev/null +++ b/models/cv/multi_object_tracking/fastreid/ixrt/common.py @@ -0,0 +1,69 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import cv2 +import glob +import torch +import tensorrt +import numpy as np +from cuda import cuda, cudart + +def create_engine_context(engine_path, logger): + with open(engine_path, "rb") as f: + runtime = tensorrt.Runtime(logger) + assert runtime + engine = runtime.deserialize_cuda_engine(f.read()) + assert engine + context = engine.create_execution_context() + assert context + + return engine, context + +def get_io_bindings(engine): + # Setup I/O bindings + inputs = [] + outputs = [] + allocations = [] + + for i in range(engine.num_bindings): + is_input = False + if engine.binding_is_input(i): + is_input = True + name = engine.get_binding_name(i) + dtype = engine.get_binding_dtype(i) + shape = engine.get_binding_shape(i) + if is_input: + batch_size = shape[0] + size = np.dtype(tensorrt.nptype(dtype)).itemsize + for s in shape: + size *= s + err, allocation = cudart.cudaMalloc(size) + assert err == cudart.cudaError_t.cudaSuccess + binding = { + "index": i, + "name": name, + "dtype": np.dtype(tensorrt.nptype(dtype)), + "shape": list(shape), + "allocation": allocation, + "nbytes": size, + } + print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") + allocations.append(allocation) + if engine.binding_is_input(i): + inputs.append(binding) + else: + outputs.append(binding) + return inputs, outputs, allocations \ No newline at end of file diff --git a/models/cv/multi_object_tracking/fastreid/ixrt/inference.py b/models/cv/multi_object_tracking/fastreid/ixrt/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..417f7c14350f547ecebe146cd05d89498bbab4b0 --- /dev/null +++ b/models/cv/multi_object_tracking/fastreid/ixrt/inference.py @@ -0,0 +1,158 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
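+
+# Inference flow: deserialize the ixRT engine (common.create_engine_context) and
+# allocate device buffers for all bindings (common.get_io_bindings), then either
+#   * perf mode (--perf_only True): time 10 execute_v2() calls and report FPS, or
+#   * accuracy mode: iterate the SmallVehicleID test split, pad the last batch to
+#     the engine batch size, copy inputs host->device with cudaMemcpy, run
+#     execute_v2(), copy the output features back and score them with
+#     ReidEvaluator (Rank-1/5/10, mAP, mINP).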
+ +import os +import time +import argparse +import tensorrt +import torch +import torchvision +import numpy as np +from tensorrt import Dims +from cuda import cuda, cudart +from tqdm import tqdm +from utils import ReidEvaluator, SmallVehicleID + +from common import create_engine_context, get_io_bindings + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--engine", + type=str, + required=True, + help="igie engine path.") + + parser.add_argument("--batchsize", + type=int, + required=True, + help="inference batch size.") + + parser.add_argument("--datasets", + type=str, + required=True, + help="datasets path.") + + parser.add_argument("--input_name", + type=str, + required=True, + help="input name of the model.") + + parser.add_argument("--warmup", + type=int, + default=3, + help="number of warmup before test.") + + parser.add_argument("--acc_target", + type=float, + default=None, + help="Model inference Accuracy target.") + + parser.add_argument("--fps_target", + type=float, + default=None, + help="Model inference FPS target.") + + parser.add_argument("--perf_only", + type=bool, + default=False, + help="Run performance test only") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + batch_size = args.batchsize + + host_mem = tensorrt.IHostMemory + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + + # Load Engine && I/O bindings + engine, context = create_engine_context(args.engine, logger) + inputs, outputs, allocations = get_io_bindings(engine) + + if args.warmup > 0: + print("\nWarm Start.") + for i in range(args.warmup): + context.execute_v2(allocations) + print("Warm Done.") + + # just run perf test + if args.perf_only: + torch.cuda.synchronize() + start_time = time.time() + + for i in range(10): + context.execute_v2(allocations) + + torch.cuda.synchronize() + end_time = time.time() + forward_time = end_time - start_time + num_samples = 10 * args.batchsize + fps = num_samples / forward_time + + print("FPS : ", fps) + print(f"Performance Check : Test {fps} >= target {args.fps_target}") + else: + dataset = SmallVehicleID(args.datasets) + # get dataloader + dataloader = torch.utils.data.DataLoader(dataset, batch_size, num_workers=16, drop_last=False) + + reid_evaluator = ReidEvaluator(len(dataset.query)) + + for data in tqdm(dataloader): + input_data = data['images'] + pad_batch = len(input_data) != batch_size + if pad_batch: + origin_size = len(input_data) + input_data = np.resize(input_data, (batch_size, *input_data.shape[1:])) + input_data = np.ascontiguousarray(input_data) + + (err,) = cudart.cudaMemcpy( + inputs[0]["allocation"], + input_data, + input_data.nbytes, + cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, + ) + assert err == cudart.cudaError_t.cudaSuccess + + context.execute_v2(allocations) + + output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) + (err,) = cudart.cudaMemcpy( + output, + outputs[0]["allocation"], + outputs[0]["nbytes"], + cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost, + ) + assert err == cudart.cudaError_t.cudaSuccess + + if pad_batch: + output = output[:origin_size] + + reid_evaluator.process(data, output) + + results = reid_evaluator.evaluate() + metricResult = {"metricResult": {}} + for key in results.keys(): + print(f"\n* {key}: {results[key]}") + metricResult["metricResult"][key] = results[key] + print(metricResult) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/multi_object_tracking/fastreid/ixrt/requirements.txt 
b/models/cv/multi_object_tracking/fastreid/ixrt/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..b67b3134e04c820d471ff6f17447cdb01fdd2b52 --- /dev/null +++ b/models/cv/multi_object_tracking/fastreid/ixrt/requirements.txt @@ -0,0 +1,4 @@ +onnx +tqdm +onnxsim +onnxoptimizer \ No newline at end of file diff --git a/models/cv/multi_object_tracking/fastreid/ixrt/scripts/infer_fastreid_fp16_accuracy.sh b/models/cv/multi_object_tracking/fastreid/ixrt/scripts/infer_fastreid_fp16_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..a2a4df37b9268c56de36f1ca733bbe51ae54cdd7 --- /dev/null +++ b/models/cv/multi_object_tracking/fastreid/ixrt/scripts/infer_fastreid_fp16_accuracy.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="fast_reid.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model ${model_path} \ + --precision float16 \ + --engine fast_reid.engine + + +# inference +python3 inference.py \ + --engine fast_reid.engine \ + --batchsize ${batchsize} \ + --input_name batched_inputs.1 \ + --datasets ${datasets_path} \ No newline at end of file diff --git a/models/cv/multi_object_tracking/fastreid/ixrt/scripts/infer_fastreid_fp16_performance.sh b/models/cv/multi_object_tracking/fastreid/ixrt/scripts/infer_fastreid_fp16_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..59b55fbf793c99ebfda315778922555d158a9c79 --- /dev/null +++ b/models/cv/multi_object_tracking/fastreid/ixrt/scripts/infer_fastreid_fp16_performance.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
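+
+# Performance-only variant of the FastReID run: it builds the float16 engine from
+# fast_reid.onnx and calls inference.py with --perf_only True, which skips the
+# VehicleID accuracy evaluation and only reports FPS. DATASETS_DIR is still passed
+# to --datasets, although perf-only mode does not read the images.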
+ +batchsize=32 +model_path="fast_reid.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model ${model_path} \ + --precision float16 \ + --engine fast_reid.engine + + +# inference +python3 inference.py \ + --engine fast_reid.engine \ + --batchsize ${batchsize} \ + --input_name batched_inputs.1 \ + --datasets ${datasets_path} \ + --perf_only True \ No newline at end of file diff --git a/models/cv/multi_object_tracking/fastreid/ixrt/utils.py b/models/cv/multi_object_tracking/fastreid/ixrt/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..9c2bcb2e4ca97572879f979ac79c1168bb2eaaf9 --- /dev/null +++ b/models/cv/multi_object_tracking/fastreid/ixrt/utils.py @@ -0,0 +1,302 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import torch +import copy +import numpy as np +from PIL import Image +from tabulate import tabulate +from termcolor import colored +from collections import OrderedDict +import torch.nn.functional as F +import torchvision.transforms as T + +class ReidEvaluator: + def __init__(self, num_query, output_dir=None): + self._predictions = [] + self._num_query = num_query + + def process(self, inputs, outputs): + prediction = { + 'feats': torch.from_numpy(outputs), + 'pids': inputs['targets'], + 'camids': inputs['camids'] + } + + self._predictions.append(prediction) + + def compute_cosine_distance(self, features, others): + """Computes cosine distance. + Args: + features (torch.Tensor): 2-D feature matrix. + others (torch.Tensor): 2-D feature matrix. + Returns: + torch.Tensor: distance matrix. + """ + features = F.normalize(features, p=2, dim=1) + others = F.normalize(others, p=2, dim=1) + dist_m = 1 - torch.mm(features, others.t()) + return dist_m.cpu().numpy() + + def evaluate_rank(self, distmat, q_pids, g_pids, q_camids, g_camids, max_rank=50): + """Evaluation with market1501 metric + Key: for each query identity, its gallery images from the same camera view are discarded. + """ + num_q, num_g = distmat.shape + + if num_g < max_rank: + max_rank = num_g + print('Note: number of gallery samples is quite small, got {}'.format(num_g)) + + indices = np.argsort(distmat, axis=1) + # compute cmc curve for each query + all_cmc = [] + all_AP = [] + all_INP = [] + num_valid_q = 0. 
# number of valid query + + for q_idx in range(num_q): + # get query pid and camid + q_pid = q_pids[q_idx] + q_camid = q_camids[q_idx] + + # remove gallery samples that have the same pid and camid with query + order = indices[q_idx] + remove = (g_pids[order] == q_pid) & (g_camids[order] == q_camid) + keep = np.invert(remove) + + # compute cmc curve + matches = (g_pids[order] == q_pid).astype(np.int32) + raw_cmc = matches[keep] # binary vector, positions with value 1 are correct matches + if not np.any(raw_cmc): + # this condition is true when query identity does not appear in gallery + continue + + cmc = raw_cmc.cumsum() + + pos_idx = np.where(raw_cmc == 1) + max_pos_idx = np.max(pos_idx) + inp = cmc[max_pos_idx] / (max_pos_idx + 1.0) + all_INP.append(inp) + + cmc[cmc > 1] = 1 + + all_cmc.append(cmc[:max_rank]) + num_valid_q += 1. + + # compute average precision + # reference: https://en.wikipedia.org/wiki/Evaluation_measures_(information_retrieval)#Average_precision + num_rel = raw_cmc.sum() + tmp_cmc = raw_cmc.cumsum() + tmp_cmc = [x / (i + 1.) for i, x in enumerate(tmp_cmc)] + tmp_cmc = np.asarray(tmp_cmc) * raw_cmc + AP = tmp_cmc.sum() / num_rel + all_AP.append(AP) + + assert num_valid_q > 0, 'Error: all query identities do not appear in gallery' + + all_cmc = np.asarray(all_cmc).astype(np.float32) + all_cmc = all_cmc.sum(0) / num_valid_q + + return all_cmc, all_AP, all_INP + + def evaluate(self): + predictions = self._predictions + + features = [] + pids = [] + camids = [] + + for prediction in predictions: + features.append(prediction['feats']) + pids.append(prediction['pids']) + camids.append(prediction['camids']) + + features = torch.cat(features, dim=0) + pids = torch.cat(pids, dim=0).numpy() + camids = torch.cat(camids, dim=0).numpy() + + query_features = features[:self._num_query] + query_pids = pids[:self._num_query] + query_camids = camids[:self._num_query] + + gallery_features = features[self._num_query:] + gallery_pids = pids[self._num_query:] + gallery_camids = camids[self._num_query:] + + self._results = OrderedDict() + + dist = self.compute_cosine_distance(query_features, gallery_features) + + cmc, all_AP, all_INP = self.evaluate_rank(dist, query_pids, gallery_pids, query_camids, gallery_camids) + + mAP = np.mean(all_AP) + mINP = np.mean(all_INP) + for r in [1, 5, 10]: + self._results['Rank-{}'.format(r)] = cmc[r - 1] * 100 + self._results['mAP'] = mAP * 100 + self._results['mINP'] = mINP * 100 + self._results["metric"] = (mAP + cmc[0]) / 2 * 100 + + return copy.deepcopy(self._results) + +class VehicleID(torch.utils.data.Dataset): + def __init__(self, root='datasets', test_list="", image_size=(256, 256)): + self.image_dir = os.path.join(root, "image") + + if test_list: + self.test_list = test_list + else: + self.test_list = os.path.join(root, 'train_test_split/test_list_13164.txt') + + required_files = [ + root, + self.image_dir, + self.test_list + ] + + self.check_before_run(required_files) + self.query, self.gallery = self.process_dir(self.test_list) + + self.transforms = T.Compose([ + T.Resize(image_size, interpolation=3), + T.ToTensor(), + ]) + + self.img_items = self.query + self.gallery + + pid_set = set() + cam_set = set() + + for i in self.img_items: + pid_set.add(i[1]) + cam_set.add(i[2]) + + self.pids = sorted(list(pid_set)) + self.cams = sorted(list(cam_set)) + + def __getitem__(self, index): + img_item = self.img_items[index] + img_path = img_item[0] + pid = img_item[1] + camid = img_item[2] + img = Image.open(img_path) + img = self.transforms(img) * 255.0 + + 
return { + "images": img, + "targets": pid, + "camids": camid, + "img_paths": img_path + } + + def __len__(self): + return len(self.img_items) + + def check_before_run(self, required_files): + if isinstance(required_files, str): + required_files = [required_files] + + for fpath in required_files: + if not os.path.exists(fpath): + raise RuntimeError('"{}" is not found'.format(fpath)) + + def process_dir(self, list_file): + img_list_lines = open(list_file, 'r').readlines() + + dataset = [] + for line in img_list_lines: + line = line.strip() + vid = int(line.split(' ')[1]) + img_id = line.split(' ')[0] + img_path = os.path.join(self.image_dir, f"{img_id}.jpg") + img_id = int(img_id) + dataset.append((img_path, vid, img_id)) + + # random.shuffle(dataset) + vid_container = set() + query = [] + gallery = [] + for sample in dataset: + if sample[1] not in vid_container: + vid_container.add(sample[1]) + gallery.append(sample) + else: + query.append(sample) + + return query, gallery + + def parse_data(self, data): + """Parses data list and returns the number of person IDs + and the number of camera views. + Args: + data (list): contains tuples of (img_path(s), pid, camid) + """ + pids = set() + cams = set() + for info in data: + pids.add(info[1]) + cams.add(info[2]) + return len(pids), len(cams) + + def show_test(self): + num_query_pids, num_query_cams = self.parse_data(self.query) + num_gallery_pids, num_gallery_cams = self.parse_data(self.gallery) + + headers = ['subset', '# ids', '# images', '# cameras'] + csv_results = [ + ['query', num_query_pids, len(self.query), num_query_cams], + ['gallery', num_gallery_pids, len(self.gallery), num_gallery_cams], + ] + + table = tabulate( + csv_results, + tablefmt="pipe", + headers=headers, + numalign="left", + ) + print(f"=> Loaded {self.__class__.__name__} in csv format: \n" + colored(table, "cyan")) + +class SmallVehicleID(VehicleID): + """VehicleID. + Small test dataset statistics: + - identities: 800. + - images: 6493. + """ + def __init__(self, root='datasets'): + self.test_list = os.path.join(root, 'train_test_split/test_list_800.txt') + super(SmallVehicleID, self).__init__(root, self.test_list) + +class MediumVehicleID(VehicleID): + """VehicleID. + Medium test dataset statistics: + - identities: 1600. + - images: 13377. + """ + def __init__(self, root='datasets'): + self.test_list = os.path.join(root, 'train_test_split/test_list_1600.txt') + super(MediumVehicleID, self).__init__(root, self.test_list) + +class LargeVehicleID(VehicleID): + """VehicleID. + Large test dataset statistics: + - identities: 2400. + - images: 19777. 
+ """ + + def __init__(self, root='datasets'): + self.test_list = os.path.join(root, 'train_test_split/test_list_2400.txt') + super(LargeVehicleID, self).__init__(root, self.test_list) \ No newline at end of file diff --git a/models/cv/object_detection/ixrt_common/inference_mmdet.py b/models/cv/object_detection/ixrt_common/inference_mmdet.py index 69a7e6a5af468101d95288603e13819d376d81a7..9f21042cf778f14ecee71393e821e70c4638d6a0 100644 --- a/models/cv/object_detection/ixrt_common/inference_mmdet.py +++ b/models/cv/object_detection/ixrt_common/inference_mmdet.py @@ -168,7 +168,7 @@ def main(): output = torch.from_numpy(output) # Handle RetinaNet's output structure differently - if filename.lower().startswith("retinanet_"): + if filename.lower().startswith("retinanet_") or filename.lower().startswith("yolof_"): if i < len(outputs) / 2: cls_score.append(output) else: @@ -185,7 +185,7 @@ def main(): data_samples.metainfo for data_samples in input_data['data_samples'] ] - if filename.lower().startswith(("fovea_r50_", "fsaf_", "retinanet_")): + if filename.lower().startswith(("fovea_r50_", "fsaf_", "retinanet_", "yolof_")): results_list = runner.model.bbox_head.predict_by_feat(cls_score, box_reg, batch_img_metas=batch_img_metas, rescale=True) else: results_list = runner.model.bbox_head.predict_by_feat(cls_score, box_reg, score_factors, batch_img_metas=batch_img_metas, rescale=True) diff --git a/models/cv/object_detection/ixrt_common/yolof_r50-c5_8xb8-1x_coco.py b/models/cv/object_detection/ixrt_common/yolof_r50-c5_8xb8-1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..3549bbbe1f4610c0419803c82f8972eaafdab8ed --- /dev/null +++ b/models/cv/object_detection/ixrt_common/yolof_r50-c5_8xb8-1x_coco.py @@ -0,0 +1,291 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+# +auto_scale_lr = dict(base_batch_size=64, enable=False) +backend_args = None +data_root = 'data/coco/' +dataset_type = 'CocoDataset' +default_hooks = dict( + checkpoint=dict(interval=1, type='CheckpointHook'), + logger=dict(interval=50, type='LoggerHook'), + param_scheduler=dict(type='ParamSchedulerHook'), + sampler_seed=dict(type='DistSamplerSeedHook'), + timer=dict(type='IterTimerHook'), + visualization=dict(type='DetVisualizationHook')) +default_scope = 'mmdet' +env_cfg = dict( + cudnn_benchmark=False, + dist_cfg=dict(backend='nccl'), + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0)) +load_from = None +log_level = 'ERROR' +log_processor = dict(by_epoch=True, type='LogProcessor', window_size=50) +model = dict( + backbone=dict( + depth=50, + frozen_stages=1, + init_cfg=dict( + checkpoint='open-mmlab://detectron/resnet50_caffe', + type='Pretrained'), + norm_cfg=dict(requires_grad=False, type='BN'), + norm_eval=True, + num_stages=4, + out_indices=(3, ), + style='caffe', + type='ResNet'), + bbox_head=dict( + anchor_generator=dict( + ratios=[ + 1.0, + ], + scales=[ + 1, + 2, + 4, + 8, + 16, + ], + strides=[ + 32, + ], + type='AnchorGenerator'), + bbox_coder=dict( + add_ctr_clamp=True, + ctr_clamp=32, + target_means=[ + 0.0, + 0.0, + 0.0, + 0.0, + ], + target_stds=[ + 1.0, + 1.0, + 1.0, + 1.0, + ], + type='DeltaXYWHBBoxCoder'), + in_channels=512, + loss_bbox=dict(loss_weight=1.0, type='GIoULoss'), + loss_cls=dict( + alpha=0.25, + gamma=2.0, + loss_weight=1.0, + type='FocalLoss', + use_sigmoid=True), + num_classes=80, + reg_decoded_bbox=True, + type='YOLOFHead'), + data_preprocessor=dict( + bgr_to_rgb=False, + mean=[ + 103.53, + 116.28, + 123.675, + ], + pad_size_divisor=32, + std=[ + 1.0, + 1.0, + 1.0, + ], + type='DetDataPreprocessor'), + neck=dict( + block_dilations=[ + 2, + 4, + 6, + 8, + ], + block_mid_channels=128, + in_channels=2048, + num_residual_blocks=4, + out_channels=512, + type='DilatedEncoder'), + test_cfg=dict( + max_per_img=100, + min_bbox_size=0, + nms=dict(iou_threshold=0.6, type='nms'), + nms_pre=1000, + score_thr=0.05), + train_cfg=dict( + allowed_border=-1, + assigner=dict( + neg_ignore_thr=0.7, pos_ignore_thr=0.15, type='UniformAssigner'), + debug=False, + pos_weight=-1), + type='YOLOF') +optim_wrapper = dict( + optimizer=dict(lr=0.12, momentum=0.9, type='SGD', weight_decay=0.0001), + paramwise_cfg=dict( + custom_keys=dict(backbone=dict(lr_mult=0.3333333333333333)), + norm_decay_mult=0.0), + type='OptimWrapper') +param_scheduler = [ + dict( + begin=0, + by_epoch=False, + end=1500, + start_factor=0.00066667, + type='LinearLR'), + dict( + begin=0, + by_epoch=True, + end=12, + gamma=0.1, + milestones=[ + 8, + 11, + ], + type='MultiStepLR'), +] +resume = False +test_cfg = dict(type='TestLoop') +test_dataloader = dict( + batch_size=32, + dataset=dict( + ann_file='annotations/instances_val2017.json', + backend_args=None, + data_prefix=dict(img='images/val2017/'), + data_root='data/datasets/coco/', + pipeline=[ + dict(backend_args=None, type='LoadImageFromFile'), + dict(keep_ratio=False, scale=( + 800, + 800, + ), type='Resize'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + meta_keys=( + 'img_id', + 'img_path', + 'ori_shape', + 'img_shape', + 'scale_factor', + ), + type='PackDetInputs'), + ], + test_mode=True, + type='CocoDataset'), + drop_last=False, + num_workers=2, + persistent_workers=True, + sampler=dict(shuffle=False, type='DefaultSampler')) +test_evaluator = dict( + ann_file='data/datasets/coco/annotations/instances_val2017.json', + 
backend_args=None, + format_only=False, + metric='bbox', + type='CocoMetric') +test_pipeline = [ + dict(backend_args=None, type='LoadImageFromFile'), + dict(keep_ratio=False, scale=( + 800, + 800, + ), type='Resize'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + meta_keys=( + 'img_id', + 'img_path', + 'ori_shape', + 'img_shape', + 'scale_factor', + ), + type='PackDetInputs'), +] +train_cfg = dict(max_epochs=12, type='EpochBasedTrainLoop', val_interval=1) +train_dataloader = dict( + batch_sampler=dict(type='AspectRatioBatchSampler'), + batch_size=8, + dataset=dict( + ann_file='annotations/instances_train2017.json', + backend_args=None, + data_prefix=dict(img='train2017/'), + data_root='data/coco/', + filter_cfg=dict(filter_empty_gt=True, min_size=32), + pipeline=[ + dict(backend_args=None, type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(keep_ratio=False, scale=( + 800, + 800, + ), type='Resize'), + dict(prob=0.5, type='RandomFlip'), + dict(max_shift_px=32, prob=0.5, type='RandomShift'), + dict(type='PackDetInputs'), + ], + type='CocoDataset'), + num_workers=8, + persistent_workers=True, + sampler=dict(shuffle=True, type='DefaultSampler')) +train_pipeline = [ + dict(backend_args=None, type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(keep_ratio=False, scale=( + 800, + 800, + ), type='Resize'), + dict(prob=0.5, type='RandomFlip'), + dict(max_shift_px=32, prob=0.5, type='RandomShift'), + dict(type='PackDetInputs'), +] +val_cfg = dict(type='ValLoop') +val_dataloader = dict( + batch_size=1, + dataset=dict( + ann_file='annotations/instances_val2017.json', + backend_args=None, + data_prefix=dict(img='val2017/'), + data_root='data/coco/', + pipeline=[ + dict(backend_args=None, type='LoadImageFromFile'), + dict(keep_ratio=False, scale=( + 800, + 800, + ), type='Resize'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + meta_keys=( + 'img_id', + 'img_path', + 'ori_shape', + 'img_shape', + 'scale_factor', + ), + type='PackDetInputs'), + ], + test_mode=True, + type='CocoDataset'), + drop_last=False, + num_workers=2, + persistent_workers=True, + sampler=dict(shuffle=False, type='DefaultSampler')) +val_evaluator = dict( + ann_file='data/coco/annotations/instances_val2017.json', + backend_args=None, + format_only=False, + metric='bbox', + type='CocoMetric') +vis_backends = [ + dict(type='LocalVisBackend'), +] +visualizer = dict( + name='visualizer', + type='DetLocalVisualizer', + vis_backends=[ + dict(type='LocalVisBackend'), + ]) +work_dir = './workspace' diff --git a/models/cv/object_detection/yolof/ixrt/README.md b/models/cv/object_detection/yolof/ixrt/README.md new file mode 100644 index 0000000000000000000000000000000000000000..334e5386b301c6dcd49493b8f6910d2b03bdd3dd --- /dev/null +++ b/models/cv/object_detection/yolof/ixrt/README.md @@ -0,0 +1,91 @@ +# YOLOF (ixRT) + +## Model Description + +YOLOF is a lightweight object detection model that focuses on single-level feature maps for detection and enhances feature representation using dilated convolution modules. With a simple and efficient structure, it is well-suited for real-time object detection tasks. + +## Supported Environments + +| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release | +| :----: | :----: | :----: | +| MR-V100 | 4.3.0 | 25.12 | + +## Model Preparation + +### Prepare Resources + +Pretrained model: + +Dataset: + - to download the labels dataset. 
+ - to download the validation dataset. + - to download the train dataset. + +```bash +unzip -q -d ./ coco2017labels.zip +unzip -q -d ./coco/images/ train2017.zip +unzip -q -d ./coco/images/ val2017.zip + +coco +├── annotations +│   └── instances_val2017.json +├── images +│   ├── train2017 +│   └── val2017 +├── labels +│   ├── train2017 +│   └── val2017 +├── LICENSE +├── README.txt +├── test-dev2017.txt +├── train2017.cache +├── train2017.txt +├── val2017.cache +└── val2017.txt +``` + +### Install Dependencies + +Contact the Iluvatar administrator to get the missing packages: +- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl + +```bash +pip3 install -r requirements.txt +``` + +### Model Conversion + +```bash +mkdir -p checkpoints/ + +# export onnx model +python3 export.py --weight yolof_r50_c5_8x8_1x_coco_20210425_024427-8e864411.pth --cfg ../../ixrt_common/yolof_r50-c5_8xb8-1x_coco.py --output checkpoints/yolof.onnx +``` + +## Model Inference + +```bash +export PROJ_DIR=./ +export DATASETS_DIR=./coco/ +export CHECKPOINTS_DIR=./checkpoints +export RUN_DIR=../../ixrt_common +``` + +### FP16 + +```bash +# Accuracy +bash scripts/infer_yolof_fp16_accuracy.sh +# Performance +bash scripts/infer_yolof_fp16_performance.sh +``` + +## Model Results + +| Model | BatchSize | Precision | FPS | IOU@0.5 | IOU@0.5:0.95 | +| :----:| :-------: | :-------: | :----: | :-----: | :----------: | +| YOLOF | 32 | FP16 | 331.10 | 0.527 | 0.343 | + +## References + +- [mmdetection](https://github.com/open-mmlab/mmdetection.git) \ No newline at end of file diff --git a/models/cv/object_detection/yolof/ixrt/ci/prepare.sh b/models/cv/object_detection/yolof/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..25523a8b3912c9094d60b78c4d121ff65c09daa5 --- /dev/null +++ b/models/cv/object_detection/yolof/ixrt/ci/prepare.sh @@ -0,0 +1,21 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +pip3 install -r requirements.txt +mkdir -p checkpoints/ +python3 export.py --weight yolof_r50_c5_8x8_1x_coco_20210425_024427-8e864411.pth --cfg ../../ixrt_common/yolof_r50-c5_8xb8-1x_coco.py --output checkpoints/yolof.onnx diff --git a/models/cv/object_detection/yolof/ixrt/deploy_default.py b/models/cv/object_detection/yolof/ixrt/deploy_default.py new file mode 100644 index 0000000000000000000000000000000000000000..e6c4d46abafaf80eac32f3fd8a2b68e245d8fe01 --- /dev/null +++ b/models/cv/object_detection/yolof/ixrt/deploy_default.py @@ -0,0 +1,41 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +onnx_config = dict( + type='onnx', + export_params=True, + keep_initializers_as_inputs=False, + opset_version=11, + save_file='end2end.onnx', + input_names=['input'], + output_names=['output'], + input_shape=None, + optimize=True) + +codebase_config = dict( + type='mmdet', + task='ObjectDetection', + model_type='end2end', + post_processing=dict( + score_threshold=0.05, + confidence_threshold=0.005, + iou_threshold=0.5, + max_output_boxes_per_class=200, + pre_top_k=5000, + keep_top_k=100, + background_label_id=-1, + )) + +backend_config = dict(type='onnxruntime') \ No newline at end of file diff --git a/models/cv/object_detection/yolof/ixrt/export.py b/models/cv/object_detection/yolof/ixrt/export.py new file mode 100644 index 0000000000000000000000000000000000000000..6329f18ca0daf37a14375eddc8e4d7189863ecfb --- /dev/null +++ b/models/cv/object_detection/yolof/ixrt/export.py @@ -0,0 +1,74 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +import argparse + +import torch +from mmdeploy.utils import load_config +from mmdeploy.apis import build_task_processor + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--weight", + type=str, + required=True, + help="pytorch model weight.") + + parser.add_argument("--cfg", + type=str, + required=True, + help="model config file.") + + parser.add_argument("--output", + type=str, + required=True, + help="export onnx model path.") + + args = parser.parse_args() + return args + +def main(): + args = parse_args() + + deploy_cfg = 'deploy_default.py' + model_cfg = args.cfg + model_checkpoint = args.weight + + deploy_cfg, model_cfg = load_config(deploy_cfg, model_cfg) + + task_processor = build_task_processor(model_cfg, deploy_cfg, device='cpu') + + model = task_processor.build_pytorch_model(model_checkpoint) + + input_names = ['input'] + output_names = ['output'] + dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}} + dummy_input = torch.randn(1, 3, 800, 800) + + torch.onnx.export( + model, + dummy_input, + args.output, + input_names = input_names, + dynamic_axes = dynamic_axes, + output_names = output_names, + opset_version=13 + ) + + print("Export onnx model successfully! 
") + +if __name__ == '__main__': + main() + diff --git a/models/cv/object_detection/yolof/ixrt/requirements.txt b/models/cv/object_detection/yolof/ixrt/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..520dadd42e95326432eb18d23d2da1e99fb7816e --- /dev/null +++ b/models/cv/object_detection/yolof/ixrt/requirements.txt @@ -0,0 +1,6 @@ +onnx +tqdm +onnxsim +mmdet +mmdeploy==1.3.1 +mmengine diff --git a/models/cv/object_detection/yolof/ixrt/scripts/infer_yolof_fp16_accuracy.sh b/models/cv/object_detection/yolof/ixrt/scripts/infer_yolof_fp16_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..b3a6d9e5a7acf547f29ddb3b98da4c85bbc4ec55 --- /dev/null +++ b/models/cv/object_detection/yolof/ixrt/scripts/infer_yolof_fp16_accuracy.sh @@ -0,0 +1,111 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +WARM_UP=-1 +TGT=-1 +LOOP_COUNT=-1 +RUN_MODE=MAP +PRECISION=float16 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +MODEL_NAME="yolof" +ORIGINE_MODEL="${CHECKPOINTS_DIR}/${MODEL_NAME}.onnx" + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} + +step=0 +CURRENT_MODEL=${ORIGINE_MODEL} +# Simplify Model +let step++ +echo; +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model Skipped, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi + +CURRENT_MODEL=${SIM_MODEL} + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_bs${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py \ + --batch_size ${BSZ} \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi +CURRENT_MODEL=${FINAL_MODEL} + + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference_mmdet.py \ + --engine ${ENGINE_FILE} \ + --cfg_file ${RUN_DIR}/yolof_r50-c5_8xb8-1x_coco.py \ + --datasets 
${DATASETS_DIR} \ + --batchsize ${BSZ} \ + --acc_target ${TGT}; check_status +exit ${EXIT_STATUS} diff --git a/models/cv/object_detection/yolof/ixrt/scripts/infer_yolof_fp16_performance.sh b/models/cv/object_detection/yolof/ixrt/scripts/infer_yolof_fp16_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..bdda913b6d107dc0e8ab73878d6f2d2813ff1e50 --- /dev/null +++ b/models/cv/object_detection/yolof/ixrt/scripts/infer_yolof_fp16_performance.sh @@ -0,0 +1,112 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +WARM_UP=-1 +TGT=-1 +LOOP_COUNT=-1 +RUN_MODE=MAP +PRECISION=float16 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +MODEL_NAME="yolof" +ORIGINE_MODEL="${CHECKPOINTS_DIR}/${MODEL_NAME}.onnx" + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} + +step=0 +CURRENT_MODEL=${ORIGINE_MODEL} +# Simplify Model +let step++ +echo; +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model Skipped, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi + +CURRENT_MODEL=${SIM_MODEL} + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_bs${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py \ + --batch_size ${BSZ} \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi +CURRENT_MODEL=${FINAL_MODEL} + + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference_mmdet.py \ + --engine ${ENGINE_FILE} \ + --cfg_file ${RUN_DIR}/yolof_r50-c5_8xb8-1x_coco.py \ + --perf_only True \ + --datasets ${DATASETS_DIR} \ + --batchsize ${BSZ} \ + --fps_target ${TGT}; check_status +exit ${EXIT_STATUS} diff --git a/models/cv/object_detection/yolov12/ixrt/README.md b/models/cv/object_detection/yolov12/ixrt/README.md new file mode 100644 index 
0000000000000000000000000000000000000000..6d44c0a91bb775a98aad4f61be816a7ab6876c7d --- /dev/null +++ b/models/cv/object_detection/yolov12/ixrt/README.md @@ -0,0 +1,92 @@ +# YOLOv12 (ixRT) + +## Model Description + +YOLOv12 achieves high precision and efficient real-time object detection by integrating attention mechanisms and innovative architectural design. YOLOv12-N is the lightweight version of this series, optimized for resource-constrained environments, maintaining the core advantages of YOLOv12 while offering fast inference and excellent detection accuracy. + +## Supported Environments + +| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release | +| :----: | :----: | :----: | +| MR-V100 | 4.3.0 | 25.12 | + +## Model Preparation + +### Prepare Resources + +Pretrained model: + +Dataset: + - to download the labels dataset. + - to download the validation dataset. + - to download the train dataset. + +```bash +unzip -q -d ./ coco2017labels.zip +unzip -q -d ./coco/images/ train2017.zip +unzip -q -d ./coco/images/ val2017.zip + +coco +├── annotations +│   └── instances_val2017.json +├── images +│   ├── train2017 +│   └── val2017 +├── labels +│   ├── train2017 +│   └── val2017 +├── LICENSE +├── README.txt +├── test-dev2017.txt +├── train2017.cache +├── train2017.txt +├── val2017.cache +└── val2017.txt +``` + +### Install Dependencies + +```bash +pip3 install -r requirements.txt +``` + +## Model Conversion + +```bash +git clone --depth 1 https://github.com/sunsmarterjie/yolov12.git +cd yolov12 +pip3 install -e . +cd .. + +mkdir checkpoints +mv yolov12n.pt yolov12.pt +python3 export.py --weight yolov12.pt --batch 32 +``` + +## Model Inference + +```bash +export PROJ_DIR=./ +export DATASETS_DIR=/path/to/coco/ +export CHECKPOINTS_DIR=./checkpoints +export RUN_DIR=./ +``` + +### FP16 + +```bash +# Accuracy +bash scripts/infer_yolov12_fp16_accuracy.sh +# Performance +bash scripts/infer_yolov12_fp16_performance.sh +``` + +## Model Results + +| Model | BatchSize | Precision | FPS | IOU@0.5 | IOU@0.5:0.95 | +| ------- | --------- | --------- | ------- | ------- | ------------ | +| YOLOv12 | 32 | FP16 | 552.656 | 0.559 | 0.403 | + +## References + +- [YOLOv12](https://github.com/sunsmarterjie/yolov12) diff --git a/models/cv/object_detection/yolov12/ixrt/build_engine.py b/models/cv/object_detection/yolov12/ixrt/build_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..e0839fc3229ef4ab6d23c7ffeae36a1962bcaaf1 --- /dev/null +++ b/models/cv/object_detection/yolov12/ixrt/build_engine.py @@ -0,0 +1,94 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
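+# build_engine.py builds an ixRT engine from the exported YOLOv12 ONNX model.
+# build_engine_trtapi_staticshape() parses the ONNX as-is and serializes an engine with the
+# shapes baked into the graph; build_engine_trtapi_dynamicshape() (kept for reference, not
+# called from __main__) instead adds an optimization profile covering batch 1-64 at 608x608.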
+import os +import cv2 +import argparse +import numpy as np + +import torch +import tensorrt +from tensorrt import Dims + + +def build_engine_trtapi_staticshape(config): + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(config.model) + + precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 + # print("precision : ", precision) + build_config.set_flag(precision) + + plan = builder.build_serialized_network(network, build_config) + engine_file_path = config.engine + with open(engine_file_path, "wb") as f: + f.write(plan) + print("Build static shape engine done!") + + +def build_engine_trtapi_dynamicshape(config): + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + + profile = builder.create_optimization_profile() + profile.set_shape("input", + Dims([1, 3, 608, 608]), + Dims([32, 3, 608, 608]), + Dims([64, 3, 608, 608]), + ) + build_config.add_optimization_profile(profile) + + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(config.model) + precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 + # print("precision : ", precision) + build_config.set_flag(precision) + + # set dynamic + num_inputs = network.num_inputs + for i in range(num_inputs): + input_tensor = network.get_input(i) + input_tensor.shape = Dims([-1, 3, 608, 608]) + + plan = builder.build_serialized_network(network, build_config) + engine_file_path = config.engine + with open(engine_file_path, "wb") as f: + f.write(plan) + print("Build dynamic shape engine done!") + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", + help="The precision of datatype") + # engine args + parser.add_argument("--engine", type=str, default=None) + + args = parser.parse_args() + return args + + +if __name__ == "__main__": + args = parse_args() + build_engine_trtapi_staticshape(args) + # build_engine_trtapi_dynamicshape(args) diff --git a/models/cv/object_detection/yolov12/ixrt/ci/prepare.sh b/models/cv/object_detection/yolov12/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..f6b52e72e18abc892e36b097762fc946aa5f1ae2 --- /dev/null +++ b/models/cv/object_detection/yolov12/ixrt/ci/prepare.sh @@ -0,0 +1,23 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +set -x +pip3 install -r requirements.txt + +mkdir checkpoints +mv yolov12n yolov12.pt +python3 export.py --weight yolov12.pt --batch 32 +mv yolov12.onnx checkpoints/ diff --git a/models/cv/object_detection/yolov12/ixrt/common.py b/models/cv/object_detection/yolov12/ixrt/common.py new file mode 100644 index 0000000000000000000000000000000000000000..74022b562af71ef8a234b5075252ea6a32505558 --- /dev/null +++ b/models/cv/object_detection/yolov12/ixrt/common.py @@ -0,0 +1,337 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +import os +import cv2 +import glob +import time +import numpy as np +from tqdm import tqdm + +import tensorrt +from cuda import cuda, cudart + + +def load_class_names(namesfile): + class_names = [] + with open(namesfile, 'r') as fp: + lines = fp.readlines() + for line in lines: + line = line.rstrip() + class_names.append(line) + return class_names + +# input : [bsz, box_num, 5(cx, cy, w, h, conf) + class_num(prob[0], prob[1], ...)] +# output : [bsz, box_num, 6(left_top_x, left_top_y, right_bottom_x, right_bottom_y, class_id, max_prob*conf)] +def box_class85to6(input): + center_x_y = input[:, :2] + side = input[:, 2:4] + conf = input[:, 4:5] + class_id = np.argmax(input[:, 5:], axis = -1) + class_id = class_id.astype(np.float32).reshape(-1, 1) + 1 + max_prob = np.max(input[:, 5:], axis = -1).reshape(-1, 1) + x1_y1 = center_x_y - 0.5 * side + x2_y2 = center_x_y + 0.5 * side + nms_input = np.concatenate([x1_y1, x2_y2, class_id, max_prob*conf], axis = -1) + return nms_input + +def save2json(batch_img_id, pred_boxes, json_result, class_trans): + for i, boxes in enumerate(pred_boxes): + if boxes is not None: + image_id = int(batch_img_id[i]) + # have no target + if image_id == -1: + continue + + for x1, y1, x2, y2, _, p, c in boxes: + x1, y1, x2, y2, p = float(x1), float(y1), float(x2), float(y2), float(p) + c = int(c) + x = x1 + y = y1 + w = x2 - x1 + h = y2 - y1 + + json_result.append( + { + "image_id": image_id, + "category_id": class_trans[c - 1], + "bbox": [x, y, w, h], + "score": p, + } + ) + +################## About TensorRT ################# +def create_engine_context(engine_path, logger): + with open(engine_path, "rb") as f: + runtime = tensorrt.Runtime(logger) + assert runtime + engine = runtime.deserialize_cuda_engine(f.read()) + assert engine + context = engine.create_execution_context() + assert context + + return engine, context + +def setup_io_bindings(engine, context): + # Setup I/O bindings + inputs = [] + outputs = [] + allocations = [] + + for i in range(engine.num_bindings): + is_input = False + if engine.binding_is_input(i): + is_input = True + name = engine.get_binding_name(i) + dtype = engine.get_binding_dtype(i) + shape = context.get_binding_shape(i) + if is_input: + batch_size = shape[0] + size = np.dtype(tensorrt.nptype(dtype)).itemsize + for s in shape: + 
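+            # total buffer size in bytes: dtype itemsize multiplied by each dimension of the binding shape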
size *= s + err, allocation = cudart.cudaMalloc(size) + assert err == cudart.cudaError_t.cudaSuccess + binding = { + "index": i, + "name": name, + "dtype": np.dtype(tensorrt.nptype(dtype)), + "shape": list(shape), + "allocation": allocation, + "nbytes": size + } + # print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") + allocations.append(allocation) + if engine.binding_is_input(i): + inputs.append(binding) + else: + outputs.append(binding) + return inputs, outputs, allocations +########################################################## + + +################## About Loading Dataset ################# +def load_images(images_path): + """ + If image path is given, return it directly + For txt file, read it and return each line as image path + In other case, it's a folder, return a list with names of each + jpg, jpeg and png file + """ + input_path_extension = images_path.split('.')[-1] + if input_path_extension in ['jpg', 'jpeg', 'png']: + return [images_path] + elif input_path_extension == "txt": + with open(images_path, "r") as f: + return f.read().splitlines() + else: + return glob.glob( + os.path.join(images_path, "*.jpg")) + \ + glob.glob(os.path.join(images_path, "*.png")) + \ + glob.glob(os.path.join(images_path, "*.jpeg")) + +def prepare_batch(images_path, bs=16, input_size=(608, 608)): + + width, height = input_size + + batch_names = [] + batch_images = [] + batch_shapes = [] + + temp_names = [] + temp_images = [] + temp_shapes = [] + + for i, image_path in tqdm(enumerate(images_path), desc="Loading coco data"): + name = os.path.basename(image_path) + image = cv2.imread(image_path) + h, w, _ = image.shape + image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + image_resized = cv2.resize(image_rgb, (width, height), + interpolation=cv2.INTER_LINEAR) + custom_image = image_resized.transpose(2, 0, 1).astype(np.float32) / 255. 
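+        # add a leading batch axis so the per-image arrays can be concatenated into one batch below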
+ custom_image = np.expand_dims(custom_image, axis=0) + + if i != 0 and i % bs == 0: + batch_names.append(temp_names) + batch_images.append(np.concatenate(temp_images, axis=0)) + batch_shapes.append(temp_shapes) + + temp_names = [name] + temp_images = [custom_image] + temp_shapes = [(h, w)] + else: + temp_names.append(name) + temp_images.append(custom_image) + temp_shapes.append((h, w)) + + return batch_names, batch_images, batch_shapes +########################################################## + + +################## About Operating box ################# +def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32): + # Resize and pad image while meeting stride-multiple constraints + shape = im.shape[:2] # current shape [height, width] + if isinstance(new_shape, int): + new_shape = (new_shape, new_shape) + + # Scale ratio (new / old) + r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) + if not scaleup: # only scale down, do not scale up (for better val mAP) + r = min(r, 1.0) + + # Compute padding + ratio = r, r # width, height ratios + new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) + dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding + if auto: # minimum rectangle + dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding + elif scaleFill: # stretch + dw, dh = 0.0, 0.0 + new_unpad = (new_shape[1], new_shape[0]) + ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios + + dw /= 2 # divide padding into 2 sides + dh /= 2 + + if shape[::-1] != new_unpad: # resize + im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR) + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border + return im, ratio, (dw, dh) + +def scale_boxes(net_shape, boxes, ori_shape, use_letterbox=False): + # Rescale boxes (xyxy) from net_shape to ori_shape + + if use_letterbox: + + gain = min( + net_shape[0] / ori_shape[0], net_shape[1] / ori_shape[1] + ) # gain = new / old + pad = (net_shape[1] - ori_shape[1] * gain) / 2, ( + net_shape[0] - ori_shape[0] * gain + ) / 2.0 + + boxes[:, [0, 2]] -= pad[0] # x padding + boxes[:, [1, 3]] -= pad[1] # y padding + boxes[:, :4] /= gain + else: + x_scale, y_scale = net_shape[1] / ori_shape[1], net_shape[0] / ori_shape[0] + + boxes[:, 0] /= x_scale + boxes[:, 1] /= y_scale + boxes[:, 2] /= x_scale + boxes[:, 3] /= y_scale + + clip_boxes(boxes, ori_shape) + return boxes + +def clip_boxes(boxes, shape): + + boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2 + boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2 +########################################################## + + +################## About pre and post processing ######### +def pre_processing(src_img, imgsz=608): + resized = cv2.resize(src_img, (imgsz, imgsz), interpolation=cv2.INTER_LINEAR) + in_img = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB) + in_img = np.transpose(in_img, (2, 0, 1)).astype(np.float32) + in_img = np.expand_dims(in_img, axis=0) + in_img /= 255.0 + return in_img + +def nms_cpu(boxes, confs, nms_thresh=0.5, min_mode=False): + # print(boxes.shape) + x1 = boxes[:, 0] + y1 = boxes[:, 1] + x2 = boxes[:, 2] + y2 = boxes[:, 3] + + areas = (x2 - x1) * (y2 - y1) + order = confs.argsort()[::-1] + + keep = [] + while order.size > 0: + idx_self = order[0] + idx_other = order[1:] + + 
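+        # keep the highest-scoring remaining box, then drop boxes whose overlap with it exceeds nms_thresh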
keep.append(idx_self) + + xx1 = np.maximum(x1[idx_self], x1[idx_other]) + yy1 = np.maximum(y1[idx_self], y1[idx_other]) + xx2 = np.minimum(x2[idx_self], x2[idx_other]) + yy2 = np.minimum(y2[idx_self], y2[idx_other]) + + w = np.maximum(0.0, xx2 - xx1) + h = np.maximum(0.0, yy2 - yy1) + inter = w * h + + if min_mode: + over = inter / np.minimum(areas[order[0]], areas[order[1:]]) + else: + over = inter / (areas[order[0]] + areas[order[1:]] - inter) + + inds = np.where(over <= nms_thresh)[0] + order = order[inds + 1] + + return np.array(keep) + + +def post_processing(img, conf_thresh, nms_thresh, output, num_classes=80): + + # [batch, num, 1, 4] + box_array = output[:, :, :4] + # [batch, num, 2] + class_confs = output[:, :, 4:] + + max_conf = class_confs[:, :, 1] + max_id = class_confs[:, :, 0] + + bboxes_batch = [] + for i in range(box_array.shape[0]): + + argwhere = max_conf[i] > conf_thresh + l_box_array = box_array[i, argwhere, :] + l_max_conf = max_conf[i, argwhere] + l_max_id = max_id[i, argwhere] + + bboxes = [] + # nms for each class + for j in range(num_classes): + + cls_argwhere = l_max_id == j + ll_box_array = l_box_array[cls_argwhere, :] + ll_max_conf = l_max_conf[cls_argwhere] + ll_max_id = l_max_id[cls_argwhere] + + keep = nms_cpu(ll_box_array, ll_max_conf, nms_thresh) + + if (keep.size > 0): + ll_box_array = ll_box_array[keep, :] + ll_max_conf = ll_max_conf[keep] + ll_max_id = ll_max_id[keep] + + for k in range(ll_box_array.shape[0]): + bboxes.append([ll_box_array[k, 0], ll_box_array[k, 1], ll_box_array[k, 2], + ll_box_array[k, 3], ll_max_conf[k], ll_max_conf[k], ll_max_id[k]]) + + bboxes_batch.append(bboxes) + + return bboxes_batch +########################################################## + diff --git a/models/cv/object_detection/yolov12/ixrt/export.py b/models/cv/object_detection/yolov12/ixrt/export.py new file mode 100644 index 0000000000000000000000000000000000000000..388e3a8540b58262f9d8c8e545848addf3afb606 --- /dev/null +++ b/models/cv/object_detection/yolov12/ixrt/export.py @@ -0,0 +1,43 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
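+# export.py loads the YOLOv12 checkpoint through the Ultralytics YOLO API and exports a
+# fixed-batch ONNX model (640x640 input, opset 11); ci/prepare.sh then moves the resulting
+# .onnx into checkpoints/ for the inference scripts.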
+ +import argparse +from ultralytics import YOLO + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--weight", + type=str, + required=True, + help="pytorch model weight.") + + parser.add_argument("--batch", + type=int, + required=True, + help="batchsize of the model.") + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + model = YOLO(args.weight).cpu() + + model.export(format='onnx', batch=args.batch, imgsz=(640, 640), opset=11) + +if __name__ == "__main__": + main() diff --git a/models/cv/object_detection/yolov12/ixrt/inference.py b/models/cv/object_detection/yolov12/ixrt/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..1ac908a7c8f8092f3fb59018cf295ecfb954fa0c --- /dev/null +++ b/models/cv/object_detection/yolov12/ixrt/inference.py @@ -0,0 +1,250 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import json +import argparse +import time +import tensorrt +from tensorrt import Dims +from cuda import cuda, cudart +import torch +import numpy as np +from tqdm import tqdm + +from common import create_engine_context, setup_io_bindings + +from pathlib import Path + +from ultralytics.cfg import get_cfg +from ultralytics.data import converter +from ultralytics.utils import DEFAULT_CFG +from ultralytics.data.utils import check_det_dataset +from ultralytics.utils.metrics import ConfusionMatrix +from ultralytics.models.yolo.detect import DetectionValidator + +coco_classes = {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', + 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', + 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', + 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', + 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', + 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed', + 60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone', 68: 'microwave', 69: 'oven', + 70: 'toaster', 71: 'sink', 72: 'refrigerator', 73: 'book', 74: 'clock', 75: 'vase', 76: 'scissors', 77: 'teddy bear', 78: 'hair drier', 79: 'toothbrush'} + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--model_engine", + type=str, + required=True, + help="ixrt engine path.") + + parser.add_argument("--bsz", + type=int, + required=True, + help="inference batch size.") + + 
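+    # remaining flags: input size, dataset root, warm-up count, dataloader workers,
+    # accuracy/FPS targets, confidence/IoU thresholds, and a performance-only switch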
parser.add_argument( + "--imgsz", + "--img", + "--img-size", + type=int, + default=640, + help="inference size h,w", + ) + + parser.add_argument("--datasets", + type=str, + required=True, + help="datasets path.") + + parser.add_argument("--warm_up", + type=int, + default=3, + help="number of warmup before test.") + + parser.add_argument("--num_workers", + type=int, + default=16, + help="number of workers used in pytorch dataloader.") + + parser.add_argument("--acc_target", + type=float, + default=0.0, + help="Model inference Accuracy target.") + + parser.add_argument("--fps_target", + type=float, + default=0.0, + help="Model inference FPS target.") + + parser.add_argument("--conf", + type=float, + default=0.001, + help="confidence threshold.") + + parser.add_argument("--iou", + type=float, + default=0.65, + help="iou threshold.") + + parser.add_argument("--perf_only", + type=bool, + default=False, + help="Run performance test only") + + args = parser.parse_args() + + return args + +class IxRT_Validator(DetectionValidator): + def __call__(self, config, data): + self.data = data + self.stride = 32 + self.dataloader = self.get_dataloader(self.data.get(self.args.split), self.args.batch) + self.init_metrics() + + total_num = 0 + + input_name = "input" + host_mem = tensorrt.IHostMemory + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + engine, context = create_engine_context(config.model_engine, logger) + input_idx = engine.get_binding_index(input_name) + context.set_binding_shape(input_idx, Dims((config.bsz,3,config.imgsz,config.imgsz))) + inputs, outputs, allocations = setup_io_bindings(engine, context) + + if config.warm_up > 0: + print("\nWarm Start.") + for i in range(config.warm_up): + context.execute_v2(allocations) + print("Warm Done.") + + forward_time = 0.0 + num_samples = 0 + + e2e_start_time = time.time() + for batch in tqdm(self.dataloader): + batch = self.preprocess(batch) + + imgs = batch['img'] + pad_batch = len(imgs) != self.args.batch + if pad_batch: + origin_size = len(imgs) + imgs = np.resize(imgs, (self.args.batch, *imgs.shape[1:])) + + batch_data = np.ascontiguousarray(imgs) + data_shape = batch_data.shape + + cur_bsz_sample = batch_data.shape[0] + num_samples += cur_bsz_sample + + # Set input + input_idx = engine.get_binding_index(input_name) + context.set_binding_shape(input_idx, Dims(data_shape)) + inputs, outputs, allocations = setup_io_bindings(engine, context) + + err, = cuda.cuMemcpyHtoD(inputs[0]["allocation"], batch_data, batch_data.nbytes) + assert(err == cuda.CUresult.CUDA_SUCCESS) + # Prepare the output data + output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) + + + start_time = time.time() + context.execute_v2(allocations) + end_time = time.time() + forward_time += end_time - start_time + + err, = cuda.cuMemcpyDtoH(output, outputs[0]["allocation"], outputs[0]["nbytes"]) + assert(err == cuda.CUresult.CUDA_SUCCESS) + + for alloc in allocations: + if not alloc: + continue + (err,) = cudart.cudaFree(alloc) + assert err == cudart.cudaError_t.cudaSuccess + + if pad_batch: + output = output[:origin_size] + + outputs = torch.from_numpy(output) + + preds = self.postprocess([outputs]) + + self.update_metrics(preds, batch) + + e2e_end_time = time.time() + if config.perf_only: + fps = num_samples / forward_time + return fps + else: + stats = self.get_stats() + + if self.args.save_json and self.jdict: + with open(str(self.save_dir / 'predictions.json'), 'w') as f: + print(f'Saving {f.name} ...') + json.dump(self.jdict, f) # flatten and save + + stats = 
self.eval_json(stats) + + end2end_time = e2e_end_time - e2e_start_time + print(F"E2E time : {end2end_time:.3f} seconds") + + return stats + + def init_metrics(self): + """Initialize evaluation metrics for YOLO.""" + val = self.data.get(self.args.split, '') # validation path + self.is_coco = isinstance(val, str) and 'coco' in val and val.endswith(f'{os.sep}val2017.txt') # is COCO + self.class_map = converter.coco80_to_coco91_class() if self.is_coco else list(range(1000)) + self.args.save_json |= self.is_coco and not self.training # run on final val if training COCO + self.names = self.data['names'] + self.nc = len(self.names) + self.metrics.names = self.names + self.confusion_matrix = ConfusionMatrix(nc=80) + self.seen = 0 + self.jdict = [] + self.stats = dict(tp=[], conf=[], pred_cls=[], target_cls=[], target_img=[]) + +def main(): + config = parse_args() + + batch_size = config.bsz + + overrides = {'mode': 'val'} + cfg_args = get_cfg(cfg=DEFAULT_CFG, overrides=overrides) + + cfg_args.batch = batch_size + cfg_args.save_json = True + + data = { + 'path': Path(config.datasets), + 'val': os.path.join(config.datasets, 'val2017.txt'), + 'names': coco_classes + } + + validator = IxRT_Validator(args=cfg_args, save_dir=Path('.')) + + if config.perf_only: + fps = validator(config, data) + print("FPS : ", fps) + print(f"Performance Check : Test {fps} >= target {config.fps_target}") + else: + stats = validator(config, data) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/object_detection/yolov12/ixrt/quant.py b/models/cv/object_detection/yolov12/ixrt/quant.py new file mode 100644 index 0000000000000000000000000000000000000000..48f9c914f4db85525bb7f52a1a5eec0cd9e3a8d0 --- /dev/null +++ b/models/cv/object_detection/yolov12/ixrt/quant.py @@ -0,0 +1,105 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
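+# quant.py performs ixRT post-training quantization: it samples step * bsz images from the
+# COCO val directory, builds a calibration DataLoader, and calls tensorrt.deploy.static_quantize
+# to write a QDQ-quantized ONNX together with the matching quantization-parameter JSON.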
+import os +import cv2 +import random +import argparse +import numpy as np +from tensorrt.deploy import static_quantize + +import torch +import torchvision.datasets +from torch.utils.data import DataLoader +from common import letterbox + + +def setseed(seed=42): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model_name", type=str) + parser.add_argument("--model", type=str, default="yolov4_bs16_without_decoder.onnx") + parser.add_argument("--dataset_dir", type=str, default="./coco2017/val2017") + parser.add_argument("--ann_file", type=str, default="./coco2017/annotations/instances_val2017.json") + parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], default="hist_percentile") + parser.add_argument("--disable_quant_names", nargs='*', type=str) + parser.add_argument("--save_quant_model", type=str, help="save the quantization model path", default=None) + parser.add_argument("--bsz", type=int, default=16) + parser.add_argument("--step", type=int, default=32) + parser.add_argument("--seed", type=int, default=42) + parser.add_argument("--imgsz", type=int, default=608) + parser.add_argument("--use_letterbox", action="store_true") + args = parser.parse_args() + return args + +args = parse_args() +setseed(args.seed) +model_name = args.model_name + + +def get_dataloader(data_dir, step=32, batch_size=16, new_shape=[608, 608], use_letterbox=False): + num = step * batch_size + val_list = [os.path.join(data_dir, x) for x in os.listdir(data_dir)] + random.shuffle(val_list) + pic_list = val_list[:num] + + calibration_dataset = [] + for file_path in pic_list: + pic_data = cv2.imread(file_path) + org_img = pic_data + assert org_img is not None, 'Image not Found ' + file_path + h0, w0 = org_img.shape[:2] + + if use_letterbox: + img, ratio, dwdh = letterbox(org_img, new_shape=(new_shape[1], new_shape[0]), auto=False, scaleup=True) + else: + img = cv2.resize(org_img, new_shape) + img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + img = np.ascontiguousarray(img) / 255.0 # 0~1 np array + img = torch.from_numpy(img).float() + + calibration_dataset.append(img) + + calibration_dataloader = DataLoader( + calibration_dataset, + shuffle=True, + batch_size=batch_size, + drop_last=True + ) + return calibration_dataloader + +dataloader = get_dataloader( + data_dir=args.dataset_dir, + step=args.step, + batch_size=args.bsz, + new_shape=(args.imgsz, args.imgsz), + use_letterbox=args.use_letterbox +) + +dirname = os.path.dirname(args.save_quant_model) +quant_json_path = os.path.join(dirname, f"quantized_{model_name}.json") + +static_quantize(args.model, + calibration_dataloader=dataloader, + save_quant_onnx_path=args.save_quant_model, + save_quant_params_path=quant_json_path, + observer=args.observer, + data_preprocess=lambda x: x.to("cuda"), + quant_format="qdq", + disable_quant_names=args.disable_quant_names) diff --git a/models/cv/object_detection/yolov12/ixrt/requirements.txt b/models/cv/object_detection/yolov12/ixrt/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..355ab4899bd79ee95e9f38dcc57997e370b17b8c --- /dev/null +++ b/models/cv/object_detection/yolov12/ixrt/requirements.txt @@ -0,0 +1,3 @@ +tqdm +onnx==1.13.0 +huggingface_hub \ No newline at end of file diff --git a/models/cv/object_detection/yolov12/ixrt/scripts/infer_yolov12_fp16_accuracy.sh 
b/models/cv/object_detection/yolov12/ixrt/scripts/infer_yolov12_fp16_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..7eb2e3605a8ba9102927d72788dbcec5e6440520 --- /dev/null +++ b/models/cv/object_detection/yolov12/ixrt/scripts/infer_yolov12_fp16_accuracy.sh @@ -0,0 +1,66 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +PROJ_DIR=${PROJ_DIR} +DATASETS_DIR=${DATASETS_DIR} +COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json +EVAL_DIR=${DATASETS_DIR}/images/val2017 +CHECKPOINTS_DIR=${CHECKPOINTS_DIR} +RUN_DIR=${PROJ_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo ====================== Model Info ====================== +echo Model Name : yolov12 +echo Onnx Path : ${ORIGINE_MODEL} + +BATCH_SIZE=32 +CURRENT_MODEL=${CHECKPOINTS_DIR}/yolov12.onnx + +# Build Engine +echo Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/yolov12_fp16.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision float16 \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi + +# Inference +echo Inference +RUN_BATCH_SIZE=32 +python3 ${RUN_DIR}/inference.py \ + --model_engine ${ENGINE_FILE} \ + --warm_up 2 \ + --bsz ${RUN_BATCH_SIZE} \ + --imgsz 640 \ + --datasets ${DATASETS_DIR} \ + --acc_target 0.3 +exit ${EXIT_STATUS} diff --git a/models/cv/object_detection/yolov12/ixrt/scripts/infer_yolov12_fp16_performance.sh b/models/cv/object_detection/yolov12/ixrt/scripts/infer_yolov12_fp16_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..c228d1cb93d14c0b3cbaee4e8e14bc56464e02e4 --- /dev/null +++ b/models/cv/object_detection/yolov12/ixrt/scripts/infer_yolov12_fp16_performance.sh @@ -0,0 +1,67 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
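+# Performance counterpart of infer_yolov12_fp16_accuracy.sh: build (or reuse) the FP16 engine,
+# then run inference.py with --perf_only true so only throughput is measured against --fps_target.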
+ +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +PROJ_DIR=${PROJ_DIR} +DATASETS_DIR=${DATASETS_DIR} +COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json +EVAL_DIR=${DATASETS_DIR}/images/val2017 +CHECKPOINTS_DIR=${CHECKPOINTS_DIR} +RUN_DIR=${PROJ_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo ====================== Model Info ====================== +echo Model Name : yolov12 +echo Onnx Path : ${ORIGINE_MODEL} + +BATCH_SIZE=32 +CURRENT_MODEL=${CHECKPOINTS_DIR}/yolov12.onnx + +# Build Engine +echo Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/yolov12_fp16.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision float16 \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi + +# Inference +echo Inference +RUN_BATCH_SIZE=32 +python3 ${RUN_DIR}/inference.py \ + --model_engine ${ENGINE_FILE} \ + --warm_up 2 \ + --bsz ${RUN_BATCH_SIZE} \ + --imgsz 640 \ + --datasets ${DATASETS_DIR} \ + --perf_only true \ + --fps_target 0.0 +exit ${EXIT_STATUS} diff --git a/models/cv/object_detection/yolov13/ixrt/README.md b/models/cv/object_detection/yolov13/ixrt/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8a48d79f00a8be18104c8219971d5b64108272e3 --- /dev/null +++ b/models/cv/object_detection/yolov13/ixrt/README.md @@ -0,0 +1,90 @@ +# YOLOv13 (ixRT) + +## Model Description + +YOLOv13 addresses the detection performance bottlenecks of the traditional YOLO series in complex scenarios through innovative HyperACE and FullPAD mechanisms. Additionally, it incorporates lightweight design to significantly reduce computational complexity and parameter count, making it an accurate and efficient object detection model. + +## Supported Environments + +| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release | +| :----: | :----: | :----: | +| MR-V100 | 4.3.0 | 25.12 | + +## Model Preparation + +### Prepare Resources + +Pretrained model: + +Dataset: + - to download the labels dataset. + - to download the validation dataset. + - to download the train dataset. + +```bash +unzip -q -d ./ coco2017labels.zip +unzip -q -d ./coco/images/ train2017.zip +unzip -q -d ./coco/images/ val2017.zip + +coco +├── annotations +│   └── instances_val2017.json +├── images +│   ├── train2017 +│   └── val2017 +├── labels +│   ├── train2017 +│   └── val2017 +├── LICENSE +├── README.txt +├── test-dev2017.txt +├── train2017.cache +├── train2017.txt +├── val2017.cache +└── val2017.txt +``` + +### Install Dependencies + +```bash +pip3 install -r requirements.txt +``` + +## Model Conversion + +```bash +git clone --depth 1 https://github.com/iMoonLab/yolov13.git +cd yolov13 +pip3 install -e . +cd .. 
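+# create checkpoints/ and, after export, move yolov13.onnx into it (as ci/prepare.sh does);
+# the inference step below expects the model under CHECKPOINTS_DIR
+mkdir checkpoints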
+mv yolov13n.pt yolov13.pt +python3 export.py --weight yolov13.pt --batch 32 +``` + +## Model Inference + +```bash +export PROJ_DIR=./ +export DATASETS_DIR=/path/to/coco/ +export CHECKPOINTS_DIR=./checkpoints +export RUN_DIR=./ +``` + +### FP16 + +```bash +# Accuracy +bash scripts/infer_yolov13_fp16_accuracy.sh +# Performance +bash scripts/infer_yolov13_fp16_performance.sh +``` + +## Model Results + +| Model | BatchSize | Precision | FPS | IOU@0.5 | IOU@0.5:0.95 | +| ------- | --------- | --------- | ------- | ------- | ------------ | +| YOLOv13 | 32 | FP16 | 413.62 | 0.574 | 0.412 | + +## References + +- [YOLOv13](https://github.com/iMoonLab/yolov13) diff --git a/models/cv/object_detection/yolov13/ixrt/build_engine.py b/models/cv/object_detection/yolov13/ixrt/build_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..e0839fc3229ef4ab6d23c7ffeae36a1962bcaaf1 --- /dev/null +++ b/models/cv/object_detection/yolov13/ixrt/build_engine.py @@ -0,0 +1,94 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +import os +import cv2 +import argparse +import numpy as np + +import torch +import tensorrt +from tensorrt import Dims + + +def build_engine_trtapi_staticshape(config): + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(config.model) + + precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 + # print("precision : ", precision) + build_config.set_flag(precision) + + plan = builder.build_serialized_network(network, build_config) + engine_file_path = config.engine + with open(engine_file_path, "wb") as f: + f.write(plan) + print("Build static shape engine done!") + + +def build_engine_trtapi_dynamicshape(config): + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + + profile = builder.create_optimization_profile() + profile.set_shape("input", + Dims([1, 3, 608, 608]), + Dims([32, 3, 608, 608]), + Dims([64, 3, 608, 608]), + ) + build_config.add_optimization_profile(profile) + + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(config.model) + precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 + # print("precision : ", precision) + build_config.set_flag(precision) + + # set dynamic + num_inputs = network.num_inputs + for i in range(num_inputs): + input_tensor = network.get_input(i) + input_tensor.shape = Dims([-1, 3, 608, 608]) + + plan 
= builder.build_serialized_network(network, build_config) + engine_file_path = config.engine + with open(engine_file_path, "wb") as f: + f.write(plan) + print("Build dynamic shape engine done!") + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", + help="The precision of datatype") + # engine args + parser.add_argument("--engine", type=str, default=None) + + args = parser.parse_args() + return args + + +if __name__ == "__main__": + args = parse_args() + build_engine_trtapi_staticshape(args) + # build_engine_trtapi_dynamicshape(args) diff --git a/models/cv/object_detection/yolov13/ixrt/ci/prepare.sh b/models/cv/object_detection/yolov13/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..7d678271803509f4d7ec05ef181cf399a3d04017 --- /dev/null +++ b/models/cv/object_detection/yolov13/ixrt/ci/prepare.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +pip3 install -r requirements.txt +cp -r /mnt/deepspark/data/repos/yolov13 ./ + +cd yolov13 +pip3 install -e . +cd .. + +mkdir checkpoints +mv yolov13n.pt yolov13.pt +python3 export.py --weight yolov13.pt --batch 32 +mv yolov13.onnx checkpoints/ diff --git a/models/cv/object_detection/yolov13/ixrt/common.py b/models/cv/object_detection/yolov13/ixrt/common.py new file mode 100644 index 0000000000000000000000000000000000000000..74022b562af71ef8a234b5075252ea6a32505558 --- /dev/null +++ b/models/cv/object_detection/yolov13/ixrt/common.py @@ -0,0 +1,337 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
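+# Shared helpers for the ixRT YOLO pipelines: class-name loading, box-format conversion,
+# COCO-style JSON result writing, engine/context creation, I/O binding setup, batched COCO
+# image loading, letterbox preprocessing, box rescaling/clipping, and CPU NMS / post-processing.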
+import os +import cv2 +import glob +import time +import numpy as np +from tqdm import tqdm + +import tensorrt +from cuda import cuda, cudart + + +def load_class_names(namesfile): + class_names = [] + with open(namesfile, 'r') as fp: + lines = fp.readlines() + for line in lines: + line = line.rstrip() + class_names.append(line) + return class_names + +# input : [bsz, box_num, 5(cx, cy, w, h, conf) + class_num(prob[0], prob[1], ...)] +# output : [bsz, box_num, 6(left_top_x, left_top_y, right_bottom_x, right_bottom_y, class_id, max_prob*conf)] +def box_class85to6(input): + center_x_y = input[:, :2] + side = input[:, 2:4] + conf = input[:, 4:5] + class_id = np.argmax(input[:, 5:], axis = -1) + class_id = class_id.astype(np.float32).reshape(-1, 1) + 1 + max_prob = np.max(input[:, 5:], axis = -1).reshape(-1, 1) + x1_y1 = center_x_y - 0.5 * side + x2_y2 = center_x_y + 0.5 * side + nms_input = np.concatenate([x1_y1, x2_y2, class_id, max_prob*conf], axis = -1) + return nms_input + +def save2json(batch_img_id, pred_boxes, json_result, class_trans): + for i, boxes in enumerate(pred_boxes): + if boxes is not None: + image_id = int(batch_img_id[i]) + # have no target + if image_id == -1: + continue + + for x1, y1, x2, y2, _, p, c in boxes: + x1, y1, x2, y2, p = float(x1), float(y1), float(x2), float(y2), float(p) + c = int(c) + x = x1 + y = y1 + w = x2 - x1 + h = y2 - y1 + + json_result.append( + { + "image_id": image_id, + "category_id": class_trans[c - 1], + "bbox": [x, y, w, h], + "score": p, + } + ) + +################## About TensorRT ################# +def create_engine_context(engine_path, logger): + with open(engine_path, "rb") as f: + runtime = tensorrt.Runtime(logger) + assert runtime + engine = runtime.deserialize_cuda_engine(f.read()) + assert engine + context = engine.create_execution_context() + assert context + + return engine, context + +def setup_io_bindings(engine, context): + # Setup I/O bindings + inputs = [] + outputs = [] + allocations = [] + + for i in range(engine.num_bindings): + is_input = False + if engine.binding_is_input(i): + is_input = True + name = engine.get_binding_name(i) + dtype = engine.get_binding_dtype(i) + shape = context.get_binding_shape(i) + if is_input: + batch_size = shape[0] + size = np.dtype(tensorrt.nptype(dtype)).itemsize + for s in shape: + size *= s + err, allocation = cudart.cudaMalloc(size) + assert err == cudart.cudaError_t.cudaSuccess + binding = { + "index": i, + "name": name, + "dtype": np.dtype(tensorrt.nptype(dtype)), + "shape": list(shape), + "allocation": allocation, + "nbytes": size + } + # print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") + allocations.append(allocation) + if engine.binding_is_input(i): + inputs.append(binding) + else: + outputs.append(binding) + return inputs, outputs, allocations +########################################################## + + +################## About Loading Dataset ################# +def load_images(images_path): + """ + If image path is given, return it directly + For txt file, read it and return each line as image path + In other case, it's a folder, return a list with names of each + jpg, jpeg and png file + """ + input_path_extension = images_path.split('.')[-1] + if input_path_extension in ['jpg', 'jpeg', 'png']: + return [images_path] + elif input_path_extension == "txt": + with open(images_path, "r") as f: + return f.read().splitlines() + else: + return glob.glob( + os.path.join(images_path, "*.jpg")) + \ + glob.glob(os.path.join(images_path, 
"*.png")) + \ + glob.glob(os.path.join(images_path, "*.jpeg")) + +def prepare_batch(images_path, bs=16, input_size=(608, 608)): + + width, height = input_size + + batch_names = [] + batch_images = [] + batch_shapes = [] + + temp_names = [] + temp_images = [] + temp_shapes = [] + + for i, image_path in tqdm(enumerate(images_path), desc="Loading coco data"): + name = os.path.basename(image_path) + image = cv2.imread(image_path) + h, w, _ = image.shape + image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + image_resized = cv2.resize(image_rgb, (width, height), + interpolation=cv2.INTER_LINEAR) + custom_image = image_resized.transpose(2, 0, 1).astype(np.float32) / 255. + custom_image = np.expand_dims(custom_image, axis=0) + + if i != 0 and i % bs == 0: + batch_names.append(temp_names) + batch_images.append(np.concatenate(temp_images, axis=0)) + batch_shapes.append(temp_shapes) + + temp_names = [name] + temp_images = [custom_image] + temp_shapes = [(h, w)] + else: + temp_names.append(name) + temp_images.append(custom_image) + temp_shapes.append((h, w)) + + return batch_names, batch_images, batch_shapes +########################################################## + + +################## About Operating box ################# +def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32): + # Resize and pad image while meeting stride-multiple constraints + shape = im.shape[:2] # current shape [height, width] + if isinstance(new_shape, int): + new_shape = (new_shape, new_shape) + + # Scale ratio (new / old) + r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) + if not scaleup: # only scale down, do not scale up (for better val mAP) + r = min(r, 1.0) + + # Compute padding + ratio = r, r # width, height ratios + new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) + dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding + if auto: # minimum rectangle + dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding + elif scaleFill: # stretch + dw, dh = 0.0, 0.0 + new_unpad = (new_shape[1], new_shape[0]) + ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios + + dw /= 2 # divide padding into 2 sides + dh /= 2 + + if shape[::-1] != new_unpad: # resize + im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR) + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border + return im, ratio, (dw, dh) + +def scale_boxes(net_shape, boxes, ori_shape, use_letterbox=False): + # Rescale boxes (xyxy) from net_shape to ori_shape + + if use_letterbox: + + gain = min( + net_shape[0] / ori_shape[0], net_shape[1] / ori_shape[1] + ) # gain = new / old + pad = (net_shape[1] - ori_shape[1] * gain) / 2, ( + net_shape[0] - ori_shape[0] * gain + ) / 2.0 + + boxes[:, [0, 2]] -= pad[0] # x padding + boxes[:, [1, 3]] -= pad[1] # y padding + boxes[:, :4] /= gain + else: + x_scale, y_scale = net_shape[1] / ori_shape[1], net_shape[0] / ori_shape[0] + + boxes[:, 0] /= x_scale + boxes[:, 1] /= y_scale + boxes[:, 2] /= x_scale + boxes[:, 3] /= y_scale + + clip_boxes(boxes, ori_shape) + return boxes + +def clip_boxes(boxes, shape): + + boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2 + boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2 +########################################################## + + +################## About pre 
and post processing ######### +def pre_processing(src_img, imgsz=608): + resized = cv2.resize(src_img, (imgsz, imgsz), interpolation=cv2.INTER_LINEAR) + in_img = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB) + in_img = np.transpose(in_img, (2, 0, 1)).astype(np.float32) + in_img = np.expand_dims(in_img, axis=0) + in_img /= 255.0 + return in_img + +def nms_cpu(boxes, confs, nms_thresh=0.5, min_mode=False): + # print(boxes.shape) + x1 = boxes[:, 0] + y1 = boxes[:, 1] + x2 = boxes[:, 2] + y2 = boxes[:, 3] + + areas = (x2 - x1) * (y2 - y1) + order = confs.argsort()[::-1] + + keep = [] + while order.size > 0: + idx_self = order[0] + idx_other = order[1:] + + keep.append(idx_self) + + xx1 = np.maximum(x1[idx_self], x1[idx_other]) + yy1 = np.maximum(y1[idx_self], y1[idx_other]) + xx2 = np.minimum(x2[idx_self], x2[idx_other]) + yy2 = np.minimum(y2[idx_self], y2[idx_other]) + + w = np.maximum(0.0, xx2 - xx1) + h = np.maximum(0.0, yy2 - yy1) + inter = w * h + + if min_mode: + over = inter / np.minimum(areas[order[0]], areas[order[1:]]) + else: + over = inter / (areas[order[0]] + areas[order[1:]] - inter) + + inds = np.where(over <= nms_thresh)[0] + order = order[inds + 1] + + return np.array(keep) + + +def post_processing(img, conf_thresh, nms_thresh, output, num_classes=80): + + # [batch, num, 1, 4] + box_array = output[:, :, :4] + # [batch, num, 2] + class_confs = output[:, :, 4:] + + max_conf = class_confs[:, :, 1] + max_id = class_confs[:, :, 0] + + bboxes_batch = [] + for i in range(box_array.shape[0]): + + argwhere = max_conf[i] > conf_thresh + l_box_array = box_array[i, argwhere, :] + l_max_conf = max_conf[i, argwhere] + l_max_id = max_id[i, argwhere] + + bboxes = [] + # nms for each class + for j in range(num_classes): + + cls_argwhere = l_max_id == j + ll_box_array = l_box_array[cls_argwhere, :] + ll_max_conf = l_max_conf[cls_argwhere] + ll_max_id = l_max_id[cls_argwhere] + + keep = nms_cpu(ll_box_array, ll_max_conf, nms_thresh) + + if (keep.size > 0): + ll_box_array = ll_box_array[keep, :] + ll_max_conf = ll_max_conf[keep] + ll_max_id = ll_max_id[keep] + + for k in range(ll_box_array.shape[0]): + bboxes.append([ll_box_array[k, 0], ll_box_array[k, 1], ll_box_array[k, 2], + ll_box_array[k, 3], ll_max_conf[k], ll_max_conf[k], ll_max_id[k]]) + + bboxes_batch.append(bboxes) + + return bboxes_batch +########################################################## + diff --git a/models/cv/object_detection/yolov13/ixrt/export.py b/models/cv/object_detection/yolov13/ixrt/export.py new file mode 100644 index 0000000000000000000000000000000000000000..388e3a8540b58262f9d8c8e545848addf3afb606 --- /dev/null +++ b/models/cv/object_detection/yolov13/ixrt/export.py @@ -0,0 +1,43 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
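+
+# Export a YOLOv13 PyTorch checkpoint to ONNX through the Ultralytics API
+# (static 640x640 input, ONNX opset 11, batch size taken from --batch).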
+ +import argparse +from ultralytics import YOLO + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--weight", + type=str, + required=True, + help="pytorch model weight.") + + parser.add_argument("--batch", + type=int, + required=True, + help="batchsize of the model.") + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + model = YOLO(args.weight).cpu() + + model.export(format='onnx', batch=args.batch, imgsz=(640, 640), opset=11) + +if __name__ == "__main__": + main() diff --git a/models/cv/object_detection/yolov13/ixrt/inference.py b/models/cv/object_detection/yolov13/ixrt/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..1ac908a7c8f8092f3fb59018cf295ecfb954fa0c --- /dev/null +++ b/models/cv/object_detection/yolov13/ixrt/inference.py @@ -0,0 +1,250 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import json +import argparse +import time +import tensorrt +from tensorrt import Dims +from cuda import cuda, cudart +import torch +import numpy as np +from tqdm import tqdm + +from common import create_engine_context, setup_io_bindings + +from pathlib import Path + +from ultralytics.cfg import get_cfg +from ultralytics.data import converter +from ultralytics.utils import DEFAULT_CFG +from ultralytics.data.utils import check_det_dataset +from ultralytics.utils.metrics import ConfusionMatrix +from ultralytics.models.yolo.detect import DetectionValidator + +coco_classes = {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', + 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', + 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', + 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', + 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', + 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed', + 60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone', 68: 'microwave', 69: 'oven', + 70: 'toaster', 71: 'sink', 72: 'refrigerator', 73: 'book', 74: 'clock', 75: 'vase', 76: 'scissors', 77: 'teddy bear', 78: 'hair drier', 79: 'toothbrush'} + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--model_engine", + type=str, + required=True, + help="ixrt engine path.") + + parser.add_argument("--bsz", + type=int, + required=True, + help="inference batch size.") + + 
parser.add_argument( + "--imgsz", + "--img", + "--img-size", + type=int, + default=640, + help="inference size h,w", + ) + + parser.add_argument("--datasets", + type=str, + required=True, + help="datasets path.") + + parser.add_argument("--warm_up", + type=int, + default=3, + help="number of warmup before test.") + + parser.add_argument("--num_workers", + type=int, + default=16, + help="number of workers used in pytorch dataloader.") + + parser.add_argument("--acc_target", + type=float, + default=0.0, + help="Model inference Accuracy target.") + + parser.add_argument("--fps_target", + type=float, + default=0.0, + help="Model inference FPS target.") + + parser.add_argument("--conf", + type=float, + default=0.001, + help="confidence threshold.") + + parser.add_argument("--iou", + type=float, + default=0.65, + help="iou threshold.") + + parser.add_argument("--perf_only", + type=bool, + default=False, + help="Run performance test only") + + args = parser.parse_args() + + return args + +class IxRT_Validator(DetectionValidator): + def __call__(self, config, data): + self.data = data + self.stride = 32 + self.dataloader = self.get_dataloader(self.data.get(self.args.split), self.args.batch) + self.init_metrics() + + total_num = 0 + + input_name = "input" + host_mem = tensorrt.IHostMemory + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + engine, context = create_engine_context(config.model_engine, logger) + input_idx = engine.get_binding_index(input_name) + context.set_binding_shape(input_idx, Dims((config.bsz,3,config.imgsz,config.imgsz))) + inputs, outputs, allocations = setup_io_bindings(engine, context) + + if config.warm_up > 0: + print("\nWarm Start.") + for i in range(config.warm_up): + context.execute_v2(allocations) + print("Warm Done.") + + forward_time = 0.0 + num_samples = 0 + + e2e_start_time = time.time() + for batch in tqdm(self.dataloader): + batch = self.preprocess(batch) + + imgs = batch['img'] + pad_batch = len(imgs) != self.args.batch + if pad_batch: + origin_size = len(imgs) + imgs = np.resize(imgs, (self.args.batch, *imgs.shape[1:])) + + batch_data = np.ascontiguousarray(imgs) + data_shape = batch_data.shape + + cur_bsz_sample = batch_data.shape[0] + num_samples += cur_bsz_sample + + # Set input + input_idx = engine.get_binding_index(input_name) + context.set_binding_shape(input_idx, Dims(data_shape)) + inputs, outputs, allocations = setup_io_bindings(engine, context) + + err, = cuda.cuMemcpyHtoD(inputs[0]["allocation"], batch_data, batch_data.nbytes) + assert(err == cuda.CUresult.CUDA_SUCCESS) + # Prepare the output data + output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) + + + start_time = time.time() + context.execute_v2(allocations) + end_time = time.time() + forward_time += end_time - start_time + + err, = cuda.cuMemcpyDtoH(output, outputs[0]["allocation"], outputs[0]["nbytes"]) + assert(err == cuda.CUresult.CUDA_SUCCESS) + + for alloc in allocations: + if not alloc: + continue + (err,) = cudart.cudaFree(alloc) + assert err == cudart.cudaError_t.cudaSuccess + + if pad_batch: + output = output[:origin_size] + + outputs = torch.from_numpy(output) + + preds = self.postprocess([outputs]) + + self.update_metrics(preds, batch) + + e2e_end_time = time.time() + if config.perf_only: + fps = num_samples / forward_time + return fps + else: + stats = self.get_stats() + + if self.args.save_json and self.jdict: + with open(str(self.save_dir / 'predictions.json'), 'w') as f: + print(f'Saving {f.name} ...') + json.dump(self.jdict, f) # flatten and save + + stats = 
self.eval_json(stats) + + end2end_time = e2e_end_time - e2e_start_time + print(F"E2E time : {end2end_time:.3f} seconds") + + return stats + + def init_metrics(self): + """Initialize evaluation metrics for YOLO.""" + val = self.data.get(self.args.split, '') # validation path + self.is_coco = isinstance(val, str) and 'coco' in val and val.endswith(f'{os.sep}val2017.txt') # is COCO + self.class_map = converter.coco80_to_coco91_class() if self.is_coco else list(range(1000)) + self.args.save_json |= self.is_coco and not self.training # run on final val if training COCO + self.names = self.data['names'] + self.nc = len(self.names) + self.metrics.names = self.names + self.confusion_matrix = ConfusionMatrix(nc=80) + self.seen = 0 + self.jdict = [] + self.stats = dict(tp=[], conf=[], pred_cls=[], target_cls=[], target_img=[]) + +def main(): + config = parse_args() + + batch_size = config.bsz + + overrides = {'mode': 'val'} + cfg_args = get_cfg(cfg=DEFAULT_CFG, overrides=overrides) + + cfg_args.batch = batch_size + cfg_args.save_json = True + + data = { + 'path': Path(config.datasets), + 'val': os.path.join(config.datasets, 'val2017.txt'), + 'names': coco_classes + } + + validator = IxRT_Validator(args=cfg_args, save_dir=Path('.')) + + if config.perf_only: + fps = validator(config, data) + print("FPS : ", fps) + print(f"Performance Check : Test {fps} >= target {config.fps_target}") + else: + stats = validator(config, data) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/object_detection/yolov13/ixrt/quant.py b/models/cv/object_detection/yolov13/ixrt/quant.py new file mode 100644 index 0000000000000000000000000000000000000000..48f9c914f4db85525bb7f52a1a5eec0cd9e3a8d0 --- /dev/null +++ b/models/cv/object_detection/yolov13/ixrt/quant.py @@ -0,0 +1,105 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
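+
+# Post-training INT8 calibration: sample step * bsz images from --dataset_dir,
+# preprocess them (resize or letterbox, BGR->RGB, HWC->CHW, scale to 0~1), and
+# call static_quantize to write a QDQ ONNX model plus a quantization-parameter
+# JSON next to the path given by --save_quant_model.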
+import os +import cv2 +import random +import argparse +import numpy as np +from tensorrt.deploy import static_quantize + +import torch +import torchvision.datasets +from torch.utils.data import DataLoader +from common import letterbox + + +def setseed(seed=42): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model_name", type=str) + parser.add_argument("--model", type=str, default="yolov4_bs16_without_decoder.onnx") + parser.add_argument("--dataset_dir", type=str, default="./coco2017/val2017") + parser.add_argument("--ann_file", type=str, default="./coco2017/annotations/instances_val2017.json") + parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], default="hist_percentile") + parser.add_argument("--disable_quant_names", nargs='*', type=str) + parser.add_argument("--save_quant_model", type=str, help="save the quantization model path", default=None) + parser.add_argument("--bsz", type=int, default=16) + parser.add_argument("--step", type=int, default=32) + parser.add_argument("--seed", type=int, default=42) + parser.add_argument("--imgsz", type=int, default=608) + parser.add_argument("--use_letterbox", action="store_true") + args = parser.parse_args() + return args + +args = parse_args() +setseed(args.seed) +model_name = args.model_name + + +def get_dataloader(data_dir, step=32, batch_size=16, new_shape=[608, 608], use_letterbox=False): + num = step * batch_size + val_list = [os.path.join(data_dir, x) for x in os.listdir(data_dir)] + random.shuffle(val_list) + pic_list = val_list[:num] + + calibration_dataset = [] + for file_path in pic_list: + pic_data = cv2.imread(file_path) + org_img = pic_data + assert org_img is not None, 'Image not Found ' + file_path + h0, w0 = org_img.shape[:2] + + if use_letterbox: + img, ratio, dwdh = letterbox(org_img, new_shape=(new_shape[1], new_shape[0]), auto=False, scaleup=True) + else: + img = cv2.resize(org_img, new_shape) + img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + img = np.ascontiguousarray(img) / 255.0 # 0~1 np array + img = torch.from_numpy(img).float() + + calibration_dataset.append(img) + + calibration_dataloader = DataLoader( + calibration_dataset, + shuffle=True, + batch_size=batch_size, + drop_last=True + ) + return calibration_dataloader + +dataloader = get_dataloader( + data_dir=args.dataset_dir, + step=args.step, + batch_size=args.bsz, + new_shape=(args.imgsz, args.imgsz), + use_letterbox=args.use_letterbox +) + +dirname = os.path.dirname(args.save_quant_model) +quant_json_path = os.path.join(dirname, f"quantized_{model_name}.json") + +static_quantize(args.model, + calibration_dataloader=dataloader, + save_quant_onnx_path=args.save_quant_model, + save_quant_params_path=quant_json_path, + observer=args.observer, + data_preprocess=lambda x: x.to("cuda"), + quant_format="qdq", + disable_quant_names=args.disable_quant_names) diff --git a/models/cv/object_detection/yolov13/ixrt/requirements.txt b/models/cv/object_detection/yolov13/ixrt/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..763d3b6b6d36b3e49917bd4cdafe9b9a76e0dc34 --- /dev/null +++ b/models/cv/object_detection/yolov13/ixrt/requirements.txt @@ -0,0 +1,4 @@ +tqdm +onnx==1.13.0 +huggingface_hub +ultralytics \ No newline at end of file diff --git a/models/cv/object_detection/yolov13/ixrt/scripts/infer_yolov13_fp16_accuracy.sh 
b/models/cv/object_detection/yolov13/ixrt/scripts/infer_yolov13_fp16_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..2ea5fb392559c9d3c251143339f47980883642a6 --- /dev/null +++ b/models/cv/object_detection/yolov13/ixrt/scripts/infer_yolov13_fp16_accuracy.sh @@ -0,0 +1,66 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +PROJ_DIR=${PROJ_DIR} +DATASETS_DIR=${DATASETS_DIR} +COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json +EVAL_DIR=${DATASETS_DIR}/images/val2017 +CHECKPOINTS_DIR=${CHECKPOINTS_DIR} +RUN_DIR=${PROJ_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo ====================== Model Info ====================== +echo Model Name : yolov13 +echo Onnx Path : ${ORIGINE_MODEL} + +BATCH_SIZE=32 +CURRENT_MODEL=${CHECKPOINTS_DIR}/yolov13.onnx + +# Build Engine +echo Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/yolov13_fp16.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision float16 \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi + +# Inference +echo Inference +RUN_BATCH_SIZE=32 +python3 ${RUN_DIR}/inference.py \ + --model_engine ${ENGINE_FILE} \ + --warm_up 2 \ + --bsz ${RUN_BATCH_SIZE} \ + --imgsz 640 \ + --datasets ${DATASETS_DIR} \ + --acc_target 0.3 +exit ${EXIT_STATUS} diff --git a/models/cv/object_detection/yolov13/ixrt/scripts/infer_yolov13_fp16_performance.sh b/models/cv/object_detection/yolov13/ixrt/scripts/infer_yolov13_fp16_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..c5b8d859ea3199476216c1e6741e2bfdb5a83af2 --- /dev/null +++ b/models/cv/object_detection/yolov13/ixrt/scripts/infer_yolov13_fp16_performance.sh @@ -0,0 +1,67 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
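+
+# Performance run: build (or reuse) the FP16 engine for yolov13.onnx, then call
+# inference.py with --perf_only to measure FPS on the dataset under DATASETS_DIR.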
+
+EXIT_STATUS=0
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then
+        EXIT_STATUS=1
+    fi
+}
+
+PROJ_DIR=${PROJ_DIR}
+DATASETS_DIR=${DATASETS_DIR}
+COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json
+EVAL_DIR=${DATASETS_DIR}/images/val2017
+CHECKPOINTS_DIR=${CHECKPOINTS_DIR}
+RUN_DIR=${PROJ_DIR}
+ORIGINE_MODEL=${CHECKPOINTS_DIR}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo ====================== Model Info ======================
+echo Model Name : yolov13
+echo Onnx Path : ${ORIGINE_MODEL}
+
+BATCH_SIZE=32
+CURRENT_MODEL=${CHECKPOINTS_DIR}/yolov13.onnx
+
+# Build Engine
+echo Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/yolov13_fp16.engine
+if [ -f $ENGINE_FILE ];then
+    echo " "Build Engine Skip, $ENGINE_FILE has been existed
+else
+    python3 ${RUN_DIR}/build_engine.py \
+        --precision float16 \
+        --model ${CURRENT_MODEL} \
+        --engine ${ENGINE_FILE}
+    echo " "Generate Engine ${ENGINE_FILE}
+fi
+
+# Inference
+echo Inference
+RUN_BATCH_SIZE=32
+python3 ${RUN_DIR}/inference.py \
+    --model_engine ${ENGINE_FILE} \
+    --warm_up 2 \
+    --bsz ${RUN_BATCH_SIZE} \
+    --imgsz 640 \
+    --datasets ${DATASETS_DIR} \
+    --perf_only true \
+    --fps_target 0.0
+exit ${EXIT_STATUS}
diff --git a/models/multimodal/vision_language_model/nvlm/vllm/README.md b/models/multimodal/vision_language_model/nvlm/vllm/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..2a454ad1d8e2551803a3e57cfb6c888cb2b297ab
--- /dev/null
+++ b/models/multimodal/vision_language_model/nvlm/vllm/README.md
@@ -0,0 +1,38 @@
+# NVLM (vLLM)
+
+## Model Description
+
+NVLM is a family of frontier-class multimodal large language models (LLMs) that achieve state-of-the-art results on vision-language tasks, rivaling the leading proprietary models (e.g., GPT-4o) and open-access models (e.g., Llama 3-V 405B and InternVL 2). Remarkably, NVLM 1.0 shows improved text-only performance over its LLM backbone after multimodal training.
+
+## Supported Environments
+
+| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release |
+| :----: | :----: | :----: |
+| MR-V100 | 4.3.0 | 25.12 |
+
+## Model Preparation
+
+### Prepare Resources
+
+- Model:
+
+```bash
+cp -r ../../vllm_public_assets/ ./
+
+# Download model from the website and make sure the model's path is "data/NVLM-D-72B"
+mkdir data
+```
+
+### Install Dependencies
+
+In order to run the model smoothly, you need to get the sdk from [resource center](https://support.iluvatar.com/#/ProductLine?id=2) of Iluvatar CoreX official website.
+
+## Model Inference
+
+```bash
+export VLLM_ASSETS_CACHE=../vllm/
+export VLLM_FORCE_NCCL_COMM=1
+python3 offline_inference_vision_language.py --model data/NVLM-D-72B -tp 8
+```
+
+## Model Results
\ No newline at end of file
diff --git a/models/multimodal/vision_language_model/nvlm/vllm/ci/prepare.sh b/models/multimodal/vision_language_model/nvlm/vllm/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..b54c8d39af69827a8993f582b3029fd463c58c0b
--- /dev/null
+++ b/models/multimodal/vision_language_model/nvlm/vllm/ci/prepare.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +cp -r ../../vllm_public_assets/ ./ diff --git a/models/multimodal/vision_language_model/nvlm/vllm/offline_inference_vision_language.py b/models/multimodal/vision_language_model/nvlm/vllm/offline_inference_vision_language.py new file mode 100644 index 0000000000000000000000000000000000000000..db7e8daf8158b3f2f1a538c79c5d5c742e216e7c --- /dev/null +++ b/models/multimodal/vision_language_model/nvlm/vllm/offline_inference_vision_language.py @@ -0,0 +1,160 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +This example shows how to use vLLM for running offline inference +with the correct prompt format on vision language models. + +For most models, the prompt format should follow corresponding examples +on HuggingFace model repository. +""" +from transformers import AutoTokenizer + +from vllm import LLM, SamplingParams +from vllm.assets.image import ImageAsset +from vllm.assets.video import VideoAsset +from vllm.utils import FlexibleArgumentParser + +# NOTE: The default `max_num_seqs` and `max_model_len` may result in OOM on +# lower-end GPUs. +# Unless specified, these settings have been tested to work on a single L4. + + +# NVLM-D +def run_nvlm_d(question: str, modality: str, model: str, tp: int): + assert modality == "image" + + # Adjust this as necessary to fit in GPU + llm = LLM( + model=model, + trust_remote_code=True, + max_model_len=4096, + tensor_parallel_size=tp, + ) + + tokenizer = AutoTokenizer.from_pretrained(model, + trust_remote_code=True) + messages = [{'role': 'user', 'content': f"\n{question}"}] + prompt = tokenizer.apply_chat_template(messages, + tokenize=False, + add_generation_prompt=True) + stop_token_ids = None + return llm, prompt, stop_token_ids + + +def get_multi_modal_input(args): + """ + return { + "data": image or video, + "question": question, + } + """ + if args.modality == "image": + # Input image and question + image = ImageAsset("cherry_blossom") \ + .pil_image.convert("RGB") + img_question = "What is the content of this image?" + + return { + "data": image, + "question": img_question, + } + + if args.modality == "video": + # Input video and question + video = VideoAsset(name="sample_demo_1.mp4", + num_frames=args.num_frames).np_ndarrays + vid_question = "Why is this video funny?" + + return { + "data": video, + "question": vid_question, + } + + msg = f"Modality {args.modality} is not supported." 
+ raise ValueError(msg) + + +def main(args): + model = args.model + tp = args.tensor_parallel_size + modality = args.modality + + mm_input = get_multi_modal_input(args) + data = mm_input["data"] + question = mm_input["question"] + + llm, prompt, stop_token_ids = run_nvlm_d(question, modality,model, tp) + + # We set temperature to 0.2 so that outputs can be different + # even when all prompts are identical when running batch inference. + sampling_params = SamplingParams(temperature=0.2, + max_tokens=64, + stop_token_ids=stop_token_ids) + + assert args.num_prompts > 0 + if args.num_prompts == 1: + # Single inference + inputs = { + "prompt": prompt, + "multi_modal_data": { + modality: data + }, + } + + else: + # Batch inference + inputs = [{ + "prompt": prompt, + "multi_modal_data": { + modality: data + }, + } for _ in range(args.num_prompts)] + + outputs = llm.generate(inputs, sampling_params=sampling_params) + + for o in outputs: + generated_text = o.outputs[0].text + print(generated_text) + + +if __name__ == "__main__": + parser = FlexibleArgumentParser( + description='Demo on using vLLM for offline inference with ' + 'vision language models') + parser.add_argument('--model', + '-m', + type=str, + help='model dir') + parser.add_argument('--tensor-parallel-size', + '-tp', + type=int, + help='Tensor parallel size') + parser.add_argument('--num-prompts', + type=int, + default=1, + help='Number of prompts to run.') + parser.add_argument('--modality', + type=str, + default="image", + choices=['image', 'video'], + help='Modality of the input.') + parser.add_argument('--num-frames', + type=int, + default=16, + help='Number of frames to extract from the video.') + args = parser.parse_args() + main(args) \ No newline at end of file diff --git a/models/multimodal/vision_language_model/paligemma/vllm/README.md b/models/multimodal/vision_language_model/paligemma/vllm/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7d484c6ceee23fa53ae7ddc543707ca73b986e5b --- /dev/null +++ b/models/multimodal/vision_language_model/paligemma/vllm/README.md @@ -0,0 +1,37 @@ +# PaliGemma (vLLM) + +## Model Description + +PaliGemma is a versatile and lightweight vision-language model (VLM) inspired by PaLI-3 and based on open components such as the SigLIP vision model and the Gemma language model. It takes both image and text as input and generates text as output, supporting multiple languages. It is designed for class-leading fine-tune performance on a wide range of vision-language tasks such as image and short video caption, visual question answering, text reading, object detection and object segmentation. + +## Supported Environments + +| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release | +| :----: | :----: | :----: | +| MR-V100 | 4.3.0 | 25.12 | + +## Model Preparation + +### Prepare Resources + +- Model: + +```bash +cp -r ../../vllm_public_assets/ ./ + +# Download model from the website and make sure the model's path is "data/paligemma-3b-pt-224" +mkdir data +``` + +### Install Dependencies + +In order to run the model smoothly, you need to get the sdk from [resource center](https://support.iluvatar.com/#/ProductLine?id=2) of Iluvatar CoreX official website. 
+
+## Model Inference
+
+```bash
+export VLLM_ASSETS_CACHE=../vllm/
+python3 offline_inference_vision_language.py --model data/paligemma-3b-pt-224 --max-tokens 256 --trust-remote-code --temperature 0.0
+```
+
+## Model Results
\ No newline at end of file
diff --git a/models/multimodal/vision_language_model/paligemma/vllm/ci/prepare.sh b/models/multimodal/vision_language_model/paligemma/vllm/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..b54c8d39af69827a8993f582b3029fd463c58c0b
--- /dev/null
+++ b/models/multimodal/vision_language_model/paligemma/vllm/ci/prepare.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+
+cp -r ../../vllm_public_assets/ ./
diff --git a/models/multimodal/vision_language_model/paligemma/vllm/offline_inference_vision_language.py b/models/multimodal/vision_language_model/paligemma/vllm/offline_inference_vision_language.py
new file mode 100644
index 0000000000000000000000000000000000000000..71d8fb1c6e22e8aa4fe6271acd64242a50c500d2
--- /dev/null
+++ b/models/multimodal/vision_language_model/paligemma/vllm/offline_inference_vision_language.py
@@ -0,0 +1,145 @@
+#!/bin/bash
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+This example shows how to use vLLM for running offline inference
+with the correct prompt format on vision language models.
+
+For most models, the prompt format should follow corresponding examples
+on HuggingFace model repository.
+""" +import sys +from pathlib import Path +import os +sys.path.append(str(Path(__file__).resolve().parent.parent.parent)) +import argparse +import dataclasses +import inspect + +from vllm.assets.image import ImageAsset +from vllm.assets.video import VideoAsset +from vllm import LLM, EngineArgs, SamplingParams +from utils import sampling_add_cli_args + +# PaliGemma +def run_paligemma(question,engine_params,modality): + assert modality == "image" + # PaliGemma has special prompt format for VQA + prompt = "caption en" + llm = LLM(**engine_params) + stop_token_ids = None + return llm, prompt, stop_token_ids + + + +def get_multi_modal_input(args): + """ + return { + "data": image or video, + "question": question, + } + """ + if args.modality == "image": + # Input image and question + image = ImageAsset("cherry_blossom").pil_image.convert("RGB") + img_question = "What is the content of this image?" + + return { + "data": image, + "question": img_question, + } + + if args.modality == "video": + # Input video and question + video = VideoAsset(name="sample_demo_1.mp4", + num_frames=args.num_frames).np_ndarrays + vid_question = "Why is this video funny?" + + return { + "data": video, + "question": vid_question, + } + + msg = f"Modality {args.modality} is not supported." + raise ValueError(msg) + + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('--num-prompts', + type=int, + default=1, + help='Number of prompts to run.') + parser.add_argument('--modality', + type=str, + default="image", + help='Modality of the input.') + parser.add_argument('--num-frames', + type=int, + default=16, + help='Number of frames to extract from the video.') + parser = EngineArgs.add_cli_args(parser) + parser = sampling_add_cli_args(parser) + args = parser.parse_args() + engine_args = [attr.name for attr in dataclasses.fields(EngineArgs)] + sampling_args = [ + param.name + for param in list( + inspect.signature(SamplingParams).parameters.values() + ) + ] + engine_params = {attr: getattr(args, attr) for attr in engine_args} + sampling_params = { + attr: getattr(args, attr) for attr in sampling_args if args.__contains__(attr) + } + + modality = args.modality + mm_input = get_multi_modal_input(args) + data = mm_input["data"] + question = mm_input["question"] + + llm, prompt, stop_token_ids = run_paligemma(question,engine_params,args.modality) + sampling_params['stop_token_ids'] = stop_token_ids + + # We set temperature to 0.2 so that outputs can be different + # even when all prompts are identical when running batch inference. 
+ sampling_params = SamplingParams(**sampling_params) + + assert args.num_prompts > 0 + if args.num_prompts == 1: + # Single inference + inputs = { + "prompt": prompt, + "multi_modal_data": { + modality: data + }, + } + + else: + # Batch inference + inputs = [{ + "prompt": prompt, + "multi_modal_data": { + modality: data + }, + } for _ in range(args.num_prompts)] + + outputs = llm.generate(inputs, sampling_params=sampling_params) + + for o in outputs: + generated_text = o.outputs[0].text + print(generated_text) \ No newline at end of file diff --git a/models/multimodal/vision_language_model/phi3_v/vllm/README.md b/models/multimodal/vision_language_model/phi3_v/vllm/README.md new file mode 100644 index 0000000000000000000000000000000000000000..97c07cf11119e969674be98ae8f9e10e52f77632 --- /dev/null +++ b/models/multimodal/vision_language_model/phi3_v/vllm/README.md @@ -0,0 +1,38 @@ +# Phi3_v (vLLM) + +## Model Description + +The Phi-3-Vision-128K-Instruct is a lightweight, state-of-the-art open multimodal model built upon datasets which include - synthetic data and filtered publicly available websites - with a focus on very high-quality, reasoning dense data both on text and vision. The model belongs to the Phi-3 model family, and the multimodal version comes with 128K context length (in tokens) it can support. The model underwent a rigorous enhancement process, incorporating both supervised fine-tuning and direct preference optimization to ensure precise instruction adherence and robust safety measures. + +## Supported Environments + +| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release | +| :----: | :----: | :----: | +| MR-V100 | 4.3.0 | 25.12 | + +## Model Preparation + +### Prepare Resources + +- Model: + +```bash +cp -r ../../vllm_public_assets/ ./ + +# Download model from the website and make sure the model's path is "data/Phi-3-vision-128k-instruct" +mkdir data +``` + +### Install Dependencies + +In order to run the model smoothly, you need to get the sdk from [resource center](https://support.iluvatar.com/#/ProductLine?id=2) of Iluvatar CoreX official website. + + +## Model Inference + +```bash +export VLLM_ASSETS_CACHE=../vllm/ +python3 offline_inference_vision_language.py --model Phi-3-vision-128k-instruct --max-tokens 256 -tp 4 --trust-remote-code --max-model-len 4096 --temperature 0.0 +``` + +## Model Results \ No newline at end of file diff --git a/models/multimodal/vision_language_model/phi3_v/vllm/ci/prepare.sh b/models/multimodal/vision_language_model/phi3_v/vllm/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..b54c8d39af69827a8993f582b3029fd463c58c0b --- /dev/null +++ b/models/multimodal/vision_language_model/phi3_v/vllm/ci/prepare.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
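+
+# CI setup: stage the shared vllm_public_assets directory next to this model's scripts.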
+ +set -x + +cp -r ../../vllm_public_assets/ ./ diff --git a/models/multimodal/vision_language_model/phi3_v/vllm/offline_inference_vision_language.py b/models/multimodal/vision_language_model/phi3_v/vllm/offline_inference_vision_language.py new file mode 100644 index 0000000000000000000000000000000000000000..925dee113772197634895291a8af2f0b6324c760 --- /dev/null +++ b/models/multimodal/vision_language_model/phi3_v/vllm/offline_inference_vision_language.py @@ -0,0 +1,148 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +This example shows how to use vLLM for running offline inference +with the correct prompt format on vision language models. + +For most models, the prompt format should follow corresponding examples +on HuggingFace model repository. +""" +import sys +from pathlib import Path +import os +sys.path.append(str(Path(__file__).resolve().parent.parent.parent)) +import argparse +import dataclasses +import inspect +from vllm.assets.image import ImageAsset +from vllm.assets.video import VideoAsset +from vllm import LLM, EngineArgs, SamplingParams + +from utils import sampling_add_cli_args + +# Phi-3-Vision +def run_phi3v(question,engine_params,modality): + assert modality == "image" + prompt = f"<|user|>\n<|image_1|>\n{question}<|end|>\n<|assistant|>\n" # noqa: E501 + # Note: The default setting of max_num_seqs (256) and + # max_model_len (128k) for this model may cause OOM. + # You may lower either to run this example on lower-end GPUs. + + # In this example, we override max_num_seqs to 5 while + # keeping the original context length of 128k. + llm = LLM(**engine_params) + stop_token_ids = None + return llm, prompt, stop_token_ids + +def get_multi_modal_input(args): + """ + return { + "data": image or video, + "question": question, + } + """ + if args.modality == "image": + # Input image and question + image = ImageAsset("cherry_blossom").pil_image.convert("RGB") + img_question = "What is the content of this image?" + + return { + "data": image, + "question": img_question, + } + + if args.modality == "video": + # Input video and question + video = VideoAsset(name="sample_demo_1.mp4", + num_frames=args.num_frames).np_ndarrays + vid_question = "Why is this video funny?" + + return { + "data": video, + "question": vid_question, + } + + msg = f"Modality {args.modality} is not supported." 
+ raise ValueError(msg) + + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('--num-prompts', + type=int, + default=1, + help='Number of prompts to run.') + parser.add_argument('--modality', + type=str, + default="image", + help='Modality of the input.') + parser.add_argument('--num-frames', + type=int, + default=16, + help='Number of frames to extract from the video.') + parser = EngineArgs.add_cli_args(parser) + parser = sampling_add_cli_args(parser) + args = parser.parse_args() + engine_args = [attr.name for attr in dataclasses.fields(EngineArgs)] + sampling_args = [ + param.name + for param in list( + inspect.signature(SamplingParams).parameters.values() + ) + ] + engine_params = {attr: getattr(args, attr) for attr in engine_args} + sampling_params = { + attr: getattr(args, attr) for attr in sampling_args if args.__contains__(attr) + } + + modality = args.modality + mm_input = get_multi_modal_input(args) + data = mm_input["data"] + question = mm_input["question"] + + llm, prompt, stop_token_ids = run_phi3v(question,engine_params,args.modality) + sampling_params['stop_token_ids'] = stop_token_ids + + # We set temperature to 0.2 so that outputs can be different + # even when all prompts are identical when running batch inference. + sampling_params = SamplingParams(**sampling_params) + + assert args.num_prompts > 0 + if args.num_prompts == 1: + # Single inference + inputs = { + "prompt": prompt, + "multi_modal_data": { + modality: data + }, + } + + else: + # Batch inference + inputs = [{ + "prompt": prompt, + "multi_modal_data": { + modality: data + }, + } for _ in range(args.num_prompts)] + + outputs = llm.generate(inputs, sampling_params=sampling_params) + + for o in outputs: + generated_text = o.outputs[0].text + print(generated_text) \ No newline at end of file diff --git a/models/multimodal/vision_language_model/pixtral/vllm/README.md b/models/multimodal/vision_language_model/pixtral/vllm/README.md index 5ef06c0e888f8ff9cf9c76a10b4aaf3a3da87e4f..94c019119fef8ff341323c4182b89b771f084225 100644 --- a/models/multimodal/vision_language_model/pixtral/vllm/README.md +++ b/models/multimodal/vision_language_model/pixtral/vllm/README.md @@ -8,6 +8,7 @@ Pixtral is trained to understand both natural images and documents, achieving 52 | GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release | | :----: | :----: | :----: | +| MR-V100 | dev-only | 25.12 | | MR-V100 | 4.3.0 | 25.09 | | MR-V100 | 4.2.0 | 25.06 | @@ -16,11 +17,12 @@ Pixtral is trained to understand both natural images and documents, achieving 52 ### Prepare Resources - Model: +- Model: ```bash cp -r ../../vllm_public_assets/ ./ -# Download model from the website and make sure the model's path is "data/Aria" +# Download model from the website and make sure the model's path are "data/Pixtral-12B-2409" "data/Pixtral-Large-Instruct-2411-hf-quantized.w4a16" mkdir data ``` @@ -28,19 +30,15 @@ mkdir data In order to run the model smoothly, you need to get the sdk from [resource center](https://support.iluvatar.com/#/ProductLine?id=2) of Iluvatar CoreX official website. 
-```bash -# Install libGL -## CentOS -yum install -y mesa-libGL -## Ubuntu -apt install -y libgl1-mesa-glx -``` - ## Model Inference ```bash export VLLM_ASSETS_CACHE=../vllm/ python3 offline_inference_vision_language.py --model data/Pixtral-12B-2409 --max-tokens 256 -tp 4 --trust-remote-code --temperature 0.0 --tokenizer-mode 'mistral' + +# w4a16 +export VLLM_USE_V1=1 +python3 offline_inference_2411_w4a16.py --model data/Pixtral-Large-Instruct-2411-hf-quantized.w4a16/ ``` ## Model Results \ No newline at end of file diff --git a/models/multimodal/vision_language_model/pixtral/vllm/offline_inference_2411_w4a16.py b/models/multimodal/vision_language_model/pixtral/vllm/offline_inference_2411_w4a16.py new file mode 100644 index 0000000000000000000000000000000000000000..3297d8a0cb27490c3ecc26cabf34972c525d7b48 --- /dev/null +++ b/models/multimodal/vision_language_model/pixtral/vllm/offline_inference_2411_w4a16.py @@ -0,0 +1,65 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from vllm.assets.image import ImageAsset +from vllm import LLM, SamplingParams +from PIL import Image +import argparse + +def inference(args): + # prepare model + llm = LLM( + model=args.model, + trust_remote_code=True, + max_model_len=4096, + max_num_seqs=2, + tensor_parallel_size = 2, + pipeline_parallel_size = 4, + limit_mm_per_prompt={"image": 5}, + ) + + # prepare inputs + question = "请描述这张图片" + + image = Image.open("./vllm_public_assets/cherry_blossom.jpg") + image = image.convert("RGB") + inputs = { + # "prompt": f"<|user|>\n<|image|>\n{question}<|end|>\n<|assistant|>\n", + "prompt": f"[INST]{question}\n[IMG][/INST]", + "multi_modal_data": { + "image": image + }, + } + + # generate response + print("========== SAMPLE GENERATION ==============") + outputs = llm.generate(inputs, SamplingParams(temperature=0.2, max_tokens=1024)) + print(f"RESPONSE: {outputs[0].outputs[0].text}") + print("==========================================") + + +def main(): + parser = argparse.ArgumentParser(description="Example script with --model and --port arguments") + parser.add_argument("--model", type=str, default="/data/nlp/Pixtral-Large-Instruct-2411-hf-quantized.w4a16/", help="Model name or path") + args = parser.parse_args() + + + inference(args) + + + +if __name__ == "__main__": + main() diff --git a/models/multimodal/vision_language_model/step3/vllm/README.md b/models/multimodal/vision_language_model/step3/vllm/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ce1df9df1d573642f834fbb7e3a0c1732d34e627 --- /dev/null +++ b/models/multimodal/vision_language_model/step3/vllm/README.md @@ -0,0 +1,88 @@ +# Step3 (vLLM) + +## Model Description + +Step3 is cutting-edge multimodal reasoning model—built on a Mixture-of-Experts architecture with 321B total parameters and 38B active. It is designed end-to-end to minimize decoding costs while delivering top-tier performance in vision–language reasoning. 
Through the co-design of Multi-Matrix Factorization Attention (MFA) and Attention-FFN Disaggregation (AFD), Step3 maintains exceptional efficiency across both flagship and low-end accelerators. + +## Supported Environments + +| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release | +| :----: | :----: | :----: | +| MR-V100 | dev-only | 25.12 | + +## Model Preparation + +### Prepare Resources + +- Model: + +```bash +# Download model from the website and make sure the model's path is "data/step3" +mkdir data +``` + +### Install Dependencies + +In order to run the model smoothly, you need to get the sdk from [resource center](https://support.iluvatar.com/#/ProductLine?id=2) of Iluvatar CoreX official website. + +```bash +wget http://files.deepspark.org.cn:880/deepspark/data/datasets/MMMU_BETA.json +wget http://files.deepspark.org.cn:880/deepspark/data/datasets/MMSTAR_BETA.json +pip3 install -r requirements.txt +``` + +## Model Inference + +### Inference with w8a8 +#### Starting w8a8 server +```bash +python3 bf162int8.py --input-bf16-hf-path data/step3 --output-int8-hf-path data/step3_w8a8/ + +# starting server +VLLM_DEFAULT_HIGH_RESOLUTION=true VLLM_W8A8_MOE_USE_W4A8=0 VLLM_USE_V1=1 python3 -m vllm.entrypoints.openai.api_server \ +--model data/step3_w8a8/ --max-num-batched-tokens 4096 \ +--gpu-memory-utilization 0.92 --port 12347 \ +--trust-remote-code \ +--disable-cascade-attn --no-enable-prefix-caching \ +--max-model-len 30720 --seed 42 -tp 8 -pp 2 -dp 1 --max-num-seqs 4 --limit-mm-per-prompt image=5 +``` +#### Testing +```bash +curl 127.0.0.1:12347/v1/completions -H "Content-Type: application/json" -d '{"model":"data/step3_w8a8/", +"prompt":"简单介绍一下上海?", +"temperature":0.0, +"max_tokens":128}' + +# acc test +python3 eval_dataset_w8a8.py --dataset_name MMSTAR_BETA --model data/step3_w8a8/ --ip 127.0.0.1 --port 12347 --num_workers 4 +``` + +### Inference with w4a8 +#### Starting w4a8 server +```bash +python3 bf16Toint4.py --input-fp8-hf-path data/step3/ --output-int8-hf-path data/step3_w4a8_TN/ --group-size -1 --format TN --version 2 + +VLLM_DEFAULT_HIGH_RESOLUTION=true VLLM_W8A8_MOE_USE_W4A8=1 VLLM_USE_V1=1 python3 -m vllm.entrypoints.openai.api_server \ +--model data/step3_w4a8_TN/ --max-num-batched-tokens 4096 \ +--gpu-memory-utilization 0.92 --port 12347 \ +--trust-remote-code \ +--disable-cascade-attn --no-enable-prefix-caching \ +--max-model-len 61440 --seed 42 -tp 4 -pp 4 -dp 1 --max-num-seqs 16 --limit-mm-per-prompt image=5 +``` + +#### Testing +```bash +curl 127.0.0.1:12347/v1/completions -H "Content-Type: application/json" -d '{"model":"data/step3_w4a8_TN/", +"prompt":"简单介绍一下上海?", +"temperature":0.0, +"max_tokens":128}' + +# acc test +python3 eval_dataset.py --dataset_name MMMU_BETA --model data/step3_w4a8_TN/ --ip 127.0.0.1 --port 12347 --num_workers 16 +``` + +## Model Results +|Model|MMSTAR_BETA|MMMU_BETA| +|:---:|:---:|:---:| +|step3_w8a8|0.710|0.745| +|step3_w4a8|0.705|0.730| diff --git a/models/multimodal/vision_language_model/step3/vllm/bf162int8.py b/models/multimodal/vision_language_model/step3/vllm/bf162int8.py new file mode 100644 index 0000000000000000000000000000000000000000..76a4b3ed3e3e18f47e22b896578d77147ea383f1 --- /dev/null +++ b/models/multimodal/vision_language_model/step3/vllm/bf162int8.py @@ -0,0 +1,208 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import os +import json +from argparse import ArgumentParser +from glob import glob +from tqdm import tqdm +import shutil +import torch +from safetensors.torch import load_file, save_file + +def weight_quant(v: torch.Tensor, block_size: int = 128): + + #TODO Pdd 128 group? + if v.dim() == 2: + qmax = 127.0 + abs_max = torch.abs(v).max(dim=1, keepdim=True)[0] # [rows, 1] + scale = abs_max / qmax # [rows, 1] + assert scale.shape == (v.shape[0], 1) + quantized = torch.round(v / scale) + quantized = torch.clamp(quantized, -qmax, qmax) + return quantized.to(torch.int8), scale.to(torch.float32) + elif v.dim() == 3: + qmax = 127.0 + scales = torch.empty(v.shape[0], v.shape[1], 1).to(device=v.device, dtype=torch.float32) + quantized = torch.empty_like(v, dtype=torch.int8) + for i in range(v.shape[0]): + abs_max = torch.abs(v[i]).max(dim=1, keepdim=True)[0] + scales[i] = abs_max / qmax + quantized[i] = torch.round(v[i] / scales[i]) + quantized[i] = torch.clamp(quantized[i], -qmax, qmax) + return quantized, scales + + +def process_config_weight_map(fp8_path, int8_path): + config_path = os.path.join(fp8_path, "config.json") + config_save_path = os.path.join(int8_path, "config.json") + + with open(config_path, "r") as f_open: + config = json.load(f_open) + # del config["quantization_config"] + config["compression_config"] = {"config_groups": { + "group_0": { + "input_activations": { + "block_structure": None, + "dynamic": True, + "group_size": None, + "num_bits": 8, + "observer": "memoryless", + "observer_kwargs": {}, + "strategy": "token", + "symmetric": True, + "type": "int" + }, + "output_activations": None, + "targets": [ + "Linear" + ], + "weights": { + "block_structure": None, + "dynamic": False, + "group_size": None, + "num_bits": 8, + "observer": "minmax", + "observer_kwargs": {}, + "strategy": "channel", + "symmetric": True, + "type": "int" + } + } + }, + "format": "int-quantized", + "global_compression_ratio": 1.2405352996226195, + "ignore": [ + "lm_head" + ], + "kv_cache_scheme": None, + "quant_method": "compressed-tensors", + "quantization_status": "frozen" + } + with open(config_save_path, "w") as f_save: + json.dump(config, f_save, indent=4) +def main(fp8_path, int8_path): + """ + Converts FP8 weights to BF16 and saves the converted weights. + + This function reads FP8 weights from the specified directory, converts them to BF16, + and saves the converted weights to another specified directory. It also updates the + model index file to reflect the changes. + + Args: + fp8_path (str): The path to the directory containing the FP8 weights and model index file. + int8_path (str): The path to the directory where the converted int8 weights will be saved. + + Raises: + KeyError: If a required scale_inv tensor is missing for a weight. + + Notes: + - The function assumes that the FP8 weights are stored in safetensor files. + - The function caches loaded safetensor files to optimize memory usage. 
+ - The function updates the model index file to remove references to scale_inv tensors. + """ + torch.set_default_dtype(torch.bfloat16) + os.makedirs(int8_path, exist_ok=True) + model_index_file = os.path.join(fp8_path, "model.safetensors.index.json") + with open(model_index_file, "r") as f: + model_index = json.load(f) + weight_map = model_index["weight_map"] + + # Cache for loaded safetensor files + loaded_files = {} + fp8_weight_names = [] + + # Helper function to get tensor from the correct file + def get_tensor(tensor_name): + """ + Retrieves a tensor from the cached safetensor files or loads it from disk if not cached. + + Args: + tensor_name (str): The name of the tensor to retrieve. + + Returns: + torch.Tensor: The retrieved tensor. + + Raises: + KeyError: If the tensor does not exist in the safetensor file. + """ + file_name = weight_map[tensor_name] + if file_name not in loaded_files: + file_path = os.path.join(fp8_path, file_name) + loaded_files[file_name] = load_file(file_path, device="cuda") + return loaded_files[file_name][tensor_name] + + files = os.listdir(fp8_path) + for file in files: + if not os.path.isdir(os.path.join(fp8_path, file)) and not file.endswith("safetensors") and file != "model.safetensors.index.json" and file != "config.json": + file_path = os.path.join(fp8_path, file) + save_file_path = os.path.join(int8_path, file) + shutil.copy(file_path, save_file_path) + # modify config.json + process_config_weight_map(fp8_path, int8_path) + safetensor_files = list(glob(os.path.join(fp8_path, "*.safetensors"))) + + safetensor_files.sort() + new_weight_map = {} + for safetensor_file in tqdm(safetensor_files): + file_name = os.path.basename(safetensor_file) + current_state_dict = load_file(safetensor_file, device="cuda") + loaded_files[file_name] = current_state_dict + + new_state_dict = {} + for weight_name, weight in current_state_dict.items(): + if (not "vision_model" in weight_name and not "vit" in weight_name) and ("proj" in weight_name or "wq" in weight_name): + try: + int8_v, scale = weight_quant(weight) + new_state_dict[weight_name] = int8_v + new_scale_name = weight_name + "_scale" + new_state_dict[new_scale_name] = scale + new_weight_map[weight_name] = file_name + new_weight_map[new_scale_name] = file_name + except KeyError: + print(f"Warning: Missing scale_inv tensor for {weight_name}, skipping conversion") + new_state_dict[weight_name] = weight + else: + new_state_dict[weight_name] = weight + new_weight_map[weight_name] = file_name + + new_safetensor_file = os.path.join(int8_path, file_name) + save_file(new_state_dict, new_safetensor_file) + + # Memory management: keep only the 2 most recently used files + if len(loaded_files) > 1: + oldest_file = next(iter(loaded_files)) + del loaded_files[oldest_file] + torch.cuda.empty_cache() + # modify model.safetensors.index.json + with open(model_index_file, "r") as f: + model_index = json.load(f) + model_index["weight_map"] = new_weight_map + new_model_index_file = os.path.join(int8_path, "model.safetensors.index.json") + with open(new_model_index_file, "w", encoding="utf-8") as f: + json.dump(model_index, f, indent=2, ensure_ascii=False, sort_keys=True) + print(f"model.safetensors.index.json modified and saved to {new_model_index_file}") + +if __name__ == "__main__": + parser = ArgumentParser() + parser.add_argument("--input-bf16-hf-path", type=str, required=True) + parser.add_argument("--output-int8-hf-path", type=str, required=True) + parser.add_argument("--split-count", type=int, default=None) + args = 
parser.parse_args() + main(args.input_bf16_hf_path, args.output_int8_hf_path) + + +#python3 bf162int8.py --input-bf16-hf-path /data/nlp/ckpt4_bf16/ --output-int8-hf-path /data/nlp/ckpt4_int8/ \ No newline at end of file diff --git a/models/multimodal/vision_language_model/step3/vllm/bf16Toint4.py b/models/multimodal/vision_language_model/step3/vllm/bf16Toint4.py new file mode 100644 index 0000000000000000000000000000000000000000..10775a8016b7838790f4bcbbd75357b62e52b7a6 --- /dev/null +++ b/models/multimodal/vision_language_model/step3/vllm/bf16Toint4.py @@ -0,0 +1,393 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import os +import json +from argparse import ArgumentParser +from glob import glob +from tqdm import tqdm + +import torch +from safetensors.torch import load_file, save_file +import ixformer.inference.functions as ixfop +import shutil +import json + + +def quant_repack_int4(x, group_size, version, format, isAsymQuant: bool = False): + n_experts, n, k = x.shape + if version == 1: + assert not isAsymQuant + + if group_size == -1: + max_x, _ = torch.max(torch.abs(x), dim=-1, keepdim=True) + scales = torch.round(max_x / 7) + scales[scales < 1e-6] = 1 + out = torch.round(x / scales).clamp(-8, 7).to(torch.int8) + else: + x = x.view(n_experts, -1, group_size) + max_x, _ = torch.max(torch.abs(x), dim=-1, keepdim=True) + scales = torch.round(max_x / 7) + scales[scales < 1e-6] = 1 + out = torch.round(x / scales).clamp(-8, 7).to(torch.int8) + + out = out.view(n_experts, n, k) + + if format[0] == "N": + out = out.transpose(-2, -1).contiguous() # NT (num_experts, k , n) + out = out.reshape(n_experts, k // 32, 2, 16, n // 32, 2, 16) + out = out.view(n_experts, k // 32, 2, 16, n // 32, 2, 16) + out = out.permute(0, 1, 5, 3, 4, 2, 6).contiguous().view(n_experts, k, n) + + ## rearange 32 token + shape = out.shape + out = out.view(shape[0], shape[1], shape[-1] // 32, 32) + out_tmp = out.new_empty(shape[0], shape[1], shape[-1] // 32, 16) + for i in range(16): + sign_low_4bit = (out[:, :, :, i] < 0).to(torch.int8) + low_4bit = sign_low_4bit * 8 + (out[:, :, :, i] & 0x07) + high_4bit = out[:, :, :, i + 16] << 4 + out_tmp[:, :, :, i] = high_4bit + low_4bit + out = out_tmp.view(shape[0], shape[1], shape[-1] // 2).contiguous() + + scales = ( + scales.view(n_experts, n, k // group_size).permute(0, 2, 1).contiguous() + if group_size != -1 + else scales.view(n_experts, n) + ) + + return out, scales, None + + if version == 2: + """ + For group_size == -1 (per-channel), the default scale factor is 18 since + 127 / 7 = 18, for quantization with clip, the scale can be set to 16, 17, etc. + the alpha in ixinfer_gemm_helper need to be set to scale / 16.0, and the ixformer + need to be rebuilt. 
+ """ + if group_size == -1: + out = torch.round(x.cpu() / 18).clamp(-8, 7).to(torch.int8) + else: + x = x.view(n_experts, -1, group_size) + if isAsymQuant: + max_x, _ = torch.max(x, dim=-1, keepdim=True) + min_x, _ = torch.min(x, dim=-1, keepdim=True) + scales = ((max_x.to(torch.float32) - min_x.to(torch.float32)) / 15).to( + torch.int8 + ) + zeros = (-min_x / scales - 8).to( + torch.int8 + ) # weight use int4 not uint4, and zero use int8 + out = (x / scales + zeros).clamp(-8, 7).to(torch.int8) + else: + max_x, _ = torch.max(torch.abs(x), dim=-1, keepdim=True) + scales = torch.round(max_x / 7) + scales[scales < 1e-6] = 1 + scales = scales.to(torch.int8) + #.cpu() avoid oom + out = torch.round(x.cpu() / scales.cpu()).clamp(-8, 7).to(torch.int8) + scales = scales.to(x.device) + + out = out.view(n_experts, n, k).contiguous() + + if format[0] == "N": + out = out.transpose(-2, -1).contiguous() # NT (num_experts, k , n) + out = out.reshape(n_experts, k // 32, 2, 16, n // 32, 2, 16) + out = out.view(n_experts, k // 32, 2, 16, n // 32, 2, 16) + out = out.permute(0, 1, 5, 3, 4, 2, 6).contiguous().view(n_experts, k, n) + out = out.to(x.device) + + ## rearange 32 token + shape = out.shape + out = out.view(shape[0], shape[1], shape[-1] // 32, 32) + out_tmp = out.new_empty(shape[0], shape[1], shape[-1] // 32, 16) + for i in range(16): + sign_low_4bit = (out[:, :, :, i] < 0).to(torch.int8) + low_4bit = sign_low_4bit * 8 + (out[:, :, :, i] & 0x07) + high_4bit = out[:, :, :, i + 16] << 4 + out_tmp[:, :, :, i] = high_4bit + low_4bit + out = out_tmp.view(shape[0], shape[1], shape[-1] // 2).contiguous() + + if group_size == -1: + return out, None, None + + scales = scales.to(torch.uint8) + scales_4i8pack = scales.clone().to(torch.int32) + for i in range(3): + scales_4i8pack <<= 8 + scales_4i8pack |= scales + scales_4i8pack = ( + scales_4i8pack.view(n_experts, n, k // group_size) + .permute(0, 2, 1) + .contiguous() + ) + + if not isAsymQuant: + return out, scales_4i8pack, None + + zeros = zeros.to(torch.uint8) + zeros_4i8pack = zeros.clone().to(torch.int32) + for i in range(3): + zeros_4i8pack <<= 8 + zeros_4i8pack |= zeros + zeros_4i8pack = ( + zeros_4i8pack.view(n_experts, n, k // group_size) + .permute(0, 2, 1) + .contiguous() + ) + + return out, scales_4i8pack, zeros_4i8pack + + + +def weight_quant(v: torch.Tensor, block_size: int = 128): + + #TODO Pdd 128 group? 
+ if v.dim() == 2: + qmax = 127.0 + abs_max = torch.abs(v).max(dim=1, keepdim=True)[0] # [rows, 1] + scale = abs_max / qmax # [rows, 1] + assert scale.shape == (v.shape[0], 1) + quantized = torch.round(v / scale) + quantized = torch.clamp(quantized, -qmax, qmax) + return quantized.to(torch.int8), scale.to(torch.float32) + elif v.dim() == 3: + qmax = 127.0 + scales = torch.empty(v.shape[0], v.shape[1], 1).to(device=v.device, dtype=torch.float32) + quantized = torch.empty_like(v, dtype=torch.int8) + for i in range(v.shape[0]): + abs_max = torch.abs(v[i]).max(dim=1, keepdim=True)[0] + scales[i] = abs_max / qmax + quantized[i] = torch.round(v[i] / scales[i]) + quantized[i] = torch.clamp(quantized[i], -qmax, qmax) + return quantized, scales + + + +def weight_dequant_moe(v: torch.Tensor, block_size: int = 128, group_size=-1, format="TN", symmetric=True, version=2): + if v.dim() == 2: + qmax = 127.0 + abs_max = torch.abs(v).max(dim=1, keepdim=True)[0] # [rows, 1] + scales = abs_max / qmax # [rows, 1] + assert scales.shape == (v.shape[0], 1) + quantized = torch.round(v / scales) + quantized = torch.clamp(quantized, -qmax, qmax) + quantized = quantized.to(torch.int8) + elif v.dim() == 3: + qmax = 127.0 + scales = torch.empty(v.shape[0], v.shape[1], 1 ).to(device=v.device, dtype=torch.float32) + quantized = torch.empty_like(v, dtype=torch.int8) + for i in range(v.shape[0]): + abs_max = torch.abs(v[i]).max(dim= 1, keepdim=True)[0] + scales[i] = abs_max / qmax + quantized[i] = torch.round(v[i] / scales[i]) + quantized[i] = torch.clamp(quantized[i], -qmax, qmax) + quantized = quantized.to(torch.int8) + scales = scales.transpose(-2, -1).contiguous() # (48, 5120, 1) → (48, 1, 5120) #NN + assert quantized.dim() in (2, 3), f"Expected quantized to have 2 or 3 dimensions, but got {quantized.dim()}" + if quantized.dim() == 2: + quantized = quantized.unsqueeze(0) + i4_weights, i8scales, i8zeros = quant_repack_int4(quantized, group_size, version, format, not symmetric) + + return i4_weights.squeeze(0), scales.to(torch.float32), i8scales, i8zeros + + + + + +def main(fp8_path, int8_path, group_size, format, symmetric, version, split_count): + """ + Converts FP8 weights to BF16 and saves the converted weights. + + This function reads FP8 weights from the specified directory, converts them to BF16, + and saves the converted weights to another specified directory. It also updates the + model index file to reflect the changes. + + Args: + fp8_path (str): The path to the directory containing the FP8 weights and model index file. + int8_path (str): The path to the directory where the converted int8 weights will be saved. + + Raises: + KeyError: If a required scale_inv tensor is missing for a weight. + + Notes: + - The function assumes that the FP8 weights are stored in safetensor files. + - The function caches loaded safetensor files to optimize memory usage. + - The function updates the model index file to remove references to scale_inv tensors. 
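+
+    Note: despite the FP8 wording above (apparently carried over from a similar
+    conversion script), the input checkpoint in this repo is BF16 (see the README).
+    Dense projection weights outside the vision tower are quantized to per-channel
+    symmetric INT8 (scale = abs_max / 127 per output row), MoE expert projections
+    are additionally repacked into INT4 via quant_repack_int4, and vision_model /
+    vit weights are copied through unchanged.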
+ """ + torch.set_default_dtype(torch.bfloat16) + os.makedirs(int8_path, exist_ok=True) + model_index_file = os.path.join(fp8_path, "model.safetensors.index.json") + with open(model_index_file, "r") as f: + model_index = json.load(f) + weight_map = model_index["weight_map"] + + # Cache for loaded safetensor files + loaded_files = {} + fp8_weight_names = [] + + + safetensor_files = list(glob(os.path.join(fp8_path, "*.safetensors"))) + safetensor_files.sort() + + new_weight_map = {} + all_safetensor = safetensor_files + all_files = list(glob(os.path.join(fp8_path, "*"))) + if split_count is None: + + safetensor_files = safetensor_files + elif split_count == 1: + safetensor_files = safetensor_files[:-2] + else: + safetensor_files = safetensor_files[-2:] + + for safetensor_file in tqdm(safetensor_files): + file_name = os.path.basename(safetensor_file) + current_state_dict = load_file(safetensor_file, device="cuda") + loaded_files[file_name] = current_state_dict + + new_state_dict = {} + for weight_name, weight in current_state_dict.items(): + + if (not "vision_model" in weight_name and not "vit" in weight_name and not "moe" in weight_name) and ("proj" in weight_name or "wq" in weight_name): + + int8_v, scale = weight_quant(weight) + new_state_dict[weight_name] = int8_v + new_scale_name = weight_name + "_scale" + new_state_dict[new_scale_name] = scale + + new_weight_map[weight_name] = file_name + new_weight_map[new_scale_name] = file_name + + elif ("moe" in weight_name) and ("proj" in weight_name): + + i4_weights, scale, i8scales, i8zeros = weight_dequant_moe(weight, 128, group_size, format, symmetric, version) + #scale:[num_experts, 1, out_feature] + #i4_weights:[num_experts, out_feature//2, in_future] + + new_state_dict[weight_name] = i4_weights + + sacle_name = weight_name.replace("weight","weight_scale") + new_state_dict[sacle_name] = scale + + new_weight_map[weight_name] = file_name + new_weight_map[sacle_name] = file_name + + + if i8scales is not None: + i8scales = i8scales.squeeze_(0) + assert i8scales.dim() == 2 + + i8scales_name = weight_name.replace("weight","i8_weight_scale") + new_state_dict[i8scales_name] = i8scales + new_weight_map[i8scales_name] = file_name + + + if i8zeros is not None: + i8zeros = i8zeros.squeeze_(0) + assert i8zeros.dim() == 2 + i8zeros_name = weight_name.replace("weight","i8_weight_zero") + new_state_dict[i8zeros_name] = i8zeros + new_weight_map[i8zeros_name] = file_name + + else: + new_state_dict[weight_name] = weight + new_weight_map[weight_name] = file_name + + new_safetensor_file = os.path.join(int8_path, file_name) + save_file(new_state_dict, new_safetensor_file) + # Memory management: keep only the 2 most recently used files + if len(loaded_files) > 1: + oldest_file = next(iter(loaded_files)) + del loaded_files[oldest_file] + torch.cuda.empty_cache() + + other_files = list(set(all_files) - set(all_safetensor)) + for other_file in other_files: + if os.path.isfile(other_file): + name = other_file.rsplit("/", 1)[1] + shutil.copy(os.path.join(other_file), + os.path.join(int8_path, name)) + + compression_config = { + "config_groups": { + "group_0": { + "input_activations": { + "block_structure": None, + "dynamic": True, + "group_size": None, + "num_bits": 8, + "observer": "memoryless", + "observer_kwargs": {}, + "strategy": "token", + "symmetric": True, + "type": "int" + }, + "output_activations": None, + "targets": [ + "Linear" + ], + "weights": { + "block_structure": None, + "dynamic": False, + "group_size": None if group_size==-1 else group_size, + 
"num_bits": 8, + "observer": "minmax", + "observer_kwargs": {}, + "strategy": "channel" if group_size == -1 else "group", + "symmetric": bool(symmetric), + "type": "int" + } + } + }, + "format": "int-quantized", + "global_compression_ratio": 1.0, + "ignore": [ + "lm_head" + ], + "kv_cache_scheme": None, + "quant_method": "compressed-tensors", + "quantization_status": "frozen" + } + + with open(os.path.join(int8_path, "config.json"), encoding="utf-8") as file: + configs:dict = json.loads(file.read()) + # configs.pop("quantization_config") + configs["compression_config"] = compression_config + with open(os.path.join(int8_path, "config.json"), encoding="utf-8", mode="w") as f: + json.dump(configs, f) + + + with open(model_index_file, "r") as f: + model_index = json.load(f) + model_index["weight_map"] = new_weight_map + new_model_index_file = os.path.join(int8_path, "model.safetensors.index.json") + with open(new_model_index_file, "w", encoding="utf-8") as f: + json.dump(model_index, f, indent=2, ensure_ascii=False, sort_keys=True) + print(f"model.safetensors.index.json modified and saved to {new_model_index_file}") + + +if __name__ == "__main__": + parser = ArgumentParser() + parser.add_argument("--input-fp8-hf-path", type=str, required=True) + parser.add_argument("--output-int8-hf-path", type=str, required=True) + parser.add_argument("--group-size", type=int, default=-1) + parser.add_argument("--format", type=str, default="TN") + parser.add_argument("--symmetric", type=bool, default=True) + parser.add_argument("--version", type=int, default=2) + parser.add_argument("--split-count", type=int, default=None) + args = parser.parse_args() + main(args.input_fp8_hf_path, args.output_int8_hf_path, args.group_size, args.format, args.symmetric, args.version, args.split_count) \ No newline at end of file diff --git a/models/multimodal/vision_language_model/step3/vllm/eval_dataset.py b/models/multimodal/vision_language_model/step3/vllm/eval_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..9abd99d297e333fe74d7dc75b34266759cae1c07 --- /dev/null +++ b/models/multimodal/vision_language_model/step3/vllm/eval_dataset.py @@ -0,0 +1,161 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- + +import re +import copy +import json +import fire +import requests +from loguru import logger +from tqdm import tqdm + + +supported_dataset = { + "MMMU_BETA": "MMMU_BETA.json", + "MMSTAR_BETA": "MMSTAR_BETA.json", +} + + +def move_question_to_front(data): + text_items = [item for item in data if item.get("type") == "text"] + other_items = [item for item in data if item.get("type") != "text"] + return text_items + other_items + + +def batch_processing(func, arg_list, num_workers=16): + all_data = [] + try: + from concurrent.futures import ProcessPoolExecutor, as_completed + + with ProcessPoolExecutor(max_workers=num_workers) as executor: + futures = [executor.submit(func, arg) for arg in arg_list] + for future in tqdm(as_completed(futures), total=len(futures)): + result = future.result() + all_data.append(result) + + except Exception as e: + logger.error(f"Error: {e}") + raise e + finally: + return all_data + + +def load_dataset(dataset_name): + assert dataset_name in supported_dataset, f"Dataset {dataset_name} not supported" + logger.info(f"Loading dataset {dataset_name}") + dataset = json.load(open(supported_dataset[dataset_name])) + return dataset + + +def process_messages(messages): + messages = copy.deepcopy(messages) + # drop ground truth + if messages[-1]['role'].lower() == 'assistant': + 
messages.pop(-1) + return messages + + +def forward(model, history, infer_params, base_url, timeout, retry_times=10): + data = { + "model": model, + "messages": history, + **infer_params, + } + resp = requests.post(base_url, json=data, timeout=timeout, proxies={"http": [], "https": []}) + if resp.status_code != 200: + logger.warning(f"Error: [{resp.status_code}] {resp.text}, retry {retry_times} times") + if retry_times > 0: + return forward(model, history, infer_params, base_url, timeout, retry_times - 1) + else: + raise Exception(f"Error: [{resp.status_code}] {resp.text}") + else: + return resp.json()['choices'][0]['message']['content'] + + +def post_process(raw_output: str) -> str: + model_ans = raw_output.split("")[-1].strip() + model_ans = model_ans.split('answer')[-1].strip() + model_ans = model_ans.split('Answer')[-1].strip() + + if '\\boxed{' in model_ans: + matches = re.findall(r'\\boxed\{(.*?)\}', model_ans) + if matches: + model_ans = matches[-1] + + extracted_ans = re.findall(r'[A-Z]', model_ans) + if len(extracted_ans) > 0: + model_ans = extracted_ans[0] + else: + model_ans = model_ans + + return model_ans + + +def is_correct(model_ans: str, gt_ans: str) -> bool: + if len(str(gt_ans.strip())) == 1 and str(gt_ans.strip()) >= 'A' and str(gt_ans.strip()) <= 'Z': + pass + else: + gt_ans = "A" + + if gt_ans.lower() == model_ans.lower(): + return True + else: + return False + + +def inference_one_sample(args): + model, data, infer_params, base_url, timeout = args + messages = process_messages(data) + model_ans = forward(model, messages, infer_params, base_url, timeout) + model_ans = post_process(model_ans) + score = is_correct(model_ans, data[-1]['content']) + return model_ans, score + + +def main(dataset_name, model, ip, port, timeout=3600, num_workers=16, output_path=None): + infer_params = { + "temperature": 0.6, + "top_p": 0.95, + "top_k": 40, + "max_tokens": 40960, + "seed":42 + } + + base_url = f"http://{ip}:{port}/v1/chat/completions" + + if not output_path: + output_path = f"{model}_{dataset_name}.json" + + dataset = load_dataset(dataset_name) + + task_args = [] + for data in dataset: + if dataset_name =="MMSTAR_BETA": + data[0]["content"] = move_question_to_front(data[0]["content"]) + task_args.append([model, data, infer_params, base_url, timeout]) + + logger.info(f"begin to inference") + results = batch_processing(inference_one_sample, task_args, num_workers=num_workers) + + raw_output_list = [] + score_list = [] + for result in results: + raw_output_list.append(result[0]) + score_list.append(result[1]) + + total_score = sum(score_list) / len(score_list) + + logger.info(f"Inference finished, Total score: {total_score}") + logger.info(f"Inference samples: {len(results)}, total samples: {len(dataset)}") + logger.info(f"Saving results to {output_path}") + + with open(output_path, 'w') as f: + json.dump({ + "total_score": total_score, + "raw_output_list": raw_output_list, + "score_list": score_list, + }, f, indent=4, ensure_ascii=False) + + +if __name__ == "__main__": + fire.Fire(main) \ No newline at end of file diff --git a/models/multimodal/vision_language_model/step3/vllm/eval_dataset_w8a8.py b/models/multimodal/vision_language_model/step3/vllm/eval_dataset_w8a8.py new file mode 100644 index 0000000000000000000000000000000000000000..3d7e196cb52026ed664793668bcebbbcdaee855a --- /dev/null +++ b/models/multimodal/vision_language_model/step3/vllm/eval_dataset_w8a8.py @@ -0,0 +1,161 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- + +import re +import copy +import json +import 
fire +import requests +from loguru import logger +from tqdm import tqdm + + +supported_dataset = { + "MMMU_BETA": "MMMU_BETA.json", + "MMSTAR_BETA": "MMSTAR_BETA.json", +} + + +def move_question_to_front(data): + text_items = [item for item in data if item.get("type") == "text"] + other_items = [item for item in data if item.get("type") != "text"] + return text_items + other_items + + +def batch_processing(func, arg_list, num_workers=16): + all_data = [] + try: + from concurrent.futures import ProcessPoolExecutor, as_completed + + with ProcessPoolExecutor(max_workers=num_workers) as executor: + futures = [executor.submit(func, arg) for arg in arg_list] + for future in tqdm(as_completed(futures), total=len(futures)): + result = future.result() + all_data.append(result) + + except Exception as e: + logger.error(f"Error: {e}") + raise e + finally: + return all_data + + +def load_dataset(dataset_name): + assert dataset_name in supported_dataset, f"Dataset {dataset_name} not supported" + logger.info(f"Loading dataset {dataset_name}") + dataset = json.load(open(supported_dataset[dataset_name])) + return dataset + + +def process_messages(messages): + messages = copy.deepcopy(messages) + # drop ground truth + if messages[-1]['role'].lower() == 'assistant': + messages.pop(-1) + return messages + + +def forward(model, history, infer_params, base_url, timeout, retry_times=10): + data = { + "model": model, + "messages": history, + **infer_params, + } + resp = requests.post(base_url, json=data, timeout=timeout, proxies={"http": [], "https": []}) + if resp.status_code != 200: + logger.warning(f"Error: [{resp.status_code}] {resp.text}, retry {retry_times} times") + if retry_times > 0: + return forward(model, history, infer_params, base_url, timeout, retry_times - 1) + else: + raise Exception(f"Error: [{resp.status_code}] {resp.text}") + else: + return resp.json()['choices'][0]['message']['content'] + + +def post_process(raw_output: str) -> str: + model_ans = raw_output.split("")[-1].strip() + model_ans = model_ans.split('answer')[-1].strip() + model_ans = model_ans.split('Answer')[-1].strip() + + if '\\boxed{' in model_ans: + matches = re.findall(r'\\boxed\{(.*?)\}', model_ans) + if matches: + model_ans = matches[-1] + + extracted_ans = re.findall(r'[A-Z]', model_ans) + if len(extracted_ans) > 0: + model_ans = extracted_ans[0] + else: + model_ans = model_ans + + return model_ans + + +def is_correct(model_ans: str, gt_ans: str) -> bool: + if len(str(gt_ans.strip())) == 1 and str(gt_ans.strip()) >= 'A' and str(gt_ans.strip()) <= 'Z': + pass + else: + gt_ans = "A" + + if gt_ans.lower() == model_ans.lower(): + return True + else: + return False + + +def inference_one_sample(args): + model, data, infer_params, base_url, timeout = args + messages = process_messages(data) + model_ans = forward(model, messages, infer_params, base_url, timeout) + model_ans = post_process(model_ans) + score = is_correct(model_ans, data[-1]['content']) + return model_ans, score + + +def main(dataset_name, model, ip, port, timeout=3600, num_workers=16, output_path=None): + infer_params = { + "temperature": 0.6, + "top_p": 0.95, + "top_k": 40, + "max_tokens": 28672, + "seed":42 + } + + base_url = f"http://{ip}:{port}/v1/chat/completions" + + if not output_path: + output_path = f"{model}_{dataset_name}.json" + + dataset = load_dataset(dataset_name) + + task_args = [] + for data in dataset: + if dataset_name =="MMSTAR_BETA": + data[0]["content"] = move_question_to_front(data[0]["content"]) + task_args.append([model, data, infer_params, 
base_url, timeout]) + + logger.info(f"begin to inference") + results = batch_processing(inference_one_sample, task_args, num_workers=num_workers) + + raw_output_list = [] + score_list = [] + for result in results: + raw_output_list.append(result[0]) + score_list.append(result[1]) + + total_score = sum(score_list) / len(score_list) + + logger.info(f"Inference finished, Total score: {total_score}") + logger.info(f"Inference samples: {len(results)}, total samples: {len(dataset)}") + logger.info(f"Saving results to {output_path}") + + with open(output_path, 'w') as f: + json.dump({ + "total_score": total_score, + "raw_output_list": raw_output_list, + "score_list": score_list, + }, f, indent=4, ensure_ascii=False) + + +if __name__ == "__main__": + fire.Fire(main) \ No newline at end of file diff --git a/models/multimodal/vision_language_model/step3/vllm/requirements.txt b/models/multimodal/vision_language_model/step3/vllm/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..72b5b8dd4385a1afaf4bb9d91595f7fb1a62be80 --- /dev/null +++ b/models/multimodal/vision_language_model/step3/vllm/requirements.txt @@ -0,0 +1,5 @@ +fire +tqdm +loguru +requests +tabulate \ No newline at end of file diff --git a/models/multimodal/vision_language_model/xlmroberta/vllm/README.md b/models/multimodal/vision_language_model/xlmroberta/vllm/README.md new file mode 100644 index 0000000000000000000000000000000000000000..75d76dc23255580a049e39b35c3f6024cc40de43 --- /dev/null +++ b/models/multimodal/vision_language_model/xlmroberta/vllm/README.md @@ -0,0 +1,43 @@ +# XLMRoberta (vLLM) + +## Model Description + +XLM-RoBERTa is a multilingual version of RoBERTa. It is pre-trained on 2.5TB of filtered CommonCrawl data containing 100 languages. + +RoBERTa is a transformers model pretrained on a large corpus in a self-supervised fashion. This means it was pretrained on the raw texts only, with no humans labelling them in any way (which is why it can use lots of publicly available data) with an automatic process to generate inputs and labels from those texts. + +## Supported Environments + +| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release | +| :----: | :----: | :----: | +| MR-V100 | 4.3.0 | 25.12 | + +## Model Preparation + +### Prepare Resources + +- Model: +- Model: base model is xlm-roberta-large + +```bash +# Download model from the website and make sure the model's path is "data/bge-reranker-v2-m3" "data/multilingual-e5-large" +mkdir data +``` + +### Install Dependencies + +In order to run the model smoothly, you need to get the sdk from [resource center](https://support.iluvatar.com/#/ProductLine?id=2) of Iluvatar CoreX official website. 
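+
+Before running inference, make sure both checkpoints from the Prepare Resources step
+are available under `data/`. As a sketch (the Hugging Face repository IDs below are
+assumptions; substitute whichever source or mirror you actually use), they can be
+fetched with `huggingface-cli`:
+
+```bash
+# Hypothetical download commands; adjust repository IDs or mirrors as needed.
+huggingface-cli download BAAI/bge-reranker-v2-m3 --local-dir data/bge-reranker-v2-m3
+huggingface-cli download intfloat/multilingual-e5-large --local-dir data/multilingual-e5-large
+```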
+ +## Model Inference + +### Sentence Pair Scoring Modeling +```bash +python3 offline_inference_scoring.py --model data/bge-reranker-v2-m3 --task "score" --tensor-parallel-size 1 +``` + +### Text Embedding +```bash +python3 offline_inference_embedding.py --model data/multilingual-e5-large -tp 2 +``` + +## Model Results \ No newline at end of file diff --git a/models/multimodal/vision_language_model/xlmroberta/vllm/ci/prepare.sh b/models/multimodal/vision_language_model/xlmroberta/vllm/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..d6fa2d8c4444e68238722ee515b6237608fed3bd --- /dev/null +++ b/models/multimodal/vision_language_model/xlmroberta/vllm/ci/prepare.sh @@ -0,0 +1,17 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x \ No newline at end of file diff --git a/models/multimodal/vision_language_model/xlmroberta/vllm/offline_inference_embedding.py b/models/multimodal/vision_language_model/xlmroberta/vllm/offline_inference_embedding.py new file mode 100644 index 0000000000000000000000000000000000000000..88974ed0930e9d3998de456ea491f8453f58d2f2 --- /dev/null +++ b/models/multimodal/vision_language_model/xlmroberta/vllm/offline_inference_embedding.py @@ -0,0 +1,71 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import sys +from pathlib import Path +import os + +sys.path.append(str(Path(__file__).resolve().parent.parent.parent)) +import argparse +import dataclasses +import inspect +import logging +import time + +import torch +from utils import load_chat_template, sampling_add_cli_args +from vllm import LLM, EngineArgs, SamplingParams + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser = EngineArgs.add_cli_args(parser) + parser = sampling_add_cli_args(parser) + args = parser.parse_args() + + engine_args = [attr.name for attr in dataclasses.fields(EngineArgs)] + sampling_args = [ + param.name + for param in list( + inspect.signature(SamplingParams).parameters.values() + ) + ] + engine_params = {attr: getattr(args, attr) for attr in engine_args} + sampling_params = { + attr: getattr(args, attr) for attr in sampling_args if args.__contains__(attr) + } + + model_name = os.path.dirname(args.model).rsplit("/")[-1] + + # Sample prompts. 
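+    # These prompts are embedded rather than generated: llm.encode() below returns
+    # one EmbeddingRequestOutput per prompt, and output.outputs.embedding holds the
+    # raw embedding vector (a list of hidden_size floats).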
+ prompts = [ + "Hello, my name is", + "The president of the United States is", + "The capital of France is", + "The future of AI is", + ] + + # Create a sampling params object. + sampling_params = SamplingParams(**sampling_params) + + # Create an LLM. + llm = LLM(**engine_params) + + # skip process chat template + # Generate embedding. The output is a list of EmbeddingRequestOutputs. + outputs = llm.encode(prompts) + # Print the outputs. + for output in outputs: + print(output.outputs.embedding) # list of hidden_size floats \ No newline at end of file diff --git a/models/multimodal/vision_language_model/xlmroberta/vllm/offline_inference_scoring.py b/models/multimodal/vision_language_model/xlmroberta/vllm/offline_inference_scoring.py new file mode 100644 index 0000000000000000000000000000000000000000..05c9ed0b4ed1282a261738088fbc532c914b337c --- /dev/null +++ b/models/multimodal/vision_language_model/xlmroberta/vllm/offline_inference_scoring.py @@ -0,0 +1,47 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from vllm import LLM +import argparse +from vllm import LLM, EngineArgs +import dataclasses + +if __name__ == "__main__": + + parser = argparse.ArgumentParser() + parser = EngineArgs.add_cli_args(parser) + args = parser.parse_args() + engine_args = [attr.name for attr in dataclasses.fields(EngineArgs)] + engine_params = {attr: getattr(args, attr) for attr in engine_args} + # Sample prompts. + text_1 = "What is the capital of France?" + texts_2 = [ + "The capital of Brazil is Brasilia.", "The capital of France is Paris." + ] + + # Create an LLM. + # You should pass task="score" for cross-encoder models + model = LLM(**engine_params) + + # Generate scores. The output is a list of ScoringRequestOutputs. + outputs = model.score(text_1, texts_2) + + # Print the outputs. + for text_2, output in zip(texts_2, outputs): + score = output.outputs.score + print(f"Pair: {[text_1, text_2]!r} | Score: {score}") + diff --git a/models/nlp/llm/qwen3-235b/vllm/README.md b/models/nlp/llm/qwen3-235b/vllm/README.md new file mode 100644 index 0000000000000000000000000000000000000000..45955c4f823b3dcf09a966c7c287ead357e8b057 --- /dev/null +++ b/models/nlp/llm/qwen3-235b/vllm/README.md @@ -0,0 +1,72 @@ +# Qwen3_Moe (vLLM) + +## Model Description + +Qwen3 is the latest generation of large language models in Qwen series, offering a comprehensive suite of dense and mixture-of-experts (MoE) models. Built upon extensive training, Qwen3 delivers groundbreaking advancements in reasoning, instruction-following, agent capabilities, and multilingual support, with the following key features: + +- Uniquely support of seamless switching between thinking mode (for complex logical reasoning, math, and coding) and non-thinking mode (for efficient, general-purpose dialogue) within single model, ensuring optimal performance across various scenarios. 
+- Significant enhancement in its reasoning capabilities, surpassing previous QwQ (in thinking mode) and Qwen2.5 instruct models (in non-thinking mode) on mathematics, code generation, and commonsense logical reasoning.
+- Superior human preference alignment, excelling in creative writing, role-playing, multi-turn dialogues, and instruction following, to deliver a more natural, engaging, and immersive conversational experience.
+- Expertise in agent capabilities, enabling precise integration with external tools in both thinking and non-thinking modes and achieving leading performance among open-source models in complex agent-based tasks.
+- Support of 100+ languages and dialects with strong capabilities for multilingual instruction following and translation.
+
+## Supported Environments
+
+| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release |
+| :----: | :----: | :----: |
+| MR-V100 | dev-only | 25.12 |
+
+## Model Preparation
+
+### Prepare Resources
+
+- Model:
+- Model:
+
+```bash
+# Quantize the BF16 model to W4A8
+python3 bf16ToInt4.py --input-fp8-hf-path /path/to/Qwen3-235B-A22B --output-int8-hf-path ./Qwen3-235B-A22B-w4a8-TN --group-size -1 --format TN --version 2
+```
+
+### Install Dependencies
+
+In order to run the model smoothly, you need to get the sdk from [resource
+center](https://support.iluvatar.com/#/ProductLine?id=2) of Iluvatar CoreX official website.
+
+## Model Inference
+
+### Qwen3-235B-A22B-Instruct
+#### Starting Server
+```bash
+VLLM_USE_V1=0 python3 -m vllm.entrypoints.openai.api_server \
+--model /path/to/Qwen3-235B-A22B-Instruct-2507-AWQ/ \
+--gpu-memory-utilization 0.92 --port 12347 \
+--trust-remote-code \
+--disable-cascade-attn \
+--max-model-len 262144 --seed 42 -tp 4 -pp 4 -dp 1 --max-num-seqs 8
+```
+
+#### Testing
+```bash
+curl 127.0.0.1:12347/v1/completions -H "Content-Type: application/json" -d '{"model":"/path/to/Qwen3-235B-A22B-Instruct-2507-AWQ/",
+"prompt":"简单介绍一下Qwen模型?",
+"temperature":0.0,
+"max_tokens":128}'
+```
+
+### Qwen3-235B-A22B-W4A8
+#### Starting Server
+```bash
+CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15" VLLM_W8A8_MOE_USE_W4A8=1 VLLM_USE_V1=1 \
+python3 -m vllm.entrypoints.openai.api_server --model ./Qwen3-235B-A22B-w4a8-TN \
+--gpu-memory-utilization 0.92 --port 12347 \
+--trust-remote-code \
+--disable-cascade-attn \
+--seed 42 -tp 4 -pp 4 -dp 1 --max-num-seqs 8
+```
+
+#### Testing
+```bash
+curl 127.0.0.1:12347/v1/completions -H "Content-Type: application/json" -d '{"model":"./Qwen3-235B-A22B-w4a8-TN", "prompt":"简单介绍一下Qwen3模型?", "temperature":0.0, "max_tokens":128}'
+```
+
+## Model Results
\ No newline at end of file
diff --git a/models/nlp/llm/qwen3-235b/vllm/bf16ToInt4.py b/models/nlp/llm/qwen3-235b/vllm/bf16ToInt4.py
new file mode 100644
index 0000000000000000000000000000000000000000..039fb89d86b51d11466c384a5ff4546b0b39c33e
--- /dev/null
+++ b/models/nlp/llm/qwen3-235b/vllm/bf16ToInt4.py
@@ -0,0 +1,232 @@
+#!/usr/bin/env python3
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import os +import json +from argparse import ArgumentParser +from glob import glob +from tqdm import tqdm + +import torch +from safetensors.torch import load_file, save_file +import ixformer.inference.functions as ixfop +import shutil +import json + +def weight_quant(v: torch.Tensor): + assert v.dim() == 2 + qmax = 127.0 + abs_max = torch.abs(v).max(dim=1, keepdim=True)[0] # [rows, 1] + scale = abs_max / qmax # [rows, 1] + assert scale.shape == (v.shape[0], 1) + quantized = torch.round(v / scale) + quantized = torch.clamp(quantized, -qmax, qmax) + return quantized.to(torch.int8), scale.to(torch.float32) + +def weight_quant_moe(v: torch.Tensor, block_size: int = 128, group_size=-1, format="TN", symmetric=True, version=2): + assert v.dim() == 2 + qmax = 127.0 + abs_max = torch.abs(v).max(dim=1, keepdim=True)[0] # [rows, 1] + scale = abs_max / qmax # [rows, 1] + assert scale.shape == (v.shape[0], 1) + quantized = torch.round(v / scale) + quantized = torch.clamp(quantized, -qmax, qmax) + quantized = quantized.to(torch.int8) + i4_weights, i8scales, i8zeros = ixfop.quant_repack_int4(quantized.to(torch.int8).unsqueeze_(0), group_size, version, format, not symmetric) + return i4_weights.squeeze(0), scale.to(torch.float32), i8scales, i8zeros + + +def main(fp8_path, int8_path, group_size, format, symmetric, version, split_count): + """ + Converts FP8 weights to BF16 and saves the converted weights. + + This function reads FP8 weights from the specified directory, converts them to BF16, + and saves the converted weights to another specified directory. It also updates the + model index file to reflect the changes. + + Args: + fp8_path (str): The path to the directory containing the FP8 weights and model index file. + int8_path (str): The path to the directory where the converted int8 weights will be saved. + + Raises: + KeyError: If a required scale_inv tensor is missing for a weight. + + Notes: + - The function assumes that the FP8 weights are stored in safetensor files. + - The function caches loaded safetensor files to optimize memory usage. + - The function updates the model index file to remove references to scale_inv tensors. 
+ """ + torch.set_default_dtype(torch.bfloat16) + os.makedirs(int8_path, exist_ok=True) + model_index_file = os.path.join(fp8_path, "model.safetensors.index.json") + with open(model_index_file, "r") as f: + model_index = json.load(f) + weight_map = model_index["weight_map"] + + # Cache for loaded safetensor files + loaded_files = {} + fp8_weight_names = [] + + + safetensor_files = list(glob(os.path.join(fp8_path, "*.safetensors"))) + safetensor_files.sort() + + new_weight_map = {} + all_safetensor = safetensor_files + all_files = list(glob(os.path.join(fp8_path, "*"))) + if split_count is None: + + safetensor_files = safetensor_files + elif split_count == 1: + safetensor_files = safetensor_files[:-2] + else: + safetensor_files = safetensor_files[-2:] + + + for safetensor_file in tqdm(safetensor_files): + file_name = os.path.basename(safetensor_file) + current_state_dict = load_file(safetensor_file, device="cuda") + loaded_files[file_name] = current_state_dict + + new_state_dict = {} + for weight_name, weight in current_state_dict.items(): + if "experts" in weight_name: + i4_weights, scale, i8scales, i8zeros = weight_quant_moe(weight, 128, group_size, format, symmetric, version) + if version == 2: + scale = scale.contiguous().view(1, -1) + else: + assert scale.is_contiguous() + new_state_dict[weight_name] = i4_weights + sacle_name = weight_name.replace("weight","weight_scale") + new_state_dict[sacle_name] = scale + + new_weight_map[weight_name] = file_name + new_weight_map[sacle_name] = file_name + + + if i8scales is not None: + i8scales = i8scales.squeeze_(0) + assert i8scales.dim() == 2 + + i8scales_name = weight_name.replace("weight","i8_weight_scale") + new_state_dict[i8scales_name] = i8scales + new_weight_map[i8scales_name] = file_name + + + if i8zeros is not None: + i8zeros = i8zeros.squeeze_(0) + assert i8zeros.dim() == 2 + i8zeros_name = weight_name.replace("weight","i8_weight_zero") + new_state_dict[i8zeros_name] = i8zeros + new_weight_map[i8zeros_name] = file_name + + elif "proj" in weight_name: + int8_v, scale = weight_quant(weight) + new_state_dict[weight_name] = int8_v + new_scale_name = weight_name + "_scale" + new_state_dict[new_scale_name] = scale + + new_weight_map[weight_name] = file_name + new_weight_map[new_scale_name] = file_name + else: + new_state_dict[weight_name] = weight + new_weight_map[weight_name] = file_name + + new_safetensor_file = os.path.join(int8_path, file_name) + save_file(new_state_dict, new_safetensor_file) + # Memory management: keep only the 2 most recently used files + if len(loaded_files) > 1: + oldest_file = next(iter(loaded_files)) + del loaded_files[oldest_file] + torch.cuda.empty_cache() + + other_files = list(set(all_files) - set(all_safetensor)) + for other_file in other_files: + if os.path.isfile(other_file): + name = other_file.rsplit("/", 1)[1] + shutil.copy(os.path.join(other_file), + os.path.join(int8_path, name)) + + compression_config = { + "config_groups": { + "group_0": { + "input_activations": { + "block_structure": None, + "dynamic": True, + "group_size": None, + "num_bits": 8, + "observer": "memoryless", + "observer_kwargs": {}, + "strategy": "token", + "symmetric": True, + "type": "int" + }, + "output_activations": None, + "targets": [ + "Linear" + ], + "weights": { + "block_structure": None, + "dynamic": False, + "group_size": None if group_size==-1 else group_size, + "num_bits": 8, + "observer": "minmax", + "observer_kwargs": {}, + "strategy": "channel" if group_size == -1 else "group", + "symmetric": bool(symmetric), + "type": 
"int" + } + } + }, + "format": "int-quantized", + "global_compression_ratio": 1.0, + "ignore": [ + "lm_head" + ], + "kv_cache_scheme": None, + "quant_method": "compressed-tensors", + "quantization_status": "frozen" + } + + with open(os.path.join(int8_path, "config.json"), encoding="utf-8") as file: + configs:dict = json.loads(file.read()) + # configs.pop("quantization_config") + configs["compression_config"] = compression_config + with open(os.path.join(int8_path, "config.json"), encoding="utf-8", mode="w") as f: + json.dump(configs, f) + + + with open(model_index_file, "r") as f: + model_index = json.load(f) + model_index["weight_map"] = new_weight_map + new_model_index_file = os.path.join(int8_path, "model.safetensors.index.json") + with open(new_model_index_file, "w", encoding="utf-8") as f: + json.dump(model_index, f, indent=2, ensure_ascii=False, sort_keys=True) + print(f"model.safetensors.index.json modified and saved to {new_model_index_file}") + + + +if __name__ == "__main__": + parser = ArgumentParser() + parser.add_argument("--input-fp8-hf-path", type=str, required=True) + parser.add_argument("--output-int8-hf-path", type=str, required=True) + parser.add_argument("--group-size", type=int, default=-1) + parser.add_argument("--format", type=str, default="TN") + parser.add_argument("--symmetric", type=bool, default=True) + parser.add_argument("--version", type=int, default=2) + parser.add_argument("--split-count", type=int, default=None) + args = parser.parse_args() + main(args.input_fp8_hf_path, args.output_int8_hf_path, args.group_size, args.format, args.symmetric, args.version, args.split_count) \ No newline at end of file diff --git a/models/nlp/llm/qwen3-235b/vllm/ci/prepare.sh b/models/nlp/llm/qwen3-235b/vllm/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..cfd5031a7d3e6ac57abbc085dd41fc1063482bee --- /dev/null +++ b/models/nlp/llm/qwen3-235b/vllm/ci/prepare.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +# Contact the iluvatar manager to get adapted install packages of vllm, triton, and ixformer +pip3 install vllm +pip3 install triton +pip3 install ixformer \ No newline at end of file diff --git a/models/speech/asr/ultravox/vllm/README.md b/models/speech/asr/ultravox/vllm/README.md new file mode 100644 index 0000000000000000000000000000000000000000..55a5d46087bae918865150cd9eae398838e6758a --- /dev/null +++ b/models/speech/asr/ultravox/vllm/README.md @@ -0,0 +1,30 @@ +# Ultravox (vLLM) + +## Model Description + +Ultravox is a multimodal model that can consume both speech and text as input (e.g., a text system prompt and voice user message). 
The input to the model is given as a text prompt with a special <|audio|> pseudo-token, and the model processor will replace this magic token with embeddings derived from the input audio. Using the merged embeddings as input, the model will then generate output text as usual. + +## Supported Environments + +| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release | +| :----: | :----: | :----: | +| MR-V100 | 4.3.0 | 25.12 | + +## Model Preparation + +### Prepare Resources + +- Model: + +### Install Dependencies + +In order to run the model smoothly, you need to get the sdk from [resource center](https://support.iluvatar.com/#/ProductLine?id=2) of Iluvatar CoreX official website. + +## Model Inference + +```bash +export VLLM_ASSETS_CACHE=../vllm/ +python3 offline_inference_audio_language.py --model /path/to/ultravox-v0_5-llama-3_2-1b +``` + +## Model Results diff --git a/models/speech/asr/ultravox/vllm/ci/prepare.sh b/models/speech/asr/ultravox/vllm/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..4a733ba9d2940660d21de58d9f2995c208543adc --- /dev/null +++ b/models/speech/asr/ultravox/vllm/ci/prepare.sh @@ -0,0 +1,17 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x diff --git a/models/speech/asr/ultravox/vllm/offline_inference_audio_language.py b/models/speech/asr/ultravox/vllm/offline_inference_audio_language.py new file mode 100644 index 0000000000000000000000000000000000000000..d2172bbf7a01ed8025a1606a058fb6307cd99d9c --- /dev/null +++ b/models/speech/asr/ultravox/vllm/offline_inference_audio_language.py @@ -0,0 +1,124 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +""" +This example shows how to use vLLM for running offline inference +with the correct prompt format on audio language models. + +For most models, the prompt format should follow corresponding examples +on HuggingFace model repository. 
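+
+In this repo the script targets Ultravox; the README above exports
+VLLM_ASSETS_CACHE so that the bundled AudioAsset clips ("mary_had_lamb" and
+"winning_call") are cached under that path.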
+""" +from transformers import AutoTokenizer + +from vllm import LLM, SamplingParams +from vllm.assets.audio import AudioAsset +from vllm.utils import FlexibleArgumentParser + +audio_assets = [AudioAsset("mary_had_lamb"), AudioAsset("winning_call")] +question_per_audio_count = { + 0: "What is 1+1?", + 1: "What is recited in the audio?", + 2: "What sport and what nursery rhyme are referenced?" +} + + +# Ultravox 0.5-1B +def run_ultravox(question, audio_count, model_name): + + tokenizer = AutoTokenizer.from_pretrained(model_name) + messages = [{ + 'role': 'user', + 'content': "<|audio|>\n" * audio_count + question + }] + prompt = tokenizer.apply_chat_template(messages, + tokenize=False, + add_generation_prompt=True) + + llm = LLM(model=model_name, + max_model_len=4096, + max_num_seqs=5, + trust_remote_code=True, + limit_mm_per_prompt={"audio": audio_count}) + stop_token_ids = None + return llm, prompt, stop_token_ids + + +model_example_map = { + "ultravox": run_ultravox, +} + + +def main(args): + model = args.model_type + if model not in model_example_map: + raise ValueError(f"Model type {model} is not supported.") + + model_name = args.model + audio_count = args.num_audios + llm, prompt, stop_token_ids = model_example_map[model]( + question_per_audio_count[audio_count], audio_count, model_name) + + # We set temperature to 0.2 so that outputs can be different + # even when all prompts are identical when running batch inference. + sampling_params = SamplingParams(temperature=0.2, + max_tokens=64, + stop_token_ids=stop_token_ids) + + mm_data = {} + if audio_count > 0: + mm_data = { + "audio": [ + asset.audio_and_sample_rate + for asset in audio_assets[:audio_count] + ] + } + + assert args.num_prompts > 0 + inputs = {"prompt": prompt, "multi_modal_data": mm_data} + if args.num_prompts > 1: + # Batch inference + inputs = [inputs] * args.num_prompts + + outputs = llm.generate(inputs, sampling_params=sampling_params) + + for o in outputs: + generated_text = o.outputs[0].text + print(generated_text) + + +if __name__ == "__main__": + parser = FlexibleArgumentParser( + description='Demo on using vLLM for offline inference with ' + 'audio language models') + parser.add_argument('--model-type', + '-m', + type=str, + default="ultravox", + choices=model_example_map.keys(), + help='Huggingface "model_type".') + parser.add_argument('--model', + type=str) + parser.add_argument('--num-prompts', + type=int, + default=1, + help='Number of prompts to run.') + parser.add_argument("--num-audios", + type=int, + default=1, + choices=[0, 1, 2], + help="Number of audio items per prompt.") + + args = parser.parse_args() + main(args)
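+
+# Example usage (paths follow the README above):
+#   export VLLM_ASSETS_CACHE=../vllm/
+#   python3 offline_inference_audio_language.py --model /path/to/ultravox-v0_5-llama-3_2-1b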