From 7381fd9286cbffb09ec307162b5abe84bd9ee3ab Mon Sep 17 00:00:00 2001 From: "xinchi.tian" Date: Wed, 9 Oct 2024 14:47:38 +0800 Subject: [PATCH 1/8] Add densenet161 in IxRT link #IAVG3P Signed-off-by: xinchi.tian --- .../classification/densenet161/ixrt/README.md | 55 ++++++ .../densenet161/ixrt/build_engine.py | 61 +++++++ .../densenet161/ixrt/calibration_dataset.py | 112 ++++++++++++ .../classification/densenet161/ixrt/common.py | 80 ++++++++ .../densenet161/ixrt/config/DENSENET_CONFIG | 33 ++++ .../classification/densenet161/ixrt/export.py | 74 ++++++++ .../densenet161/ixrt/inference.py | 171 ++++++++++++++++++ .../densenet161/ixrt/modify_batchsize.py | 56 ++++++ .../scripts/infer_densenet_fp16_accuracy.sh | 119 ++++++++++++ .../infer_densenet_fp16_performance.sh | 119 ++++++++++++ .../densenet161/ixrt/simplify_model.py | 40 ++++ 11 files changed, 920 insertions(+) create mode 100644 models/cv/classification/densenet161/ixrt/README.md create mode 100644 models/cv/classification/densenet161/ixrt/build_engine.py create mode 100644 models/cv/classification/densenet161/ixrt/calibration_dataset.py create mode 100644 models/cv/classification/densenet161/ixrt/common.py create mode 100644 models/cv/classification/densenet161/ixrt/config/DENSENET_CONFIG create mode 100644 models/cv/classification/densenet161/ixrt/export.py create mode 100644 models/cv/classification/densenet161/ixrt/inference.py create mode 100644 models/cv/classification/densenet161/ixrt/modify_batchsize.py create mode 100644 models/cv/classification/densenet161/ixrt/scripts/infer_densenet_fp16_accuracy.sh create mode 100644 models/cv/classification/densenet161/ixrt/scripts/infer_densenet_fp16_performance.sh create mode 100644 models/cv/classification/densenet161/ixrt/simplify_model.py diff --git a/models/cv/classification/densenet161/ixrt/README.md b/models/cv/classification/densenet161/ixrt/README.md new file mode 100644 index 00000000..08e7bd16 --- /dev/null +++ b/models/cv/classification/densenet161/ixrt/README.md @@ -0,0 +1,55 @@ +# DenseNet161 + +## Description + +DenseNet161 is a convolutional neural network architecture that belongs to the family of Dense Convolutional Networks (DenseNets). Introduced as an extension to the previous DenseNet models, DenseNet161 offers improved performance and deeper network capacity, making it suitable for various computer vision tasks. + +## Setup + +### Install + +```bash +# Install libGL +## CentOS +yum install -y mesa-libGL +## Ubuntu +apt install -y libgl1-mesa-dev + +pip3 install tqdm +pip3 install onnx +pip3 install onnxsim +pip3 install tabulate +pip3 install cuda-python +``` + +### Download + +Pretrained model: +Dataset: to download the validation dataset. 
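The accuracy script reads labels through `calibration_dataset.py`, which wraps `torchvision.datasets.ImageFolder` and additionally expects a `val_map.txt` file (one `<image_name><TAB><label>` pair per line) in the dataset root. A minimal sketch of the layout the loader assumes; directory and file names here are illustrative:

```
imagenet_val/
├── val_map.txt                        # e.g. "ILSVRC2012_val_00000293.JPEG<TAB>0"
├── n01440764/
│   └── ILSVRC2012_val_00000293.JPEG
└── ...
```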
+ +### Model Conversion + +```bash +python3 export.py --weight densenet161-8d451a50.pth --output densenet161.onnx +cd data && mkdir checkpoints && cd checkpoints && mkdir densenet161 +mv densenet161.onnx densenet161 +``` + +## Inference + + +### FP16 + +```bash +cd deepsparkinference +# Accuracy +bash models/cv/classification/densenet161/ixrt/scripts/infer_densenet_fp16_accuracy.sh +# Performance +bash models/cv/classification/densenet161/ixrt/scripts/infer_densenet_fp16_performance.sh +``` + +## Results + +| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) | +| ----------- | --------- | --------- | ------- | -------- | -------- | +| DenseNet161 | 32 | FP16 | 589.784 | 0.7771 | 0.9354 | diff --git a/models/cv/classification/densenet161/ixrt/build_engine.py b/models/cv/classification/densenet161/ixrt/build_engine.py new file mode 100644 index 00000000..c0a83a1e --- /dev/null +++ b/models/cv/classification/densenet161/ixrt/build_engine.py @@ -0,0 +1,61 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +import os +import cv2 +import argparse +import numpy as np +import torch +import tensorrt +import cuda.cudart as cudart + +def assertSuccess(err): + assert(err == cudart.cudaError_t.cudaSuccess) + + +def main(config): + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.VERBOSE) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(config.model) + + precision = tensorrt.BuilderFlag.FP16 + + build_config.set_flag(precision) + plan = builder.build_serialized_network(network, build_config) + engine_file_path = config.engine + with open(engine_file_path, "wb") as f: + f.write(plan) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str) + parser.add_argument("--precision", type=str, choices=["float16", "float32"], default="float16", + help="The precision of datatype") + parser.add_argument("--engine", type=str, default=None) + parser.add_argument( + "--datasets_dir", + type=str, + default="deepsparkinference/data/datasets/imagenet_val/", + help="ImageNet dir", + ) + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = parse_args() + main(args) diff --git a/models/cv/classification/densenet161/ixrt/calibration_dataset.py b/models/cv/classification/densenet161/ixrt/calibration_dataset.py new file mode 100644 index 00000000..ec931c65 --- /dev/null +++ b/models/cv/classification/densenet161/ixrt/calibration_dataset.py @@ -0,0 +1,112 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +import os + +import torch +import torchvision.datasets +from torch.utils.data import DataLoader +from torchvision import models +from torchvision import transforms as T + + +class CalibrationImageNet(torchvision.datasets.ImageFolder): + def __init__(self, *args, **kwargs): + super(CalibrationImageNet, self).__init__(*args, **kwargs) + img2label_path = os.path.join(self.root, "val_map.txt") + if not os.path.exists(img2label_path): + raise FileNotFoundError(f"Not found label file `{img2label_path}`.") + + self.img2label_map = self.make_img2label_map(img2label_path) + + def make_img2label_map(self, path): + with open(path) as f: + lines = f.readlines() + + img2lable_map = dict() + for line in lines: + line = line.lstrip().rstrip().split("\t") + if len(line) != 2: + continue + img_name, label = line + img_name = img_name.strip() + if img_name in [None, ""]: + continue + label = int(label.strip()) + img2lable_map[img_name] = label + return img2lable_map + + def __getitem__(self, index): + path, target = self.samples[index] + sample = self.loader(path) + if self.transform is not None: + sample = self.transform(sample) + # if self.target_transform is not None: + # target = self.target_transform(target) + img_name = os.path.basename(path) + target = self.img2label_map[img_name] + + return sample, target + + +def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0): + dataset = CalibrationImageNet( + data_path, + transform=T.Compose( + [ + T.Resize(256), + T.CenterCrop(img_sz), + T.ToTensor(), + T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ] + ), + ) + + calibration_dataset = dataset + if num_samples is not None: + calibration_dataset = torch.utils.data.Subset( + dataset, indices=range(num_samples) + ) + + calibration_dataloader = DataLoader( + calibration_dataset, + shuffle=True, + batch_size=batch_size, + drop_last=False, + num_workers=workers, + ) + + verify_dataloader = DataLoader( + dataset, + shuffle=False, + batch_size=batch_size, + drop_last=False, + num_workers=workers, + ) + + return calibration_dataloader, verify_dataloader + + +def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): + num_samples = min(total_sample, step * batch_size) + if step < 0: + num_samples = None + calibration_dataloader, _ = create_dataloaders( + dataset_dir, + img_sz=img_sz, + batch_size=batch_size, + workers=workers, + num_samples=num_samples, + ) + return calibration_dataloader diff --git a/models/cv/classification/densenet161/ixrt/common.py b/models/cv/classification/densenet161/ixrt/common.py new file mode 100644 index 00000000..21c2b399 --- /dev/null +++ b/models/cv/classification/densenet161/ixrt/common.py @@ -0,0 +1,80 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +import os +import cv2 +import glob +import torch +import tensorrt +import numpy as np +from cuda import cuda, cudart + +def eval_batch(batch_score, batch_label): + batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) + values, indices = batch_score.topk(5) + top1, top5 = 0, 0 + for idx, label in enumerate(batch_label): + + if label == indices[idx][0]: + top1 += 1 + if label in indices[idx]: + top5 += 1 + return top1, top5 + +def create_engine_context(engine_path, logger): + with open(engine_path, "rb") as f: + runtime = tensorrt.Runtime(logger) + assert runtime + engine = runtime.deserialize_cuda_engine(f.read()) + assert engine + context = engine.create_execution_context() + assert context + + return engine, context + +def get_io_bindings(engine): + # Setup I/O bindings + inputs = [] + outputs = [] + allocations = [] + + for i in range(engine.num_bindings): + is_input = False + if engine.binding_is_input(i): + is_input = True + name = engine.get_binding_name(i) + dtype = engine.get_binding_dtype(i) + shape = engine.get_binding_shape(i) + if is_input: + batch_size = shape[0] + size = np.dtype(tensorrt.nptype(dtype)).itemsize + for s in shape: + size *= s + err, allocation = cudart.cudaMalloc(size) + assert err == cudart.cudaError_t.cudaSuccess + binding = { + "index": i, + "name": name, + "dtype": np.dtype(tensorrt.nptype(dtype)), + "shape": list(shape), + "allocation": allocation, + "nbytes": size, + } + print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") + allocations.append(allocation) + if engine.binding_is_input(i): + inputs.append(binding) + else: + outputs.append(binding) + return inputs, outputs, allocations diff --git a/models/cv/classification/densenet161/ixrt/config/DENSENET_CONFIG b/models/cv/classification/densenet161/ixrt/config/DENSENET_CONFIG new file mode 100644 index 00000000..b318ab49 --- /dev/null +++ b/models/cv/classification/densenet161/ixrt/config/DENSENET_CONFIG @@ -0,0 +1,33 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
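# NOTE: this file is sourced as shell variables by scripts/infer_densenet_fp16_*.sh,
# which forward the values to simplify_model.py, modify_batchsize.py,
# build_engine.py and inference.py.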
# IMGSIZE       : model input height/width
# MODEL_NAME    : basename for the generated onnx/engine files
# ORIGINE_MODEL : original onnx file name
IMGSIZE=224
MODEL_NAME=DenseNet
ORIGINE_MODEL=densenet161.onnx

# QUANT CONFIG (only takes effect when PRECISION is int8)
    # QUANT_OBSERVER   : quantization strategy, one of [hist_percentile, percentile, minmax, entropy, ema]
    # QUANT_BATCHSIZE  : batch size of the calibration dataloader; keep it identical to the batch size in the onnx, otherwise some ops (e.g. Reshape) may infer wrong shapes
    # QUANT_STEP       : number of calibration steps
    # QUANT_SEED       : random seed, to make quantization results reproducible
    # QUANT_EXIST_ONNX : set this when a quantized model from another source should be used
QUANT_OBSERVER=minmax
QUANT_BATCHSIZE=1
QUANT_STEP=32
QUANT_SEED=42
DISABLE_QUANT_LIST=
QUANT_EXIST_ONNX=
diff --git a/models/cv/classification/densenet161/ixrt/export.py b/models/cv/classification/densenet161/ixrt/export.py
new file mode 100644
index 00000000..2e696e96
--- /dev/null
+++ b/models/cv/classification/densenet161/ixrt/export.py
@@ -0,0 +1,74 @@
# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

import torch
import torchvision
import argparse
import re

def parse_args():
    parser = argparse.ArgumentParser()

    parser.add_argument("--weight",
                        type=str,
                        required=True,
                        help="pytorch model weight.")

    parser.add_argument("--output",
                        type=str,
                        required=True,
                        help="export onnx model path.")

    args = parser.parse_args()
    return args

def main():
    args = parse_args()

    model = torchvision.models.densenet161(weights=None)

    state_dict = torch.load(args.weight)

    # Old checkpoints store keys like "denselayer1.norm.1.weight"; remap them to
    # the current torchvision naming "denselayer1.norm1.weight".
    pattern = re.compile(r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$')
    for key in list(state_dict.keys()):
        res = pattern.match(key)
        if res:
            new_key = res.group(1) + res.group(2)
            state_dict[new_key] = state_dict[key]
            del state_dict[key]

    model.load_state_dict(state_dict)
    model.eval()

    input_names = ['input']
    output_names = ['output']
    # Export with a fixed batch size; modify_batchsize.py rewrites the batch
    # dimension of the onnx afterwards.
    dummy_input = torch.randn(1, 3, 224, 224)

    torch.onnx.export(
        model,
        dummy_input,
        args.output,
        input_names = input_names,
        dynamic_axes = None,
        output_names = output_names,
        opset_version=13
    )

    print("Exported onnx model successfully!")

if __name__ == "__main__":
    main()
diff --git a/models/cv/classification/densenet161/ixrt/inference.py b/models/cv/classification/densenet161/ixrt/inference.py
new file mode 100644
index 00000000..22f1644c
--- /dev/null
+++ b/models/cv/classification/densenet161/ixrt/inference.py
@@ -0,0 +1,171 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +import argparse +import json +import os +import re +import time +from tqdm import tqdm + +import cv2 +import numpy as np +from cuda import cuda, cudart +import torch +import tensorrt + +from calibration_dataset import getdataloader +from common import eval_batch, create_engine_context, get_io_bindings + +def main(config): + dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) + + host_mem = tensorrt.IHostMemory + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + + # Load Engine && I/O bindings + engine, context = create_engine_context(config.engine_file, logger) + inputs, outputs, allocations = get_io_bindings(engine) + + # Warm up + if config.warm_up > 0: + print("\nWarm Start.") + for i in range(config.warm_up): + context.execute_v2(allocations) + print("Warm Done.") + + # Inference + if config.test_mode == "FPS": + torch.cuda.synchronize() + start_time = time.time() + + for i in range(config.loop_count): + context.execute_v2(allocations) + + torch.cuda.synchronize() + end_time = time.time() + forward_time = end_time - start_time + + num_samples = 50000 + if config.loop_count * config.bsz < num_samples: + num_samples = config.loop_count * config.bsz + fps = num_samples / forward_time + + print("FPS : ", fps) + print(f"Performance Check : Test {fps} >= target {config.fps_target}") + if fps >= config.fps_target: + print("pass!") + exit() + else: + print("failed!") + exit(1) + + elif config.test_mode == "ACC": + + ## Prepare the output data + output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) + print(f"output shape : {output.shape} output type : {output.dtype}") + + total_sample = 0 + acc_top1, acc_top5 = 0, 0 + + with tqdm(total= len(dataloader)) as _tqdm: + for idx, (batch_data, batch_label) in enumerate(dataloader): + batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) + batch_data = np.ascontiguousarray(batch_data) + total_sample += batch_data.shape[0] + (err,) = cudart.cudaMemcpy( + inputs[0]["allocation"], + batch_data, + batch_data.nbytes, + cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, + ) + assert err == cudart.cudaError_t.cudaSuccess + # cuda.memcpy_htod(inputs[0]["allocation"], batch_data) + context.execute_v2(allocations) + (err,) = cudart.cudaMemcpy( + output, + outputs[0]["allocation"], + outputs[0]["nbytes"], + cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost, + ) + assert err == cudart.cudaError_t.cudaSuccess + # cuda.memcpy_dtoh(output, outputs[0]["allocation"]) + + # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model + if len(output.shape) == 4: + output = output.squeeze(axis=(2,3)) + + batch_top1, batch_top5 = eval_batch(output, batch_label) + acc_top1 += batch_top1 + acc_top5 += batch_top5 + + _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), + acc_5='{:.4f}'.format(acc_top5/total_sample)) + _tqdm.update(1) + err, = cudart.cudaFree(inputs[0]["allocation"]) + assert err == cudart.cudaError_t.cudaSuccess + err, = cudart.cudaFree(outputs[0]["allocation"]) + assert err == cudart.cudaError_t.cudaSuccess + print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") + 
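        # top-5 counts a hit whenever the label appears among the five highest
        # logits of a sample (see eval_batch in common.py)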
print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") + acc1 = acc_top1/total_sample + print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") + if acc1 >= config.acc_target: + print("pass!") + exit() + else: + print("failed!") + exit(1) + +def parse_config(): + parser = argparse.ArgumentParser() + parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") + parser.add_argument( + "--engine_file", + type=str, + help="engine file path" + ) + parser.add_argument( + "--datasets_dir", + type=str, + default="", + help="ImageNet dir", + ) + parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") + parser.add_argument("--bsz", type=int, default=32, help="test batch size") + parser.add_argument( + "--imgsz", + "--img", + "--img-size", + type=int, + default=224, + help="inference size h,w", + ) + parser.add_argument("--use_async", action="store_true") + parser.add_argument( + "--device", type=int, default=0, help="cuda device, i.e. 0 or 0,1,2,3,4" + ) + parser.add_argument("--fps_target", type=float, default=-1.0) + parser.add_argument("--acc_target", type=float, default=-1.0) + parser.add_argument("--loop_count", type=int, default=-1) + + config = parser.parse_args() + return config + +if __name__ == "__main__": + config = parse_config() + main(config) diff --git a/models/cv/classification/densenet161/ixrt/modify_batchsize.py b/models/cv/classification/densenet161/ixrt/modify_batchsize.py new file mode 100644 index 00000000..689b7a97 --- /dev/null +++ b/models/cv/classification/densenet161/ixrt/modify_batchsize.py @@ -0,0 +1,56 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +import onnx +import argparse + +def change_input_dim(model, bsz): + batch_size = bsz + + # The following code changes the first dimension of every input to be batch_size + # Modify as appropriate ... note that this requires all inputs to + # have the same batch_size + inputs = model.graph.input + for input in inputs: + # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. + # Add checks as needed. 
        dim1 = input.type.tensor_type.shape.dim[0]
        # update dim to be a symbolic value
        if isinstance(batch_size, str) and not batch_size.isdigit():
            # set dynamic batch size
            dim1.dim_param = batch_size
        elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int):
            # set given batch size
            dim1.dim_value = int(batch_size)
        else:
            # set batch size of 1
            dim1.dim_value = 1

def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--batch_size", type=int)
    parser.add_argument("--origin_model", type=str)
    parser.add_argument("--output_model", type=str)
    args = parser.parse_args()
    return args

args = parse_args()
model = onnx.load(args.origin_model)
change_input_dim(model, args.batch_size)
onnx.save(model, args.output_model)
diff --git a/models/cv/classification/densenet161/ixrt/scripts/infer_densenet_fp16_accuracy.sh b/models/cv/classification/densenet161/ixrt/scripts/infer_densenet_fp16_accuracy.sh
new file mode 100644
index 00000000..e266c65a
--- /dev/null
+++ b/models/cv/classification/densenet161/ixrt/scripts/infer_densenet_fp16_accuracy.sh
@@ -0,0 +1,119 @@
#!/bin/bash
# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
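# check_status records a non-zero exit code from the most recent command
# (via PIPESTATUS) in EXIT_STATUS instead of aborting immediately; the
# script exits with that status at the end.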
EXIT_STATUS=0
check_status()
{
    if ((${PIPESTATUS[0]} != 0));then
        EXIT_STATUS=1
    fi
}

# Run parameters
BSZ=32
TGT=-1
WARM_UP=0
LOOP_COUNT=-1
RUN_MODE=ACC
PRECISION=float16

# Update arguments
index=0
options=$@
arguments=($options)
for argument in $options
do
    index=`expr $index + 1`
    case $argument in
      --bs) BSZ=${arguments[index]};;
      --tgt) TGT=${arguments[index]};;
    esac
done

PROJ_DIR=$(cd $(dirname $0);cd ../../../../../..; pwd)
DATASETS_DIR="${PROJ_DIR}/data/datasets/imagenet_val/"
CHECKPOINTS_DIR="${PROJ_DIR}/data/checkpoints/densenet161/"
RUN_DIR="${PROJ_DIR}/models/cv/classification/densenet161/ixrt/"
CONFIG_DIR="${PROJ_DIR}/models/cv/classification/densenet161/ixrt/config/DENSENET_CONFIG"
source ${CONFIG_DIR}
ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}

echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
echo DATASETS_DIR : ${DATASETS_DIR}
echo RUN_DIR : ${RUN_DIR}
echo CONFIG_DIR : ${CONFIG_DIR}
echo ====================== Model Info ======================
echo Model Name : ${MODEL_NAME}
echo Onnx Path : ${ORIGINE_MODEL}

step=0
SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx

# Simplify Model
let step++
echo;
echo [STEP ${step}] : Simplify Model
if [ -f ${SIM_MODEL} ];then
    echo "  "Simplify Model Skip, ${SIM_MODEL} already exists
else
    python3 ${RUN_DIR}/simplify_model.py \
        --origin_model $ORIGINE_MODEL \
        --output_model ${SIM_MODEL}
    echo "  "Generate ${SIM_MODEL}
fi

# Change Batchsize
let step++
echo;
echo [STEP ${step}] : Change Batchsize
FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx
if [ -f $FINAL_MODEL ];then
    echo "  "Change Batchsize Skip, $FINAL_MODEL already exists
else
    python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
        --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL}
    echo "  "Generate ${FINAL_MODEL}
fi

# Build Engine
let step++
echo;
echo [STEP ${step}] : Build Engine
ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
if [ -f $ENGINE_FILE ];then
    echo "  "Build Engine Skip, $ENGINE_FILE already exists
else
    python3 ${RUN_DIR}/build_engine.py \
        --precision ${PRECISION} \
        --model ${FINAL_MODEL} \
        --engine ${ENGINE_FILE}
    echo "  "Generate Engine ${ENGINE_FILE}
fi

# Inference
let step++
echo;
echo [STEP ${step}] : Inference
python3 ${RUN_DIR}/inference.py \
    --engine_file=${ENGINE_FILE} \
    --datasets_dir=${DATASETS_DIR} \
    --imgsz=${IMGSIZE} \
    --warm_up=${WARM_UP} \
    --loop_count ${LOOP_COUNT} \
    --test_mode ${RUN_MODE} \
    --acc_target ${TGT} \
    --bsz ${BSZ}; check_status

exit ${EXIT_STATUS}
diff --git a/models/cv/classification/densenet161/ixrt/scripts/infer_densenet_fp16_performance.sh b/models/cv/classification/densenet161/ixrt/scripts/infer_densenet_fp16_performance.sh
new file mode 100644
index 00000000..cc0816ef
--- /dev/null
+++ b/models/cv/classification/densenet161/ixrt/scripts/infer_densenet_fp16_performance.sh
@@ -0,0 +1,119 @@
#!/bin/bash
# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
EXIT_STATUS=0
check_status()
{
    if ((${PIPESTATUS[0]} != 0));then
        EXIT_STATUS=1
    fi
}

# Run parameters
BSZ=32
TGT=-1
WARM_UP=3
LOOP_COUNT=20
RUN_MODE=FPS
PRECISION=float16

# Update arguments
index=0
options=$@
arguments=($options)
for argument in $options
do
    index=`expr $index + 1`
    case $argument in
      --bs) BSZ=${arguments[index]};;
      --tgt) TGT=${arguments[index]};;
    esac
done

PROJ_DIR=$(cd $(dirname $0);cd ../../../../../..; pwd)
DATASETS_DIR="${PROJ_DIR}/data/datasets/imagenet_val/"
CHECKPOINTS_DIR="${PROJ_DIR}/data/checkpoints/densenet161/"
RUN_DIR="${PROJ_DIR}/models/cv/classification/densenet161/ixrt/"
CONFIG_DIR="${PROJ_DIR}/models/cv/classification/densenet161/ixrt/config/DENSENET_CONFIG"
source ${CONFIG_DIR}
ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}

echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
echo DATASETS_DIR : ${DATASETS_DIR}
echo RUN_DIR : ${RUN_DIR}
echo CONFIG_DIR : ${CONFIG_DIR}
echo ====================== Model Info ======================
echo Model Name : ${MODEL_NAME}
echo Onnx Path : ${ORIGINE_MODEL}

step=0
SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx

# Simplify Model
let step++
echo;
echo [STEP ${step}] : Simplify Model
if [ -f ${SIM_MODEL} ];then
    echo "  "Simplify Model Skip, ${SIM_MODEL} already exists
else
    python3 ${RUN_DIR}/simplify_model.py \
        --origin_model $ORIGINE_MODEL \
        --output_model ${SIM_MODEL}
    echo "  "Generate ${SIM_MODEL}
fi

# Change Batchsize
let step++
echo;
echo [STEP ${step}] : Change Batchsize
FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx
if [ -f $FINAL_MODEL ];then
    echo "  "Change Batchsize Skip, $FINAL_MODEL already exists
else
    python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
        --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL}
    echo "  "Generate ${FINAL_MODEL}
fi

# Build Engine
let step++
echo;
echo [STEP ${step}] : Build Engine
ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
if [ -f $ENGINE_FILE ];then
    echo "  "Build Engine Skip, $ENGINE_FILE already exists
else
    python3 ${RUN_DIR}/build_engine.py \
        --precision ${PRECISION} \
        --model ${FINAL_MODEL} \
        --engine ${ENGINE_FILE}
    echo "  "Generate Engine ${ENGINE_FILE}
fi

# Inference
let step++
echo;
echo [STEP ${step}] : Inference
python3 ${RUN_DIR}/inference.py \
    --engine_file=${ENGINE_FILE} \
    --datasets_dir=${DATASETS_DIR} \
    --imgsz=${IMGSIZE} \
    --warm_up=${WARM_UP} \
    --loop_count ${LOOP_COUNT} \
    --test_mode ${RUN_MODE} \
    --fps_target ${TGT} \
    --bsz ${BSZ}; check_status

exit ${EXIT_STATUS}
diff --git a/models/cv/classification/densenet161/ixrt/simplify_model.py b/models/cv/classification/densenet161/ixrt/simplify_model.py
new file mode 100644
index 00000000..9948a9fa
--- /dev/null
+++ b/models/cv/classification/densenet161/ixrt/simplify_model.py
@@ -0,0 +1,40 @@
# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the +# License for the specific language governing permissions and limitations +# under the License. +import onnx +import argparse +from onnxsim import simplify + +# Simplify +def simplify_model(args): + onnx_model = onnx.load(args.origin_model) + model_simp, check = simplify(onnx_model) + model_simp = onnx.shape_inference.infer_shapes(model_simp) + onnx.save(model_simp, args.output_model) + print(" Simplify onnx Done.") + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--origin_model", type=str) + parser.add_argument("--output_model", type=str) + parser.add_argument("--reshape", action="store_true") + args = parser.parse_args() + return args + +args = parse_args() +simplify_model(args) + + + + -- Gitee From 43dfd4c4f2b3f48a056fbe5068ad1257a05c2579 Mon Sep 17 00:00:00 2001 From: "xinchi.tian" Date: Wed, 9 Oct 2024 14:52:52 +0800 Subject: [PATCH 2/8] Add densenet169 in IxRT link #IAVG3P Signed-off-by: xinchi.tian --- .../classification/densenet169/ixrt/README.md | 56 ++++++ .../densenet169/ixrt/build_engine.py | 61 +++++++ .../densenet169/ixrt/calibration_dataset.py | 112 ++++++++++++ .../classification/densenet169/ixrt/common.py | 80 ++++++++ .../densenet169/ixrt/config/DENSENET_CONFIG | 33 ++++ .../classification/densenet169/ixrt/export.py | 74 ++++++++ .../densenet169/ixrt/inference.py | 171 ++++++++++++++++++ .../densenet169/ixrt/modify_batchsize.py | 56 ++++++ .../scripts/infer_densenet_fp16_accuracy.sh | 119 ++++++++++++ .../infer_densenet_fp16_performance.sh | 119 ++++++++++++ .../densenet169/ixrt/simplify_model.py | 40 ++++ 11 files changed, 921 insertions(+) create mode 100644 models/cv/classification/densenet169/ixrt/README.md create mode 100644 models/cv/classification/densenet169/ixrt/build_engine.py create mode 100644 models/cv/classification/densenet169/ixrt/calibration_dataset.py create mode 100644 models/cv/classification/densenet169/ixrt/common.py create mode 100644 models/cv/classification/densenet169/ixrt/config/DENSENET_CONFIG create mode 100644 models/cv/classification/densenet169/ixrt/export.py create mode 100644 models/cv/classification/densenet169/ixrt/inference.py create mode 100644 models/cv/classification/densenet169/ixrt/modify_batchsize.py create mode 100644 models/cv/classification/densenet169/ixrt/scripts/infer_densenet_fp16_accuracy.sh create mode 100644 models/cv/classification/densenet169/ixrt/scripts/infer_densenet_fp16_performance.sh create mode 100644 models/cv/classification/densenet169/ixrt/simplify_model.py diff --git a/models/cv/classification/densenet169/ixrt/README.md b/models/cv/classification/densenet169/ixrt/README.md new file mode 100644 index 00000000..8afb8971 --- /dev/null +++ b/models/cv/classification/densenet169/ixrt/README.md @@ -0,0 +1,56 @@ +# DenseNet169 + +## Description + +Dense Convolutional Network (DenseNet), connects each layer to every other layer in a feed-forward fashion. Whereas traditional convolutional networks with L layers have L connections - one between each layer and its subsequent layer - our network has L(L+1)/2 direct connections. + +## Setup + +### Install + +```bash +# Install libGL +## CentOS +yum install -y mesa-libGL +## Ubuntu +apt install -y libgl1-mesa-dev + +pip3 install tqdm +pip3 install onnx +pip3 install onnxsim +pip3 install tabulate +pip3 install cuda-python +``` + +### Download + +Pretrained model: + +Dataset: to download the validation dataset. 
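As with densenet161, the accuracy script reads labels through `calibration_dataset.py`, which wraps `torchvision.datasets.ImageFolder` and additionally expects a `val_map.txt` file (one `<image_name><TAB><label>` pair per line) in the dataset root. A minimal sketch of the layout the loader assumes; directory and file names here are illustrative:

```
imagenet_val/
├── val_map.txt                        # e.g. "ILSVRC2012_val_00000293.JPEG<TAB>0"
├── n01440764/
│   └── ILSVRC2012_val_00000293.JPEG
└── ...
```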
+ +### Model Conversion + +```bash +python3 export.py --weight densenet169-b2777c0a.pth --output densenet169.onnx +cd data && mkdir checkpoints && cd checkpoints && mkdir densenet169 +mv densenet169.onnx densenet169 +``` + +## Inference + + +### FP16 + +```bash +cd deepsparkinference +# Accuracy +bash models/cv/classification/densenet169/ixrt/scripts/infer_densenet_fp16_accuracy.sh +# Performance +bash models/cv/classification/densenet169/ixrt/scripts/infer_densenet_fp16_performance.sh +``` + +## Results + +| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) | +| -------- | --------- | --------- | ------- | -------- | -------- | +| DenseNet | 32 | FP16 | 1119.69 | 0.7558 | 0.9284 | diff --git a/models/cv/classification/densenet169/ixrt/build_engine.py b/models/cv/classification/densenet169/ixrt/build_engine.py new file mode 100644 index 00000000..c0a83a1e --- /dev/null +++ b/models/cv/classification/densenet169/ixrt/build_engine.py @@ -0,0 +1,61 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +import os +import cv2 +import argparse +import numpy as np +import torch +import tensorrt +import cuda.cudart as cudart + +def assertSuccess(err): + assert(err == cudart.cudaError_t.cudaSuccess) + + +def main(config): + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.VERBOSE) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(config.model) + + precision = tensorrt.BuilderFlag.FP16 + + build_config.set_flag(precision) + plan = builder.build_serialized_network(network, build_config) + engine_file_path = config.engine + with open(engine_file_path, "wb") as f: + f.write(plan) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str) + parser.add_argument("--precision", type=str, choices=["float16", "float32"], default="float16", + help="The precision of datatype") + parser.add_argument("--engine", type=str, default=None) + parser.add_argument( + "--datasets_dir", + type=str, + default="deepsparkinference/data/datasets/imagenet_val/", + help="ImageNet dir", + ) + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = parse_args() + main(args) diff --git a/models/cv/classification/densenet169/ixrt/calibration_dataset.py b/models/cv/classification/densenet169/ixrt/calibration_dataset.py new file mode 100644 index 00000000..ec931c65 --- /dev/null +++ b/models/cv/classification/densenet169/ixrt/calibration_dataset.py @@ -0,0 +1,112 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +import os + +import torch +import torchvision.datasets +from torch.utils.data import DataLoader +from torchvision import models +from torchvision import transforms as T + + +class CalibrationImageNet(torchvision.datasets.ImageFolder): + def __init__(self, *args, **kwargs): + super(CalibrationImageNet, self).__init__(*args, **kwargs) + img2label_path = os.path.join(self.root, "val_map.txt") + if not os.path.exists(img2label_path): + raise FileNotFoundError(f"Not found label file `{img2label_path}`.") + + self.img2label_map = self.make_img2label_map(img2label_path) + + def make_img2label_map(self, path): + with open(path) as f: + lines = f.readlines() + + img2lable_map = dict() + for line in lines: + line = line.lstrip().rstrip().split("\t") + if len(line) != 2: + continue + img_name, label = line + img_name = img_name.strip() + if img_name in [None, ""]: + continue + label = int(label.strip()) + img2lable_map[img_name] = label + return img2lable_map + + def __getitem__(self, index): + path, target = self.samples[index] + sample = self.loader(path) + if self.transform is not None: + sample = self.transform(sample) + # if self.target_transform is not None: + # target = self.target_transform(target) + img_name = os.path.basename(path) + target = self.img2label_map[img_name] + + return sample, target + + +def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0): + dataset = CalibrationImageNet( + data_path, + transform=T.Compose( + [ + T.Resize(256), + T.CenterCrop(img_sz), + T.ToTensor(), + T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ] + ), + ) + + calibration_dataset = dataset + if num_samples is not None: + calibration_dataset = torch.utils.data.Subset( + dataset, indices=range(num_samples) + ) + + calibration_dataloader = DataLoader( + calibration_dataset, + shuffle=True, + batch_size=batch_size, + drop_last=False, + num_workers=workers, + ) + + verify_dataloader = DataLoader( + dataset, + shuffle=False, + batch_size=batch_size, + drop_last=False, + num_workers=workers, + ) + + return calibration_dataloader, verify_dataloader + + +def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): + num_samples = min(total_sample, step * batch_size) + if step < 0: + num_samples = None + calibration_dataloader, _ = create_dataloaders( + dataset_dir, + img_sz=img_sz, + batch_size=batch_size, + workers=workers, + num_samples=num_samples, + ) + return calibration_dataloader diff --git a/models/cv/classification/densenet169/ixrt/common.py b/models/cv/classification/densenet169/ixrt/common.py new file mode 100644 index 00000000..21c2b399 --- /dev/null +++ b/models/cv/classification/densenet169/ixrt/common.py @@ -0,0 +1,80 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +import os +import cv2 +import glob +import torch +import tensorrt +import numpy as np +from cuda import cuda, cudart + +def eval_batch(batch_score, batch_label): + batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) + values, indices = batch_score.topk(5) + top1, top5 = 0, 0 + for idx, label in enumerate(batch_label): + + if label == indices[idx][0]: + top1 += 1 + if label in indices[idx]: + top5 += 1 + return top1, top5 + +def create_engine_context(engine_path, logger): + with open(engine_path, "rb") as f: + runtime = tensorrt.Runtime(logger) + assert runtime + engine = runtime.deserialize_cuda_engine(f.read()) + assert engine + context = engine.create_execution_context() + assert context + + return engine, context + +def get_io_bindings(engine): + # Setup I/O bindings + inputs = [] + outputs = [] + allocations = [] + + for i in range(engine.num_bindings): + is_input = False + if engine.binding_is_input(i): + is_input = True + name = engine.get_binding_name(i) + dtype = engine.get_binding_dtype(i) + shape = engine.get_binding_shape(i) + if is_input: + batch_size = shape[0] + size = np.dtype(tensorrt.nptype(dtype)).itemsize + for s in shape: + size *= s + err, allocation = cudart.cudaMalloc(size) + assert err == cudart.cudaError_t.cudaSuccess + binding = { + "index": i, + "name": name, + "dtype": np.dtype(tensorrt.nptype(dtype)), + "shape": list(shape), + "allocation": allocation, + "nbytes": size, + } + print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") + allocations.append(allocation) + if engine.binding_is_input(i): + inputs.append(binding) + else: + outputs.append(binding) + return inputs, outputs, allocations diff --git a/models/cv/classification/densenet169/ixrt/config/DENSENET_CONFIG b/models/cv/classification/densenet169/ixrt/config/DENSENET_CONFIG new file mode 100644 index 00000000..73aeedee --- /dev/null +++ b/models/cv/classification/densenet169/ixrt/config/DENSENET_CONFIG @@ -0,0 +1,33 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
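# NOTE: this file is sourced as shell variables by scripts/infer_densenet_fp16_*.sh,
# which forward the values to simplify_model.py, modify_batchsize.py,
# build_engine.py and inference.py.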
# IMGSIZE       : model input height/width
# MODEL_NAME    : basename for the generated onnx/engine files
# ORIGINE_MODEL : original onnx file name
IMGSIZE=224
MODEL_NAME=DenseNet
ORIGINE_MODEL=densenet169.onnx

# QUANT CONFIG (only takes effect when PRECISION is int8)
    # QUANT_OBSERVER   : quantization strategy, one of [hist_percentile, percentile, minmax, entropy, ema]
    # QUANT_BATCHSIZE  : batch size of the calibration dataloader; keep it identical to the batch size in the onnx, otherwise some ops (e.g. Reshape) may infer wrong shapes
    # QUANT_STEP       : number of calibration steps
    # QUANT_SEED       : random seed, to make quantization results reproducible
    # QUANT_EXIST_ONNX : set this when a quantized model from another source should be used
QUANT_OBSERVER=minmax
QUANT_BATCHSIZE=1
QUANT_STEP=32
QUANT_SEED=42
DISABLE_QUANT_LIST=
QUANT_EXIST_ONNX=
diff --git a/models/cv/classification/densenet169/ixrt/export.py b/models/cv/classification/densenet169/ixrt/export.py
new file mode 100644
index 00000000..dd3743c9
--- /dev/null
+++ b/models/cv/classification/densenet169/ixrt/export.py
@@ -0,0 +1,74 @@
# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

import torch
import torchvision
import argparse
import re

def parse_args():
    parser = argparse.ArgumentParser()

    parser.add_argument("--weight",
                        type=str,
                        required=True,
                        help="pytorch model weight.")

    parser.add_argument("--output",
                        type=str,
                        required=True,
                        help="export onnx model path.")

    args = parser.parse_args()
    return args

def main():
    args = parse_args()

    model = torchvision.models.densenet169(weights=None)

    state_dict = torch.load(args.weight)

    # Old checkpoints store keys like "denselayer1.norm.1.weight"; remap them to
    # the current torchvision naming "denselayer1.norm1.weight".
    pattern = re.compile(r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$')
    for key in list(state_dict.keys()):
        res = pattern.match(key)
        if res:
            new_key = res.group(1) + res.group(2)
            state_dict[new_key] = state_dict[key]
            del state_dict[key]

    model.load_state_dict(state_dict)
    model.eval()

    input_names = ['input']
    output_names = ['output']
    # Export with a fixed batch size; modify_batchsize.py rewrites the batch
    # dimension of the onnx afterwards.
    dummy_input = torch.randn(1, 3, 224, 224)

    torch.onnx.export(
        model,
        dummy_input,
        args.output,
        input_names = input_names,
        dynamic_axes = None,
        output_names = output_names,
        opset_version=13
    )

    print("Exported onnx model successfully!")

if __name__ == "__main__":
    main()
diff --git a/models/cv/classification/densenet169/ixrt/inference.py b/models/cv/classification/densenet169/ixrt/inference.py
new file mode 100644
index 00000000..22f1644c
--- /dev/null
+++ b/models/cv/classification/densenet169/ixrt/inference.py
@@ -0,0 +1,171 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +import argparse +import json +import os +import re +import time +from tqdm import tqdm + +import cv2 +import numpy as np +from cuda import cuda, cudart +import torch +import tensorrt + +from calibration_dataset import getdataloader +from common import eval_batch, create_engine_context, get_io_bindings + +def main(config): + dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) + + host_mem = tensorrt.IHostMemory + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + + # Load Engine && I/O bindings + engine, context = create_engine_context(config.engine_file, logger) + inputs, outputs, allocations = get_io_bindings(engine) + + # Warm up + if config.warm_up > 0: + print("\nWarm Start.") + for i in range(config.warm_up): + context.execute_v2(allocations) + print("Warm Done.") + + # Inference + if config.test_mode == "FPS": + torch.cuda.synchronize() + start_time = time.time() + + for i in range(config.loop_count): + context.execute_v2(allocations) + + torch.cuda.synchronize() + end_time = time.time() + forward_time = end_time - start_time + + num_samples = 50000 + if config.loop_count * config.bsz < num_samples: + num_samples = config.loop_count * config.bsz + fps = num_samples / forward_time + + print("FPS : ", fps) + print(f"Performance Check : Test {fps} >= target {config.fps_target}") + if fps >= config.fps_target: + print("pass!") + exit() + else: + print("failed!") + exit(1) + + elif config.test_mode == "ACC": + + ## Prepare the output data + output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) + print(f"output shape : {output.shape} output type : {output.dtype}") + + total_sample = 0 + acc_top1, acc_top5 = 0, 0 + + with tqdm(total= len(dataloader)) as _tqdm: + for idx, (batch_data, batch_label) in enumerate(dataloader): + batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) + batch_data = np.ascontiguousarray(batch_data) + total_sample += batch_data.shape[0] + (err,) = cudart.cudaMemcpy( + inputs[0]["allocation"], + batch_data, + batch_data.nbytes, + cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, + ) + assert err == cudart.cudaError_t.cudaSuccess + # cuda.memcpy_htod(inputs[0]["allocation"], batch_data) + context.execute_v2(allocations) + (err,) = cudart.cudaMemcpy( + output, + outputs[0]["allocation"], + outputs[0]["nbytes"], + cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost, + ) + assert err == cudart.cudaError_t.cudaSuccess + # cuda.memcpy_dtoh(output, outputs[0]["allocation"]) + + # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model + if len(output.shape) == 4: + output = output.squeeze(axis=(2,3)) + + batch_top1, batch_top5 = eval_batch(output, batch_label) + acc_top1 += batch_top1 + acc_top5 += batch_top5 + + _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), + acc_5='{:.4f}'.format(acc_top5/total_sample)) + _tqdm.update(1) + err, = cudart.cudaFree(inputs[0]["allocation"]) + assert err == cudart.cudaError_t.cudaSuccess + err, = cudart.cudaFree(outputs[0]["allocation"]) + assert err == cudart.cudaError_t.cudaSuccess + print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") + 
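        # top-5 counts a hit whenever the label appears among the five highest
        # logits of a sample (see eval_batch in common.py)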
print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") + acc1 = acc_top1/total_sample + print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") + if acc1 >= config.acc_target: + print("pass!") + exit() + else: + print("failed!") + exit(1) + +def parse_config(): + parser = argparse.ArgumentParser() + parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") + parser.add_argument( + "--engine_file", + type=str, + help="engine file path" + ) + parser.add_argument( + "--datasets_dir", + type=str, + default="", + help="ImageNet dir", + ) + parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") + parser.add_argument("--bsz", type=int, default=32, help="test batch size") + parser.add_argument( + "--imgsz", + "--img", + "--img-size", + type=int, + default=224, + help="inference size h,w", + ) + parser.add_argument("--use_async", action="store_true") + parser.add_argument( + "--device", type=int, default=0, help="cuda device, i.e. 0 or 0,1,2,3,4" + ) + parser.add_argument("--fps_target", type=float, default=-1.0) + parser.add_argument("--acc_target", type=float, default=-1.0) + parser.add_argument("--loop_count", type=int, default=-1) + + config = parser.parse_args() + return config + +if __name__ == "__main__": + config = parse_config() + main(config) diff --git a/models/cv/classification/densenet169/ixrt/modify_batchsize.py b/models/cv/classification/densenet169/ixrt/modify_batchsize.py new file mode 100644 index 00000000..689b7a97 --- /dev/null +++ b/models/cv/classification/densenet169/ixrt/modify_batchsize.py @@ -0,0 +1,56 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +import onnx +import argparse + +def change_input_dim(model, bsz): + batch_size = bsz + + # The following code changes the first dimension of every input to be batch_size + # Modify as appropriate ... note that this requires all inputs to + # have the same batch_size + inputs = model.graph.input + for input in inputs: + # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. + # Add checks as needed. 
        dim1 = input.type.tensor_type.shape.dim[0]
        # update dim to be a symbolic value
        if isinstance(batch_size, str) and not batch_size.isdigit():
            # set dynamic batch size
            dim1.dim_param = batch_size
        elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int):
            # set given batch size
            dim1.dim_value = int(batch_size)
        else:
            # set batch size of 1
            dim1.dim_value = 1

def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--batch_size", type=int)
    parser.add_argument("--origin_model", type=str)
    parser.add_argument("--output_model", type=str)
    args = parser.parse_args()
    return args

args = parse_args()
model = onnx.load(args.origin_model)
change_input_dim(model, args.batch_size)
onnx.save(model, args.output_model)
diff --git a/models/cv/classification/densenet169/ixrt/scripts/infer_densenet_fp16_accuracy.sh b/models/cv/classification/densenet169/ixrt/scripts/infer_densenet_fp16_accuracy.sh
new file mode 100644
index 00000000..78721dd8
--- /dev/null
+++ b/models/cv/classification/densenet169/ixrt/scripts/infer_densenet_fp16_accuracy.sh
@@ -0,0 +1,119 @@
#!/bin/bash
# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
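# check_status records a non-zero exit code from the most recent command
# (via PIPESTATUS) in EXIT_STATUS instead of aborting immediately; the
# script exits with that status at the end.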
+EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +TGT=-1 +WARM_UP=0 +LOOP_COUNT=-1 +RUN_MODE=ACC +PRECISION=float16 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +PROJ_DIR=$(cd $(dirname $0);cd ../../../../../..; pwd) +DATASETS_DIR="${PROJ_DIR}/data/datasets/imagenet_val/" +CHECKPOINTS_DIR="${PROJ_DIR}/data/checkpoints/densenet169/" +RUN_DIR="${PROJ_DIR}/models/cv/classification/densenet169/ixrt/" +CONFIG_DIR="${PROJ_DIR}/models/cv/classification/densenet169/ixrt/config/DENSENET_CONFIG" +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +step=0 +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx + +# Simplify Model +let step++ +echo; +echo [STEP ${step}] : Simplify Model +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ + --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${FINAL_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --engine_file=${ENGINE_FILE} \ + --datasets_dir=${DATASETS_DIR} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --acc_target ${TGT} \ + --bsz ${BSZ}; check_status + +exit ${EXIT_STATUS} diff --git a/models/cv/classification/densenet169/ixrt/scripts/infer_densenet_fp16_performance.sh b/models/cv/classification/densenet169/ixrt/scripts/infer_densenet_fp16_performance.sh new file mode 100644 index 00000000..e7c087bf --- /dev/null +++ b/models/cv/classification/densenet169/ixrt/scripts/infer_densenet_fp16_performance.sh @@ -0,0 +1,119 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# License for the specific language governing permissions and limitations
+# under the License.
+EXIT_STATUS=0
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then
+        EXIT_STATUS=1
+    fi
+}
+
+# Run parameters
+BSZ=32
+TGT=-1
+WARM_UP=3
+LOOP_COUNT=20
+RUN_MODE=FPS
+PRECISION=float16
+
+# Update arguments
+index=0
+options=$@
+arguments=($options)
+for argument in $options
+do
+    index=`expr $index + 1`
+    case $argument in
+      --bs) BSZ=${arguments[index]};;
+      --tgt) TGT=${arguments[index]};;
+    esac
+done
+
+PROJ_DIR=$(cd $(dirname $0);cd ../../../../../..; pwd)
+DATASETS_DIR="${PROJ_DIR}/data/datasets/imagenet_val/"
+CHECKPOINTS_DIR="${PROJ_DIR}/data/checkpoints/densenet169/"
+RUN_DIR="${PROJ_DIR}/models/cv/classification/densenet169/ixrt/"
+CONFIG_DIR="${PROJ_DIR}/models/cv/classification/densenet169/ixrt/config/DENSENET_CONFIG"
+source ${CONFIG_DIR}
+ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo CONFIG_DIR : ${CONFIG_DIR}
+echo ====================== Model Info ======================
+echo Model Name : ${MODEL_NAME}
+echo Onnx Path : ${ORIGINE_MODEL}
+
+step=0
+SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
+
+# Simplify Model
+let step++
+echo;
+echo [STEP ${step}] : Simplify Model
+if [ -f ${SIM_MODEL} ];then
+    echo "  "Simplify Model Skip, ${SIM_MODEL} already exists
+else
+    python3 ${RUN_DIR}/simplify_model.py \
+    --origin_model $ORIGINE_MODEL \
+    --output_model ${SIM_MODEL}
+    echo "  "Generate ${SIM_MODEL}
+fi
+
+# Change Batchsize
+let step++
+echo;
+echo [STEP ${step}] : Change Batchsize
+FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx
+if [ -f $FINAL_MODEL ];then
+    echo "  "Change Batchsize Skip, $FINAL_MODEL already exists
+else
+    python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
+    --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL}
+    echo "  "Generate ${FINAL_MODEL}
+fi
+
+# Build Engine
+let step++
+echo;
+echo [STEP ${step}] : Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
+if [ -f $ENGINE_FILE ];then
+    echo "  "Build Engine Skip, $ENGINE_FILE already exists
+else
+    python3 ${RUN_DIR}/build_engine.py \
+    --precision ${PRECISION} \
+    --model ${FINAL_MODEL} \
+    --engine ${ENGINE_FILE}
+    echo "  "Generate Engine ${ENGINE_FILE}
+fi
+
+# Inference
+let step++
+echo;
+echo [STEP ${step}] : Inference
+python3 ${RUN_DIR}/inference.py \
+    --engine_file=${ENGINE_FILE} \
+    --datasets_dir=${DATASETS_DIR} \
+    --imgsz=${IMGSIZE} \
+    --warm_up=${WARM_UP} \
+    --loop_count ${LOOP_COUNT} \
+    --test_mode ${RUN_MODE} \
+    --fps_target ${TGT} \
+    --bsz ${BSZ}; check_status
+
+exit ${EXIT_STATUS}
diff --git a/models/cv/classification/densenet169/ixrt/simplify_model.py b/models/cv/classification/densenet169/ixrt/simplify_model.py
new file mode 100644
index 00000000..9948a9fa
--- /dev/null
+++ b/models/cv/classification/densenet169/ixrt/simplify_model.py
@@ -0,0 +1,40 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the +# License for the specific language governing permissions and limitations +# under the License. +import onnx +import argparse +from onnxsim import simplify + +# Simplify +def simplify_model(args): + onnx_model = onnx.load(args.origin_model) + model_simp, check = simplify(onnx_model) + model_simp = onnx.shape_inference.infer_shapes(model_simp) + onnx.save(model_simp, args.output_model) + print(" Simplify onnx Done.") + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--origin_model", type=str) + parser.add_argument("--output_model", type=str) + parser.add_argument("--reshape", action="store_true") + args = parser.parse_args() + return args + +args = parse_args() +simplify_model(args) + + + + -- Gitee From beed28977c322ffa86ebc31e6205af68edfaf1fb Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Fri, 11 Oct 2024 08:47:29 +0000 Subject: [PATCH 3/8] use current dir as run path --- models/cv/classification/densenet161/ixrt/README.md | 10 +++++----- .../ixrt/scripts/infer_densenet_fp16_accuracy.sh | 10 +++++----- .../ixrt/scripts/infer_densenet_fp16_performance.sh | 10 +++++----- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/models/cv/classification/densenet161/ixrt/README.md b/models/cv/classification/densenet161/ixrt/README.md index 08e7bd16..cb65f64a 100644 --- a/models/cv/classification/densenet161/ixrt/README.md +++ b/models/cv/classification/densenet161/ixrt/README.md @@ -31,21 +31,21 @@ Dataset: to download the validation dat ```bash python3 export.py --weight densenet161-8d451a50.pth --output densenet161.onnx -cd data && mkdir checkpoints && cd checkpoints && mkdir densenet161 -mv densenet161.onnx densenet161 ``` ## Inference +```bash +export DATASETS_DIR=/Path/to/imagenet_val/ +``` ### FP16 ```bash -cd deepsparkinference # Accuracy -bash models/cv/classification/densenet161/ixrt/scripts/infer_densenet_fp16_accuracy.sh +bash scripts/infer_densenet_fp16_accuracy.sh # Performance -bash models/cv/classification/densenet161/ixrt/scripts/infer_densenet_fp16_performance.sh +bash scripts/infer_densenet_fp16_performance.sh ``` ## Results diff --git a/models/cv/classification/densenet161/ixrt/scripts/infer_densenet_fp16_accuracy.sh b/models/cv/classification/densenet161/ixrt/scripts/infer_densenet_fp16_accuracy.sh index e266c65a..dfaf40d7 100644 --- a/models/cv/classification/densenet161/ixrt/scripts/infer_densenet_fp16_accuracy.sh +++ b/models/cv/classification/densenet161/ixrt/scripts/infer_densenet_fp16_accuracy.sh @@ -42,11 +42,11 @@ do esac done -PROJ_DIR=$(cd $(dirname $0);cd ../../../../../..; pwd) -DATASETS_DIR="${PROJ_DIR}/data/datasets/imagenet_val/" -CHECKPOINTS_DIR="${PROJ_DIR}/data/checkpoints/densenet161/" -RUN_DIR="${PROJ_DIR}/models/cv/classification/densenet161/ixrt/" -CONFIG_DIR="${PROJ_DIR}/models/cv/classification/densenet161/ixrt/config/DENSENET_CONFIG" +PROJ_DIR=${PROJ_DIR:-"."} +DATASETS_DIR="${DATASETS_DIR}" +CHECKPOINTS_DIR="${PROJ_DIR}" +RUN_DIR="${PROJ_DIR}" +CONFIG_DIR="${PROJ_DIR}/config/DENSENET_CONFIG" source ${CONFIG_DIR} ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} diff --git a/models/cv/classification/densenet161/ixrt/scripts/infer_densenet_fp16_performance.sh b/models/cv/classification/densenet161/ixrt/scripts/infer_densenet_fp16_performance.sh index cc0816ef..f09ccfcc 100644 --- a/models/cv/classification/densenet161/ixrt/scripts/infer_densenet_fp16_performance.sh +++ b/models/cv/classification/densenet161/ixrt/scripts/infer_densenet_fp16_performance.sh @@ -42,11 +42,11 @@ do esac done 
-PROJ_DIR=$(cd $(dirname $0);cd ../../../../../..; pwd) -DATASETS_DIR="${PROJ_DIR}/data/datasets/imagenet_val/" -CHECKPOINTS_DIR="${PROJ_DIR}/data/checkpoints/densenet161/" -RUN_DIR="${PROJ_DIR}/models/cv/classification/densenet161/ixrt/" -CONFIG_DIR="${PROJ_DIR}/models/cv/classification/densenet161/ixrt/config/DENSENET_CONFIG" +PROJ_DIR=${PROJ_DIR:-"."} +DATASETS_DIR="${DATASETS_DIR}" +CHECKPOINTS_DIR="${PROJ_DIR}" +RUN_DIR="${PROJ_DIR}" +CONFIG_DIR="${PROJ_DIR}/config/DENSENET_CONFIG" source ${CONFIG_DIR} ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} -- Gitee From aeb79a830562d7d06d92161fdc60bdffbd3291cc Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Fri, 11 Oct 2024 08:34:28 +0000 Subject: [PATCH 4/8] use current dir as run path --- models/cv/classification/densenet169/ixrt/README.md | 10 +++++----- .../ixrt/scripts/infer_densenet_fp16_accuracy.sh | 10 +++++----- .../ixrt/scripts/infer_densenet_fp16_performance.sh | 10 +++++----- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/models/cv/classification/densenet169/ixrt/README.md b/models/cv/classification/densenet169/ixrt/README.md index 8afb8971..6abd12c0 100644 --- a/models/cv/classification/densenet169/ixrt/README.md +++ b/models/cv/classification/densenet169/ixrt/README.md @@ -32,21 +32,21 @@ Dataset: to download the validation dat ```bash python3 export.py --weight densenet169-b2777c0a.pth --output densenet169.onnx -cd data && mkdir checkpoints && cd checkpoints && mkdir densenet169 -mv densenet169.onnx densenet169 ``` ## Inference +```bash +export DATASETS_DIR=/Path/to/imagenet_val/ +``` ### FP16 ```bash -cd deepsparkinference # Accuracy -bash models/cv/classification/densenet169/ixrt/scripts/infer_densenet_fp16_accuracy.sh +bash scripts/infer_densenet_fp16_accuracy.sh # Performance -bash models/cv/classification/densenet169/ixrt/scripts/infer_densenet_fp16_performance.sh +bash scripts/infer_densenet_fp16_performance.sh ``` ## Results diff --git a/models/cv/classification/densenet169/ixrt/scripts/infer_densenet_fp16_accuracy.sh b/models/cv/classification/densenet169/ixrt/scripts/infer_densenet_fp16_accuracy.sh index 78721dd8..eb17c406 100644 --- a/models/cv/classification/densenet169/ixrt/scripts/infer_densenet_fp16_accuracy.sh +++ b/models/cv/classification/densenet169/ixrt/scripts/infer_densenet_fp16_accuracy.sh @@ -42,11 +42,11 @@ do esac done -PROJ_DIR=$(cd $(dirname $0);cd ../../../../../..; pwd) -DATASETS_DIR="${PROJ_DIR}/data/datasets/imagenet_val/" -CHECKPOINTS_DIR="${PROJ_DIR}/data/checkpoints/densenet169/" -RUN_DIR="${PROJ_DIR}/models/cv/classification/densenet169/ixrt/" -CONFIG_DIR="${PROJ_DIR}/models/cv/classification/densenet169/ixrt/config/DENSENET_CONFIG" +PROJ_DIR=${PROJ_DIR:-"."} +DATASETS_DIR=${DATASETS_DIR} +CHECKPOINTS_DIR="${PROJ_DIR}" +RUN_DIR="${PROJ_DIR}" +CONFIG_DIR="${PROJ_DIR}/config/DENSENET_CONFIG" source ${CONFIG_DIR} ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} diff --git a/models/cv/classification/densenet169/ixrt/scripts/infer_densenet_fp16_performance.sh b/models/cv/classification/densenet169/ixrt/scripts/infer_densenet_fp16_performance.sh index e7c087bf..f09ccfcc 100644 --- a/models/cv/classification/densenet169/ixrt/scripts/infer_densenet_fp16_performance.sh +++ b/models/cv/classification/densenet169/ixrt/scripts/infer_densenet_fp16_performance.sh @@ -42,11 +42,11 @@ do esac done -PROJ_DIR=$(cd $(dirname $0);cd ../../../../../..; pwd) -DATASETS_DIR="${PROJ_DIR}/data/datasets/imagenet_val/" -CHECKPOINTS_DIR="${PROJ_DIR}/data/checkpoints/densenet169/" 
-RUN_DIR="${PROJ_DIR}/models/cv/classification/densenet169/ixrt/"
-CONFIG_DIR="${PROJ_DIR}/models/cv/classification/densenet169/ixrt/config/DENSENET_CONFIG"
+PROJ_DIR=${PROJ_DIR:-"."}
+DATASETS_DIR="${DATASETS_DIR}"
+CHECKPOINTS_DIR="${PROJ_DIR}"
+RUN_DIR="${PROJ_DIR}"
+CONFIG_DIR="${PROJ_DIR}/config/DENSENET_CONFIG"
 source ${CONFIG_DIR}
 ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}
-- 
Gitee


From 197455d100785efb6eae4761e62d61396f9ec6ae Mon Sep 17 00:00:00 2001
From: "xinchi.tian"
Date: Wed, 9 Oct 2024 14:46:27 +0800
Subject: [PATCH 5/8] Add ConvNeXt in IxRT link #IAVG3P

Signed-off-by: xinchi.tian
---
 .../convnext_small/ixrt/README.md             |  59 ++++++
 .../convnext_small/ixrt/build_engine.py       |  52 ++++++
 .../ixrt/calibration_dataset.py               | 112 ++++++++++++
 .../convnext_small/ixrt/common.py             |  81 +++++++++
 .../ixrt/config/CONVNEXT_CONFIG               |  33 ++++
 .../convnext_small/ixrt/export.py             |  61 +++++++
 .../convnext_small/ixrt/inference.py          | 171 ++++++++++++++++++
 .../convnext_small/ixrt/modify_batchsize.py   |  56 ++++++
 .../infer_convnext_small_fp16_accuracy.sh     | 119 ++++++++++++
 .../infer_convnext_small_fp16_performance.sh  | 119 ++++++++++++
 .../convnext_small/ixrt/simplify_model.py     |  40 ++++
 11 files changed, 903 insertions(+)
 create mode 100644 models/cv/classification/convnext_small/ixrt/README.md
 create mode 100644 models/cv/classification/convnext_small/ixrt/build_engine.py
 create mode 100644 models/cv/classification/convnext_small/ixrt/calibration_dataset.py
 create mode 100644 models/cv/classification/convnext_small/ixrt/common.py
 create mode 100644 models/cv/classification/convnext_small/ixrt/config/CONVNEXT_CONFIG
 create mode 100644 models/cv/classification/convnext_small/ixrt/export.py
 create mode 100644 models/cv/classification/convnext_small/ixrt/inference.py
 create mode 100644 models/cv/classification/convnext_small/ixrt/modify_batchsize.py
 create mode 100644 models/cv/classification/convnext_small/ixrt/scripts/infer_convnext_small_fp16_accuracy.sh
 create mode 100644 models/cv/classification/convnext_small/ixrt/scripts/infer_convnext_small_fp16_performance.sh
 create mode 100644 models/cv/classification/convnext_small/ixrt/simplify_model.py

diff --git a/models/cv/classification/convnext_small/ixrt/README.md b/models/cv/classification/convnext_small/ixrt/README.md
new file mode 100644
index 00000000..c6550180
--- /dev/null
+++ b/models/cv/classification/convnext_small/ixrt/README.md
@@ -0,0 +1,59 @@
+# ConvNeXt Small
+
+## Description
+
+The ConvNeXt Small model represents a significant stride in the evolution of convolutional neural networks (CNNs), introduced by researchers at Facebook AI Research (FAIR) and UC Berkeley. It is part of the ConvNeXt family, which challenges the dominance of Vision Transformers (ViTs) in the realm of visual recognition tasks.
+
+## Setup
+
+### Install
+
+```bash
+# Install libGL
+## CentOS
+yum install -y mesa-libGL
+## Ubuntu
+apt install -y libgl1-mesa-dev
+
+pip3 install tqdm
+pip3 install onnx
+pip3 install onnxsim
+pip3 install tabulate
+pip3 install ppq
+pip3 install cuda-python
+```
+
+### Download
+
+Pretrained model: 
+
+Dataset: to download the validation dataset.
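+
+The calibration and accuracy code (`calibration_dataset.py`) expects a
+`val_map.txt` file inside the dataset directory, one `image_name<TAB>label`
+pair per line. A minimal sketch to validate the layout before running — the
+path below is an example, not part of this repository:
+
+```python
+# Check that an ImageNet val directory carries the label map the scripts need.
+import os
+
+datasets_dir = "/path/to/imagenet_val"  # example path
+map_file = os.path.join(datasets_dir, "val_map.txt")
+assert os.path.isfile(map_file), f"missing label map: {map_file}"
+
+with open(map_file) as f:
+    for line in f:
+        parts = line.strip().split("\t")
+        if len(parts) != 2:
+            continue  # malformed lines are skipped, as in calibration_dataset.py
+        name, label = parts
+        assert label.strip().isdigit(), f"non-numeric label for {name}"
+print("val_map.txt looks consistent.")
+```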
+ +### Model Conversion + +```bash +python3 export.py --weight convnext_small-0c510722.pth --output convnext_small.onnx +``` + +## Inference + +```bash +export DATASETS_DIR=/Path/to/imagenet_val/ +``` + +### FP16 + +```bash + +# Accuracy +bash scripts/infer_convnext_small_fp16_accuracy.sh +# Performance +bash scripts/infer_convnext_small_fp16_performance.sh +``` + +## Results + +| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) | +| -------------- | --------- | --------- | ------- | -------- | -------- | +| ConvNeXt Small | 32 | FP16 | 323.508 | 83.302 | 96.548 | diff --git a/models/cv/classification/convnext_small/ixrt/build_engine.py b/models/cv/classification/convnext_small/ixrt/build_engine.py new file mode 100644 index 00000000..038c15d5 --- /dev/null +++ b/models/cv/classification/convnext_small/ixrt/build_engine.py @@ -0,0 +1,52 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import cv2 +import argparse +import numpy as np + +import torch +import tensorrt + +def main(config): + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(config.model) + + precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 + build_config.set_flag(precision) + + plan = builder.build_serialized_network(network, build_config) + engine_file_path = config.engine + with open(engine_file_path, "wb") as f: + f.write(plan) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", + help="The precision of datatype") + parser.add_argument("--engine", type=str, default=None) + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = parse_args() + main(args) \ No newline at end of file diff --git a/models/cv/classification/convnext_small/ixrt/calibration_dataset.py b/models/cv/classification/convnext_small/ixrt/calibration_dataset.py new file mode 100644 index 00000000..ec931c65 --- /dev/null +++ b/models/cv/classification/convnext_small/ixrt/calibration_dataset.py @@ -0,0 +1,112 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +import os + +import torch +import torchvision.datasets +from torch.utils.data import DataLoader +from torchvision import models +from torchvision import transforms as T + + +class CalibrationImageNet(torchvision.datasets.ImageFolder): + def __init__(self, *args, **kwargs): + super(CalibrationImageNet, self).__init__(*args, **kwargs) + img2label_path = os.path.join(self.root, "val_map.txt") + if not os.path.exists(img2label_path): + raise FileNotFoundError(f"Not found label file `{img2label_path}`.") + + self.img2label_map = self.make_img2label_map(img2label_path) + + def make_img2label_map(self, path): + with open(path) as f: + lines = f.readlines() + + img2lable_map = dict() + for line in lines: + line = line.lstrip().rstrip().split("\t") + if len(line) != 2: + continue + img_name, label = line + img_name = img_name.strip() + if img_name in [None, ""]: + continue + label = int(label.strip()) + img2lable_map[img_name] = label + return img2lable_map + + def __getitem__(self, index): + path, target = self.samples[index] + sample = self.loader(path) + if self.transform is not None: + sample = self.transform(sample) + # if self.target_transform is not None: + # target = self.target_transform(target) + img_name = os.path.basename(path) + target = self.img2label_map[img_name] + + return sample, target + + +def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0): + dataset = CalibrationImageNet( + data_path, + transform=T.Compose( + [ + T.Resize(256), + T.CenterCrop(img_sz), + T.ToTensor(), + T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ] + ), + ) + + calibration_dataset = dataset + if num_samples is not None: + calibration_dataset = torch.utils.data.Subset( + dataset, indices=range(num_samples) + ) + + calibration_dataloader = DataLoader( + calibration_dataset, + shuffle=True, + batch_size=batch_size, + drop_last=False, + num_workers=workers, + ) + + verify_dataloader = DataLoader( + dataset, + shuffle=False, + batch_size=batch_size, + drop_last=False, + num_workers=workers, + ) + + return calibration_dataloader, verify_dataloader + + +def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): + num_samples = min(total_sample, step * batch_size) + if step < 0: + num_samples = None + calibration_dataloader, _ = create_dataloaders( + dataset_dir, + img_sz=img_sz, + batch_size=batch_size, + workers=workers, + num_samples=num_samples, + ) + return calibration_dataloader diff --git a/models/cv/classification/convnext_small/ixrt/common.py b/models/cv/classification/convnext_small/ixrt/common.py new file mode 100644 index 00000000..fd6a84d8 --- /dev/null +++ b/models/cv/classification/convnext_small/ixrt/common.py @@ -0,0 +1,81 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import cv2 +import glob +import torch +import tensorrt +import numpy as np +from cuda import cuda, cudart + +def eval_batch(batch_score, batch_label): + batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) + values, indices = batch_score.topk(5) + top1, top5 = 0, 0 + for idx, label in enumerate(batch_label): + + if label == indices[idx][0]: + top1 += 1 + if label in indices[idx]: + top5 += 1 + return top1, top5 + +def create_engine_context(engine_path, logger): + with open(engine_path, "rb") as f: + runtime = tensorrt.Runtime(logger) + assert runtime + engine = runtime.deserialize_cuda_engine(f.read()) + assert engine + context = engine.create_execution_context() + assert context + + return engine, context + +def get_io_bindings(engine): + # Setup I/O bindings + inputs = [] + outputs = [] + allocations = [] + + for i in range(engine.num_bindings): + is_input = False + if engine.binding_is_input(i): + is_input = True + name = engine.get_binding_name(i) + dtype = engine.get_binding_dtype(i) + shape = engine.get_binding_shape(i) + if is_input: + batch_size = shape[0] + size = np.dtype(tensorrt.nptype(dtype)).itemsize + for s in shape: + size *= s + err, allocation = cudart.cudaMalloc(size) + assert err == cudart.cudaError_t.cudaSuccess + binding = { + "index": i, + "name": name, + "dtype": np.dtype(tensorrt.nptype(dtype)), + "shape": list(shape), + "allocation": allocation, + "nbytes": size, + } + print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") + allocations.append(allocation) + if engine.binding_is_input(i): + inputs.append(binding) + else: + outputs.append(binding) + return inputs, outputs, allocations \ No newline at end of file diff --git a/models/cv/classification/convnext_small/ixrt/config/CONVNEXT_CONFIG b/models/cv/classification/convnext_small/ixrt/config/CONVNEXT_CONFIG new file mode 100644 index 00000000..26112ba6 --- /dev/null +++ b/models/cv/classification/convnext_small/ixrt/config/CONVNEXT_CONFIG @@ -0,0 +1,33 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
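+
+# This file is sourced by the scripts under scripts/ (`source ${CONFIG_DIR}`),
+# so each assignment below becomes a shell variable such as ${IMGSIZE},
+# ${MODEL_NAME} or ${ORIGINE_MODEL}.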
+
+# IMGSIZE       : model input height/width
+# MODEL_NAME    : basename for the generated onnx/engine files
+# ORIGINE_MODEL : filename of the original onnx model
+IMGSIZE=224
+MODEL_NAME=Convnext_small
+ORIGINE_MODEL=convnext_small.onnx
+
+# QUANT CONFIG (only takes effect when PRECISION is int8)
+    # QUANT_OBSERVER   : quantization observer, one of [hist_percentile, percentile, minmax, entropy, ema]
+    # QUANT_BATCHSIZE  : batch size of the calibration dataloader; best kept equal to the onnx
+    #                    batch size, since some ops (e.g. Reshape) may otherwise infer wrong shapes
+    # QUANT_STEP       : number of calibration steps
+    # QUANT_SEED       : random seed, so that quantization results are reproducible
+    # QUANT_EXIST_ONNX : set this when a quantized model from another source should be used
+QUANT_OBSERVER=minmax
+QUANT_BATCHSIZE=1
+QUANT_STEP=32
+QUANT_SEED=42
+DISABLE_QUANT_LIST=
+QUANT_EXIST_ONNX=
diff --git a/models/cv/classification/convnext_small/ixrt/export.py b/models/cv/classification/convnext_small/ixrt/export.py
new file mode 100644
index 00000000..a58e2d60
--- /dev/null
+++ b/models/cv/classification/convnext_small/ixrt/export.py
@@ -0,0 +1,61 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import torch
+import torchvision
+import argparse
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument("--weight",
+                        type=str,
+                        required=True,
+                        help="pytorch model weight.")
+
+    parser.add_argument("--output",
+                        type=str,
+                        required=True,
+                        help="export onnx model path.")
+
+    args = parser.parse_args()
+    return args
+
+def main():
+    args = parse_args()
+
+    model = torchvision.models.convnext_small()
+    model.load_state_dict(torch.load(args.weight))
+    model.eval()
+
+    input_names = ['input']
+    output_names = ['output']
+    # The model is exported with a fixed batch dimension; modify_batchsize.py
+    # rewrites it to the requested batch size before the engine is built.
+    dummy_input = torch.randn(1, 3, 224, 224)
+
+    torch.onnx.export(
+        model,
+        dummy_input,
+        args.output,
+        input_names = input_names,
+        output_names = output_names,
+        opset_version=13
+    )
+
+    print("Export onnx model successfully!")
+
+if __name__ == "__main__":
+    main()
diff --git a/models/cv/classification/convnext_small/ixrt/inference.py b/models/cv/classification/convnext_small/ixrt/inference.py
new file mode 100644
index 00000000..22f1644c
--- /dev/null
+++ b/models/cv/classification/convnext_small/ixrt/inference.py
@@ -0,0 +1,171 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
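+
+# This script runs one of two checks, selected by --test_mode:
+#   FPS: executes the engine loop_count times and compares throughput
+#        against --fps_target;
+#   ACC: streams the ImageNet validation set through the engine and
+#        compares Top-1 accuracy against --acc_target.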
+import argparse +import json +import os +import re +import time +from tqdm import tqdm + +import cv2 +import numpy as np +from cuda import cuda, cudart +import torch +import tensorrt + +from calibration_dataset import getdataloader +from common import eval_batch, create_engine_context, get_io_bindings + +def main(config): + dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) + + host_mem = tensorrt.IHostMemory + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + + # Load Engine && I/O bindings + engine, context = create_engine_context(config.engine_file, logger) + inputs, outputs, allocations = get_io_bindings(engine) + + # Warm up + if config.warm_up > 0: + print("\nWarm Start.") + for i in range(config.warm_up): + context.execute_v2(allocations) + print("Warm Done.") + + # Inference + if config.test_mode == "FPS": + torch.cuda.synchronize() + start_time = time.time() + + for i in range(config.loop_count): + context.execute_v2(allocations) + + torch.cuda.synchronize() + end_time = time.time() + forward_time = end_time - start_time + + num_samples = 50000 + if config.loop_count * config.bsz < num_samples: + num_samples = config.loop_count * config.bsz + fps = num_samples / forward_time + + print("FPS : ", fps) + print(f"Performance Check : Test {fps} >= target {config.fps_target}") + if fps >= config.fps_target: + print("pass!") + exit() + else: + print("failed!") + exit(1) + + elif config.test_mode == "ACC": + + ## Prepare the output data + output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) + print(f"output shape : {output.shape} output type : {output.dtype}") + + total_sample = 0 + acc_top1, acc_top5 = 0, 0 + + with tqdm(total= len(dataloader)) as _tqdm: + for idx, (batch_data, batch_label) in enumerate(dataloader): + batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) + batch_data = np.ascontiguousarray(batch_data) + total_sample += batch_data.shape[0] + (err,) = cudart.cudaMemcpy( + inputs[0]["allocation"], + batch_data, + batch_data.nbytes, + cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, + ) + assert err == cudart.cudaError_t.cudaSuccess + # cuda.memcpy_htod(inputs[0]["allocation"], batch_data) + context.execute_v2(allocations) + (err,) = cudart.cudaMemcpy( + output, + outputs[0]["allocation"], + outputs[0]["nbytes"], + cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost, + ) + assert err == cudart.cudaError_t.cudaSuccess + # cuda.memcpy_dtoh(output, outputs[0]["allocation"]) + + # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model + if len(output.shape) == 4: + output = output.squeeze(axis=(2,3)) + + batch_top1, batch_top5 = eval_batch(output, batch_label) + acc_top1 += batch_top1 + acc_top5 += batch_top5 + + _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), + acc_5='{:.4f}'.format(acc_top5/total_sample)) + _tqdm.update(1) + err, = cudart.cudaFree(inputs[0]["allocation"]) + assert err == cudart.cudaError_t.cudaSuccess + err, = cudart.cudaFree(outputs[0]["allocation"]) + assert err == cudart.cudaError_t.cudaSuccess + print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") + print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") + acc1 = acc_top1/total_sample + print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") + if acc1 >= config.acc_target: + print("pass!") + exit() + else: + print("failed!") + exit(1) + +def parse_config(): + parser = argparse.ArgumentParser() + parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") + 
parser.add_argument(
+        "--engine_file",
+        type=str,
+        help="engine file path"
+    )
+    parser.add_argument(
+        "--datasets_dir",
+        type=str,
+        default="",
+        help="ImageNet dir",
+    )
+    parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times")
+    parser.add_argument("--bsz", type=int, default=32, help="test batch size")
+    parser.add_argument(
+        "--imgsz",
+        "--img",
+        "--img-size",
+        type=int,
+        default=224,
+        help="inference size h,w",
+    )
+    parser.add_argument("--use_async", action="store_true")
+    parser.add_argument(
+        "--device", type=int, default=0, help="cuda device, i.e. 0 or 0,1,2,3,4"
+    )
+    parser.add_argument("--fps_target", type=float, default=-1.0)
+    parser.add_argument("--acc_target", type=float, default=-1.0)
+    parser.add_argument("--loop_count", type=int, default=-1)
+
+    config = parser.parse_args()
+    return config
+
+if __name__ == "__main__":
+    config = parse_config()
+    main(config)
diff --git a/models/cv/classification/convnext_small/ixrt/modify_batchsize.py b/models/cv/classification/convnext_small/ixrt/modify_batchsize.py
new file mode 100644
index 00000000..689b7a97
--- /dev/null
+++ b/models/cv/classification/convnext_small/ixrt/modify_batchsize.py
@@ -0,0 +1,56 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import onnx
+import argparse
+
+def change_input_dim(model, bsz):
+    batch_size = bsz
+
+    # The following code changes the first dimension of every input to be batch_size
+    # Modify as appropriate ... note that this requires all inputs to
+    # have the same batch_size
+    inputs = model.graph.input
+    for input in inputs:
+        # Checks omitted. This assumes that all inputs are tensors and have a shape with first dim.
+        # Add checks as needed.
+        dim1 = input.type.tensor_type.shape.dim[0]
+        # Update the first dim: a numeric value fixes the batch size,
+        # a non-numeric string makes it symbolic (dynamic).
+        if isinstance(batch_size, str) and batch_size.isdigit():
+            # numeric string: set the given batch size
+            dim1.dim_value = int(batch_size)
+        elif isinstance(batch_size, str):
+            # set dynamic batch size
+            dim1.dim_param = batch_size
+        elif isinstance(batch_size, int):
+            # set the given batch size
+            dim1.dim_value = batch_size
+        else:
+            # fall back to batch size 1
+            dim1.dim_value = 1
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--batch_size", type=int)
+    parser.add_argument("--origin_model", type=str)
+    parser.add_argument("--output_model", type=str)
+    args = parser.parse_args()
+    return args
+
+args = parse_args()
+model = onnx.load(args.origin_model)
+change_input_dim(model, args.batch_size)
+onnx.save(model, args.output_model)
diff --git a/models/cv/classification/convnext_small/ixrt/scripts/infer_convnext_small_fp16_accuracy.sh b/models/cv/classification/convnext_small/ixrt/scripts/infer_convnext_small_fp16_accuracy.sh
new file mode 100644
index 00000000..a43c1a20
--- /dev/null
+++ b/models/cv/classification/convnext_small/ixrt/scripts/infer_convnext_small_fp16_accuracy.sh
@@ -0,0 +1,119 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+EXIT_STATUS=0
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then
+        EXIT_STATUS=1
+    fi
+}
+
+# Run parameters
+BSZ=32
+TGT=-1
+WARM_UP=0
+LOOP_COUNT=-1
+RUN_MODE=ACC
+PRECISION=float16
+
+# Update arguments
+index=0
+options=$@
+arguments=($options)
+for argument in $options
+do
+    index=`expr $index + 1`
+    case $argument in
+      --bs) BSZ=${arguments[index]};;
+      --tgt) TGT=${arguments[index]};;
+    esac
+done
+
+PROJ_DIR=${PROJ_DIR:-"."}
+DATASETS_DIR="${DATASETS_DIR}"
+CHECKPOINTS_DIR="${PROJ_DIR}"
+RUN_DIR="${PROJ_DIR}"
+CONFIG_DIR="${PROJ_DIR}/config/CONVNEXT_CONFIG"
+source ${CONFIG_DIR}
+ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo CONFIG_DIR : ${CONFIG_DIR}
+echo ====================== Model Info ======================
+echo Model Name : ${MODEL_NAME}
+echo Onnx Path : ${ORIGINE_MODEL}
+
+step=0
+SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
+
+# Simplify Model
+let step++
+echo;
+echo [STEP ${step}] : Simplify Model
+if [ -f ${SIM_MODEL} ];then
+    echo "  "Simplify Model Skip, ${SIM_MODEL} already exists
+else
+    python3 ${RUN_DIR}/simplify_model.py \
+    --origin_model $ORIGINE_MODEL \
+    --output_model ${SIM_MODEL}
+    echo "  "Generate ${SIM_MODEL}
+fi
+
+# Change Batchsize
+let step++
+echo;
+echo [STEP ${step}] : Change Batchsize
+FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx
+if [ -f $FINAL_MODEL ];then
+    echo "  "Change Batchsize Skip, $FINAL_MODEL already exists
+else
+    python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
+    --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL}
+    echo "  "Generate ${FINAL_MODEL}
+fi
+
+# Build Engine
+let step++
+echo;
+echo [STEP ${step}] : Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
+if [ -f $ENGINE_FILE ];then
+    echo "  "Build Engine Skip, $ENGINE_FILE already exists
+else
+    python3 ${RUN_DIR}/build_engine.py \
+    --precision ${PRECISION} \
+    --model ${FINAL_MODEL} \
+    --engine ${ENGINE_FILE}
+    echo "  "Generate Engine ${ENGINE_FILE}
+fi
+
+# Inference
+let step++
+echo;
+echo [STEP ${step}] : Inference
+python3 ${RUN_DIR}/inference.py \
+    --engine_file=${ENGINE_FILE} \
+    --datasets_dir=${DATASETS_DIR} \
+    --imgsz=${IMGSIZE} \
+    --warm_up=${WARM_UP} \
+    --loop_count ${LOOP_COUNT} \
+    --test_mode ${RUN_MODE} \
+    --acc_target ${TGT} \
+    --bsz ${BSZ}; check_status
+
+exit ${EXIT_STATUS}
diff --git a/models/cv/classification/convnext_small/ixrt/scripts/infer_convnext_small_fp16_performance.sh b/models/cv/classification/convnext_small/ixrt/scripts/infer_convnext_small_fp16_performance.sh
new file mode 100644
index 00000000..3e5bca55
--- /dev/null
+++ b/models/cv/classification/convnext_small/ixrt/scripts/infer_convnext_small_fp16_performance.sh
@@ -0,0 +1,119 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+EXIT_STATUS=0
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then
+        EXIT_STATUS=1
+    fi
+}
+
+# Run parameters
+BSZ=32
+TGT=-1
+WARM_UP=3
+LOOP_COUNT=20
+RUN_MODE=FPS
+PRECISION=float16
+
+# Update arguments
+index=0
+options=$@
+arguments=($options)
+for argument in $options
+do
+    index=`expr $index + 1`
+    case $argument in
+      --bs) BSZ=${arguments[index]};;
+      --tgt) TGT=${arguments[index]};;
+    esac
+done
+
+PROJ_DIR=${PROJ_DIR:-"."}
+DATASETS_DIR="${DATASETS_DIR}"
+CHECKPOINTS_DIR="${PROJ_DIR}"
+RUN_DIR="${PROJ_DIR}"
+CONFIG_DIR="${PROJ_DIR}/config/CONVNEXT_CONFIG"
+source ${CONFIG_DIR}
+ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo CONFIG_DIR : ${CONFIG_DIR}
+echo ====================== Model Info ======================
+echo Model Name : ${MODEL_NAME}
+echo Onnx Path : ${ORIGINE_MODEL}
+
+step=0
+SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
+
+# Simplify Model
+let step++
+echo;
+echo [STEP ${step}] : Simplify Model
+if [ -f ${SIM_MODEL} ];then
+    echo "  "Simplify Model Skip, ${SIM_MODEL} already exists
+else
+    python3 ${RUN_DIR}/simplify_model.py \
+    --origin_model $ORIGINE_MODEL \
+    --output_model ${SIM_MODEL}
+    echo "  "Generate ${SIM_MODEL}
+fi
+
+# Change Batchsize
+let step++
+echo;
+echo [STEP ${step}] : Change Batchsize
+FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx
+if [ -f $FINAL_MODEL ];then
+    echo "  "Change Batchsize Skip, $FINAL_MODEL already exists
+else
+    python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
+    --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL}
+    echo "  "Generate ${FINAL_MODEL}
+fi
+
+# Build Engine
+let step++
+echo;
+echo [STEP ${step}] : Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
+if [ -f $ENGINE_FILE ];then
+    echo "  "Build Engine Skip, $ENGINE_FILE already exists
+else
+    python3 ${RUN_DIR}/build_engine.py \
+    --precision ${PRECISION} \
+    --model ${FINAL_MODEL} \
+    --engine ${ENGINE_FILE}
+    echo "  "Generate Engine ${ENGINE_FILE}
+fi
+
+# Inference
+let step++
+echo;
+echo [STEP ${step}] : Inference
+python3 ${RUN_DIR}/inference.py \
+    --engine_file=${ENGINE_FILE} \
+    --datasets_dir=${DATASETS_DIR} \
+    --imgsz=${IMGSIZE} \
+    --warm_up=${WARM_UP} \
+    --loop_count ${LOOP_COUNT} \
+    --test_mode ${RUN_MODE} \
+    --fps_target ${TGT} \
+    --bsz ${BSZ}; check_status
+
+exit ${EXIT_STATUS}
diff --git a/models/cv/classification/convnext_small/ixrt/simplify_model.py b/models/cv/classification/convnext_small/ixrt/simplify_model.py
new file mode 100644
index 00000000..9948a9fa
--- /dev/null
+++ b/models/cv/classification/convnext_small/ixrt/simplify_model.py
@@ -0,0 +1,40 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +import onnx +import argparse +from onnxsim import simplify + +# Simplify +def simplify_model(args): + onnx_model = onnx.load(args.origin_model) + model_simp, check = simplify(onnx_model) + model_simp = onnx.shape_inference.infer_shapes(model_simp) + onnx.save(model_simp, args.output_model) + print(" Simplify onnx Done.") + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--origin_model", type=str) + parser.add_argument("--output_model", type=str) + parser.add_argument("--reshape", action="store_true") + args = parser.parse_args() + return args + +args = parse_args() +simplify_model(args) + + + + -- Gitee From 5454a0b1d8960bf59659242526a207712000e659 Mon Sep 17 00:00:00 2001 From: "xinchi.tian" Date: Wed, 9 Oct 2024 14:42:20 +0800 Subject: [PATCH 6/8] Add ResNext_50 in IxRT link #IAVG3P Signed-off-by: xinchi.tian --- .../resnext50_32x4d/ixrt/README.md | 53 ++++++ .../resnext50_32x4d/ixrt/build_engine.py | 52 ++++++ .../ixrt/calibration_dataset.py | 113 ++++++++++++ .../resnext50_32x4d/ixrt/common.py | 81 +++++++++ .../ixrt/config/RESNEXT50_CONFIG | 34 ++++ .../resnext50_32x4d/ixrt/export.py | 61 +++++++ .../resnext50_32x4d/ixrt/inference.py | 172 ++++++++++++++++++ .../resnext50_32x4d/ixrt/modify_batchsize.py | 57 ++++++ .../infer_resnext50_32x4d_fp16_accuracy.sh | 119 ++++++++++++ .../infer_resnext50_32x4d_fp16_performance.sh | 120 ++++++++++++ .../resnext50_32x4d/ixrt/simplify_model.py | 41 +++++ 11 files changed, 903 insertions(+) create mode 100644 models/cv/classification/resnext50_32x4d/ixrt/README.md create mode 100644 models/cv/classification/resnext50_32x4d/ixrt/build_engine.py create mode 100644 models/cv/classification/resnext50_32x4d/ixrt/calibration_dataset.py create mode 100644 models/cv/classification/resnext50_32x4d/ixrt/common.py create mode 100644 models/cv/classification/resnext50_32x4d/ixrt/config/RESNEXT50_CONFIG create mode 100644 models/cv/classification/resnext50_32x4d/ixrt/export.py create mode 100644 models/cv/classification/resnext50_32x4d/ixrt/inference.py create mode 100644 models/cv/classification/resnext50_32x4d/ixrt/modify_batchsize.py create mode 100644 models/cv/classification/resnext50_32x4d/ixrt/scripts/infer_resnext50_32x4d_fp16_accuracy.sh create mode 100644 models/cv/classification/resnext50_32x4d/ixrt/scripts/infer_resnext50_32x4d_fp16_performance.sh create mode 100644 models/cv/classification/resnext50_32x4d/ixrt/simplify_model.py diff --git a/models/cv/classification/resnext50_32x4d/ixrt/README.md b/models/cv/classification/resnext50_32x4d/ixrt/README.md new file mode 100644 index 00000000..ea2f1c88 --- /dev/null +++ b/models/cv/classification/resnext50_32x4d/ixrt/README.md @@ -0,0 +1,53 @@ +# ResNext50_32x4d + +## Description + +The ResNeXt50_32x4d model is a convolutional neural network architecture designed for image classification tasks. It is an extension of the ResNet (Residual Network) architecture, incorporating the concept of cardinality to enhance model performance. 
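+
+"Cardinality" is the number of parallel transformation groups in each
+bottleneck block; ResNeXt50_32x4d uses 32 groups with a base width of 4. A
+minimal sketch of the idea using a grouped convolution (illustrative code,
+not part of this repository):
+
+```python
+import torch
+import torch.nn as nn
+
+# A 3x3 convolution split into 32 independent groups (cardinality = 32),
+# the building block that distinguishes ResNeXt from a plain ResNet.
+conv = nn.Conv2d(128, 128, kernel_size=3, padding=1, groups=32, bias=False)
+out = conv(torch.randn(1, 128, 56, 56))
+print(out.shape)  # torch.Size([1, 128, 56, 56])
+```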
+
+## Setup
+
+### Install
+
+```bash
+pip3 install onnx
+pip3 install tqdm
+pip3 install onnxsim
+pip3 install tabulate
+pip3 install ppq
+pip3 install cuda-python
+```
+
+### Download
+
+Pretrained model: 
+
+Dataset: to download the validation dataset.
+
+### Model Conversion
+
+```bash
+python3 export.py --weight resnext50_32x4d-7cdf4587.pth --output resnext50_32x4d.onnx
+```
+
+## Inference
+
+```bash
+export DATASETS_DIR=/Path/to/imagenet_val/
+```
+
+### FP16
+
+```bash
+# Accuracy
+bash scripts/infer_resnext50_32x4d_fp16_accuracy.sh
+# Performance
+bash scripts/infer_resnext50_32x4d_fp16_performance.sh
+```
+
+## Results
+
+| Model           | BatchSize | Precision | FPS    | Top-1(%) | Top-5(%) |
+| --------------- | --------- | --------- | ------ | -------- | -------- |
+| ResNext50_32x4d | 32        | FP16      | 417.01 | 77.614   | 93.686   |
diff --git a/models/cv/classification/resnext50_32x4d/ixrt/build_engine.py b/models/cv/classification/resnext50_32x4d/ixrt/build_engine.py
new file mode 100644
index 00000000..038c15d5
--- /dev/null
+++ b/models/cv/classification/resnext50_32x4d/ixrt/build_engine.py
@@ -0,0 +1,52 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import os
+import cv2
+import argparse
+import numpy as np
+
+import torch
+import tensorrt
+
+def main(config):
+    IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING)
+    builder = tensorrt.Builder(IXRT_LOGGER)
+    EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
+    network = builder.create_network(EXPLICIT_BATCH)
+    build_config = builder.create_builder_config()
+    parser = tensorrt.OnnxParser(network, IXRT_LOGGER)
+    parser.parse_from_file(config.model)
+
+    precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16
+    build_config.set_flag(precision)
+
+    plan = builder.build_serialized_network(network, build_config)
+    engine_file_path = config.engine
+    with open(engine_file_path, "wb") as f:
+        f.write(plan)
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--model", type=str)
+    parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8",
+                        help="The precision of datatype")
+    parser.add_argument("--engine", type=str, default=None)
+    args = parser.parse_args()
+    return args
+
+if __name__ == "__main__":
+    args = parse_args()
+    main(args)
\ No newline at end of file
diff --git a/models/cv/classification/resnext50_32x4d/ixrt/calibration_dataset.py b/models/cv/classification/resnext50_32x4d/ixrt/calibration_dataset.py
new file mode 100644
index 00000000..d7525d51
--- /dev/null
+++ b/models/cv/classification/resnext50_32x4d/ixrt/calibration_dataset.py
@@ -0,0 +1,113 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os + +import torch +import torchvision.datasets +from torch.utils.data import DataLoader +from torchvision import models +from torchvision import transforms as T + + +class CalibrationImageNet(torchvision.datasets.ImageFolder): + def __init__(self, *args, **kwargs): + super(CalibrationImageNet, self).__init__(*args, **kwargs) + img2label_path = os.path.join(self.root, "val_map.txt") + if not os.path.exists(img2label_path): + raise FileNotFoundError(f"Not found label file `{img2label_path}`.") + + self.img2label_map = self.make_img2label_map(img2label_path) + + def make_img2label_map(self, path): + with open(path) as f: + lines = f.readlines() + + img2lable_map = dict() + for line in lines: + line = line.lstrip().rstrip().split("\t") + if len(line) != 2: + continue + img_name, label = line + img_name = img_name.strip() + if img_name in [None, ""]: + continue + label = int(label.strip()) + img2lable_map[img_name] = label + return img2lable_map + + def __getitem__(self, index): + path, target = self.samples[index] + sample = self.loader(path) + if self.transform is not None: + sample = self.transform(sample) + # if self.target_transform is not None: + # target = self.target_transform(target) + img_name = os.path.basename(path) + target = self.img2label_map[img_name] + + return sample, target + + +def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0): + dataset = CalibrationImageNet( + data_path, + transform=T.Compose( + [ + T.Resize(256), + T.CenterCrop(img_sz), + T.ToTensor(), + T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ] + ), + ) + + calibration_dataset = dataset + if num_samples is not None: + calibration_dataset = torch.utils.data.Subset( + dataset, indices=range(num_samples) + ) + + calibration_dataloader = DataLoader( + calibration_dataset, + shuffle=False, + batch_size=batch_size, + drop_last=False, + num_workers=workers, + ) + + verify_dataloader = DataLoader( + dataset, + shuffle=False, + batch_size=batch_size, + drop_last=False, + num_workers=workers, + ) + + return calibration_dataloader, verify_dataloader + + +def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): + num_samples = min(total_sample, step * batch_size) + if step < 0: + num_samples = None + calibration_dataloader, _ = create_dataloaders( + dataset_dir, + img_sz=img_sz, + batch_size=batch_size, + workers=workers, + num_samples=num_samples, + ) + return calibration_dataloader \ No newline at end of file diff --git a/models/cv/classification/resnext50_32x4d/ixrt/common.py b/models/cv/classification/resnext50_32x4d/ixrt/common.py new file mode 100644 index 00000000..2279dc0c --- /dev/null +++ b/models/cv/classification/resnext50_32x4d/ixrt/common.py @@ -0,0 +1,81 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import cv2 +import glob +import torch +import tensorrt +import numpy as np +from cuda import cuda, cudart + +def eval_batch(batch_score, batch_label): + batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) + values, indices = batch_score.topk(5) + top1, top5 = 0, 0 + for idx, label in enumerate(batch_label): + + if label == indices[idx][0]: + top1 += 1 + if label in indices[idx]: + top5 += 1 + return top1, top5 + +def create_engine_context(engine_path, logger): + with open(engine_path, "rb") as f: + runtime = tensorrt.Runtime(logger) + assert runtime + engine = runtime.deserialize_cuda_engine(f.read()) + assert engine + context = engine.create_execution_context() + assert context + + return engine, context + +def get_io_bindings(engine): + # Setup I/O bindings + inputs = [] + outputs = [] + allocations = [] + + for i in range(engine.num_bindings): + is_input = False + if engine.binding_is_input(i): + is_input = True + name = engine.get_binding_name(i) + dtype = engine.get_binding_dtype(i) + shape = engine.get_binding_shape(i) + if is_input: + batch_size = shape[0] + size = np.dtype(tensorrt.nptype(dtype)).itemsize + for s in shape: + size *= s + err, allocation = cudart.cudaMalloc(size) + assert err == cudart.cudaError_t.cudaSuccess + binding = { + "index": i, + "name": name, + "dtype": np.dtype(tensorrt.nptype(dtype)), + "shape": list(shape), + "allocation": allocation, + "nbytes": size, + } + print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") + allocations.append(allocation) + if engine.binding_is_input(i): + inputs.append(binding) + else: + outputs.append(binding) + return inputs, outputs, allocations diff --git a/models/cv/classification/resnext50_32x4d/ixrt/config/RESNEXT50_CONFIG b/models/cv/classification/resnext50_32x4d/ixrt/config/RESNEXT50_CONFIG new file mode 100644 index 00000000..daf8e41d --- /dev/null +++ b/models/cv/classification/resnext50_32x4d/ixrt/config/RESNEXT50_CONFIG @@ -0,0 +1,34 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+
+# IMGSIZE       : model input height/width
+# MODEL_NAME    : basename for the generated onnx/engine files
+# ORIGINE_MODEL : filename of the original onnx model
+IMGSIZE=224
+MODEL_NAME=Resnext50_32x4d
+ORIGINE_MODEL=resnext50_32x4d.onnx
+
+# QUANT CONFIG (only takes effect when PRECISION is int8)
+    # QUANT_OBSERVER   : quantization observer, one of [hist_percentile, percentile, minmax, entropy, ema]
+    # QUANT_BATCHSIZE  : batch size of the calibration dataloader; best kept equal to the onnx
+    #                    batch size, since some ops (e.g. Reshape) may otherwise infer wrong shapes
+    # QUANT_STEP       : number of calibration steps
+    # QUANT_SEED       : random seed, so that quantization results are reproducible
+    # QUANT_EXIST_ONNX : set this when a quantized model from another source should be used
+QUANT_OBSERVER=hist_percentile
+QUANT_BATCHSIZE=1
+QUANT_STEP=32
+QUANT_SEED=42
+DISABLE_QUANT_LIST=
+QUANT_EXIST_ONNX=
diff --git a/models/cv/classification/resnext50_32x4d/ixrt/export.py b/models/cv/classification/resnext50_32x4d/ixrt/export.py
new file mode 100644
index 00000000..52130c66
--- /dev/null
+++ b/models/cv/classification/resnext50_32x4d/ixrt/export.py
@@ -0,0 +1,61 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import torch
+import torchvision
+import argparse
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument("--weight",
+                        type=str,
+                        required=True,
+                        help="pytorch model weight.")
+
+    parser.add_argument("--output",
+                        type=str,
+                        required=True,
+                        help="export onnx model path.")
+
+    args = parser.parse_args()
+    return args
+
+def main():
+    args = parse_args()
+
+    model = torchvision.models.resnext50_32x4d()
+    model.load_state_dict(torch.load(args.weight))
+    model.eval()
+
+    input_names = ['input']
+    output_names = ['output']
+    # The model is exported with a fixed batch dimension; modify_batchsize.py
+    # rewrites it to the requested batch size before the engine is built.
+    dummy_input = torch.randn(1, 3, 224, 224)
+
+    torch.onnx.export(
+        model,
+        dummy_input,
+        args.output,
+        input_names = input_names,
+        output_names = output_names,
+        opset_version=13
+    )
+
+    print("Export onnx model successfully!")
+
+if __name__ == "__main__":
+    main()
diff --git a/models/cv/classification/resnext50_32x4d/ixrt/inference.py b/models/cv/classification/resnext50_32x4d/ixrt/inference.py
new file mode 100644
index 00000000..4afba6bc
--- /dev/null
+++ b/models/cv/classification/resnext50_32x4d/ixrt/inference.py
@@ -0,0 +1,172 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
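+
+# Device buffers are allocated once per binding by get_io_bindings() in
+# common.py (cudaMalloc); the ACC loop below copies each batch host-to-device,
+# runs execute_v2, copies the logits back, and frees both buffers with
+# cudaFree when the loop finishes.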
+ +import argparse +import json +import os +import re +import time +from tqdm import tqdm + +import cv2 +import numpy as np +from cuda import cuda, cudart +import torch +import tensorrt + +from calibration_dataset import getdataloader +from common import eval_batch, create_engine_context, get_io_bindings + +def main(config): + dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) + + host_mem = tensorrt.IHostMemory + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + + # Load Engine && I/O bindings + engine, context = create_engine_context(config.engine_file, logger) + inputs, outputs, allocations = get_io_bindings(engine) + + # Warm up + if config.warm_up > 0: + print("\nWarm Start.") + for i in range(config.warm_up): + context.execute_v2(allocations) + print("Warm Done.") + + # Inference + if config.test_mode == "FPS": + torch.cuda.synchronize() + start_time = time.time() + + for i in range(config.loop_count): + context.execute_v2(allocations) + + torch.cuda.synchronize() + end_time = time.time() + forward_time = end_time - start_time + + num_samples = 50000 + if config.loop_count * config.bsz < num_samples: + num_samples = config.loop_count * config.bsz + fps = num_samples / forward_time + + print("FPS : ", fps) + print(f"Performance Check : Test {fps} >= target {config.fps_target}") + if fps >= config.fps_target: + print("pass!") + exit() + else: + print("failed!") + exit(1) + + elif config.test_mode == "ACC": + + ## Prepare the output data + output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) + print(f"output shape : {output.shape} output type : {output.dtype}") + + total_sample = 0 + acc_top1, acc_top5 = 0, 0 + + with tqdm(total= len(dataloader)) as _tqdm: + for idx, (batch_data, batch_label) in enumerate(dataloader): + batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) + batch_data = np.ascontiguousarray(batch_data) + total_sample += batch_data.shape[0] + (err,) = cudart.cudaMemcpy( + inputs[0]["allocation"], + batch_data, + batch_data.nbytes, + cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, + ) + assert err == cudart.cudaError_t.cudaSuccess + # cuda.memcpy_htod(inputs[0]["allocation"], batch_data) + context.execute_v2(allocations) + (err,) = cudart.cudaMemcpy( + output, + outputs[0]["allocation"], + outputs[0]["nbytes"], + cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost, + ) + assert err == cudart.cudaError_t.cudaSuccess + # cuda.memcpy_dtoh(output, outputs[0]["allocation"]) + + # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model + if len(output.shape) == 4: + output = output.squeeze(axis=(2,3)) + + batch_top1, batch_top5 = eval_batch(output, batch_label) + acc_top1 += batch_top1 + acc_top5 += batch_top5 + + _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), + acc_5='{:.4f}'.format(acc_top5/total_sample)) + _tqdm.update(1) + err, = cudart.cudaFree(inputs[0]["allocation"]) + assert err == cudart.cudaError_t.cudaSuccess + err, = cudart.cudaFree(outputs[0]["allocation"]) + assert err == cudart.cudaError_t.cudaSuccess + print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") + print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") + acc1 = acc_top1/total_sample + print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") + if acc1 >= config.acc_target: + print("pass!") + exit() + else: + print("failed!") + exit(1) + +def parse_config(): + parser = argparse.ArgumentParser() + parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") + 
parser.add_argument( + "--engine_file", + type=str, + help="engine file path" + ) + parser.add_argument( + "--datasets_dir", + type=str, + default="", + help="ImageNet dir", + ) + parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") + parser.add_argument("--bsz", type=int, default=32, help="test batch size") + parser.add_argument( + "--imgsz", + "--img", + "--img-size", + type=int, + default=224, + help="inference size h,w", + ) + parser.add_argument("--use_async", action="store_true") + parser.add_argument( + "--device", type=int, default=0, help="cuda device, i.e. 0 or 0,1,2,3,4" + ) + parser.add_argument("--fps_target", type=float, default=-1.0) + parser.add_argument("--acc_target", type=float, default=-1.0) + parser.add_argument("--loop_count", type=int, default=-1) + + config = parser.parse_args() + return config + +if __name__ == "__main__": + config = parse_config() + main(config) diff --git a/models/cv/classification/resnext50_32x4d/ixrt/modify_batchsize.py b/models/cv/classification/resnext50_32x4d/ixrt/modify_batchsize.py new file mode 100644 index 00000000..4ac42a30 --- /dev/null +++ b/models/cv/classification/resnext50_32x4d/ixrt/modify_batchsize.py @@ -0,0 +1,57 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import onnx +import argparse + +def change_input_dim(model, bsz): + batch_size = bsz + + # The following code changes the first dimension of every input to be batch_size + # Modify as appropriate ... note that this requires all inputs to + # have the same batch_size + inputs = model.graph.input + for input in inputs: + # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. + # Add checks as needed. + dim1 = input.type.tensor_type.shape.dim[0] + # update dim to be a symbolic value + if isinstance(batch_size, str): + # set dynamic batch size + dim1.dim_param = batch_size + elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): + # set given batch size + dim1.dim_value = int(batch_size) + else: + # set batch size of 1 + dim1.dim_value = 1 + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--batch_size", type=int) + parser.add_argument("--origin_model", type=str) + parser.add_argument("--output_model", type=str) + args = parser.parse_args() + return args + +args = parse_args() +model = onnx.load(args.origin_model) +change_input_dim(model, args.batch_size) +onnx.save(model, args.output_model) + + + + + diff --git a/models/cv/classification/resnext50_32x4d/ixrt/scripts/infer_resnext50_32x4d_fp16_accuracy.sh b/models/cv/classification/resnext50_32x4d/ixrt/scripts/infer_resnext50_32x4d_fp16_accuracy.sh new file mode 100644 index 00000000..0bd3fab4 --- /dev/null +++ b/models/cv/classification/resnext50_32x4d/ixrt/scripts/infer_resnext50_32x4d_fp16_accuracy.sh @@ -0,0 +1,119 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. 
+# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +TGT=-1 +WARM_UP=0 +LOOP_COUNT=-1 +RUN_MODE=ACC +PRECISION=float16 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +PROJ_DIR=${PROJ_DIR:-"."} +DATASETS_DIR="${DATASETS_DIR}" +CHECKPOINTS_DIR="${PROJ_DIR}" +RUN_DIR="${PROJ_DIR}" +CONFIG_DIR="${PROJ_DIR}/config/RESNEXT50_CONFIG" +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +step=0 +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx + +# Simplify Model +let step++ +echo; +echo [STEP ${step}] : Simplify Model +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ + --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${FINAL_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --engine_file=${ENGINE_FILE} \ + --datasets_dir=${DATASETS_DIR} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --acc_target ${TGT} \ + --bsz ${BSZ}; check_status + +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/resnext50_32x4d/ixrt/scripts/infer_resnext50_32x4d_fp16_performance.sh b/models/cv/classification/resnext50_32x4d/ixrt/scripts/infer_resnext50_32x4d_fp16_performance.sh new file mode 100644 index 00000000..511e9cee --- /dev/null +++ b/models/cv/classification/resnext50_32x4d/ixrt/scripts/infer_resnext50_32x4d_fp16_performance.sh @@ -0,0 +1,120 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +TGT=-1 +WARM_UP=3 +LOOP_COUNT=20 +RUN_MODE=FPS +PRECISION=float16 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +PROJ_DIR=${PROJ_DIR:-"."} +DATASETS_DIR="${DATASETS_DIR}" +CHECKPOINTS_DIR="${PROJ_DIR}" +RUN_DIR="${PROJ_DIR}" +CONFIG_DIR="${PROJ_DIR}/config/RESNEXT50_CONFIG" +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +step=0 +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx + +# Simplify Model +let step++ +echo; +echo [STEP ${step}] : Simplify Model +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi + + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ + --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${FINAL_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --engine_file=${ENGINE_FILE} \ + --datasets_dir=${DATASETS_DIR} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --fps_target ${TGT} \ + --bsz ${BSZ}; check_status + +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/resnext50_32x4d/ixrt/simplify_model.py b/models/cv/classification/resnext50_32x4d/ixrt/simplify_model.py new file mode 100644 index 00000000..4d53a474 --- /dev/null +++ b/models/cv/classification/resnext50_32x4d/ixrt/simplify_model.py @@ -0,0 +1,41 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import onnx +import argparse +from onnxsim import simplify + +# Simplify +def simplify_model(args): + onnx_model = onnx.load(args.origin_model) + model_simp, check = simplify(onnx_model) + model_simp = onnx.shape_inference.infer_shapes(model_simp) + onnx.save(model_simp, args.output_model) + print(" Simplify onnx Done.") + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--origin_model", type=str) + parser.add_argument("--output_model", type=str) + parser.add_argument("--reshape", action="store_true") + args = parser.parse_args() + return args + +args = parse_args() +simplify_model(args) + + + + -- Gitee From 93f4a4da82ae552ca6e418b5811eb5bb4f3c77bc Mon Sep 17 00:00:00 2001 From: "xinchi.tian" Date: Wed, 9 Oct 2024 14:53:53 +0800 Subject: [PATCH 7/8] Add efficientnet_b2 in IxRT link #IAVG3P Signed-off-by: xinchi.tian --- .../efficientnet_b2/ixrt/README.md | 56 ++++++ .../efficientnet_b2/ixrt/build_engine.py | 52 ++++++ .../ixrt/calibration_dataset.py | 113 ++++++++++++ .../efficientnet_b2/ixrt/common.py | 81 +++++++++ .../ixrt/config/EFFICIENTNET_B2_CONFIG | 34 ++++ .../efficientnet_b2/ixrt/export.py | 61 +++++++ .../efficientnet_b2/ixrt/inference.py | 172 ++++++++++++++++++ .../efficientnet_b2/ixrt/modify_batchsize.py | 57 ++++++ .../infer_efficientnet_b1_fp16_accuracy.sh | 119 ++++++++++++ .../infer_efficientnet_b1_fp16_performance.sh | 119 ++++++++++++ .../efficientnet_b2/ixrt/simplify_model.py | 41 +++++ 11 files changed, 905 insertions(+) create mode 100644 models/cv/classification/efficientnet_b2/ixrt/README.md create mode 100644 models/cv/classification/efficientnet_b2/ixrt/build_engine.py create mode 100644 models/cv/classification/efficientnet_b2/ixrt/calibration_dataset.py create mode 100644 models/cv/classification/efficientnet_b2/ixrt/common.py create mode 100644 models/cv/classification/efficientnet_b2/ixrt/config/EFFICIENTNET_B2_CONFIG create mode 100644 models/cv/classification/efficientnet_b2/ixrt/export.py create mode 100644 models/cv/classification/efficientnet_b2/ixrt/inference.py create mode 100644 models/cv/classification/efficientnet_b2/ixrt/modify_batchsize.py create mode 100644 models/cv/classification/efficientnet_b2/ixrt/scripts/infer_efficientnet_b1_fp16_accuracy.sh create mode 100644 models/cv/classification/efficientnet_b2/ixrt/scripts/infer_efficientnet_b1_fp16_performance.sh create mode 100644 models/cv/classification/efficientnet_b2/ixrt/simplify_model.py diff --git a/models/cv/classification/efficientnet_b2/ixrt/README.md b/models/cv/classification/efficientnet_b2/ixrt/README.md new file mode 100644 index 00000000..20410cfc --- /dev/null +++ b/models/cv/classification/efficientnet_b2/ixrt/README.md @@ -0,0 +1,56 @@ +# EfficientNet B2 + +## Description + +EfficientNet B2 is a member of the EfficientNet family, a series of convolutional neural network architectures that are designed to achieve excellent accuracy and efficiency. 
Introduced by researchers at Google, EfficientNets utilize the compound scaling method, which uniformly scales the depth, width, and resolution of the network to improve accuracy and efficiency.
+
+## Setup
+
+### Install
+
+```bash
+# Install libGL
+## CentOS
+yum install -y mesa-libGL
+## Ubuntu
+apt install -y libgl1-mesa-dev
+
+pip3 install tqdm
+pip3 install onnx
+pip3 install onnxsim
+pip3 install tabulate
+pip3 install cuda-python
+```
+
+### Download
+
+Pretrained model: 
+
+Dataset: to download the validation dataset.
+
+### Model Conversion
+
+```bash
+python3 export.py --weight efficientnet_b2_rwightman-c35c1473.pth --output efficientnet_b2.onnx
+```
+
+## Inference
+
+```bash
+export DATASETS_DIR=/Path/to/imagenet_val/
+```
+
+### FP16
+
+```bash
+# Accuracy
+bash scripts/infer_efficientnet_b1_fp16_accuracy.sh
+# Performance
+bash scripts/infer_efficientnet_b1_fp16_performance.sh
+```
+
+## Results
+
+| Model           | BatchSize | Precision | FPS     | Top-1(%) | Top-5(%) |
+| --------------- | --------- | --------- | ------- | -------- | -------- |
+| EfficientNet_B2 | 32        | FP16      | 1450.04 | 77.79    | 93.76    |
diff --git a/models/cv/classification/efficientnet_b2/ixrt/build_engine.py b/models/cv/classification/efficientnet_b2/ixrt/build_engine.py
new file mode 100644
index 00000000..038c15d5
--- /dev/null
+++ b/models/cv/classification/efficientnet_b2/ixrt/build_engine.py
@@ -0,0 +1,52 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
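+
+"""Build a serialized FP16/INT8 engine from an ONNX model.
+
+A minimal usage sketch; the file names follow the conventions of the run
+scripts and are otherwise illustrative:
+
+    python3 build_engine.py \
+        --precision float16 \
+        --model EfficientNet_b2_32.onnx \
+        --engine EfficientNet_b2_float16_bs32.engine
+
+The network is parsed with an explicit batch dimension, so the batch size
+baked into the ONNX (see modify_batchsize.py) is the batch size the
+resulting engine serves.
+"""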
+ +import os +import cv2 +import argparse +import numpy as np + +import torch +import tensorrt + +def main(config): + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(config.model) + + precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 + build_config.set_flag(precision) + + plan = builder.build_serialized_network(network, build_config) + engine_file_path = config.engine + with open(engine_file_path, "wb") as f: + f.write(plan) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", + help="The precision of datatype") + parser.add_argument("--engine", type=str, default=None) + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = parse_args() + main(args) \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b2/ixrt/calibration_dataset.py b/models/cv/classification/efficientnet_b2/ixrt/calibration_dataset.py new file mode 100644 index 00000000..d7525d51 --- /dev/null +++ b/models/cv/classification/efficientnet_b2/ixrt/calibration_dataset.py @@ -0,0 +1,113 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
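+
+"""ImageNet calibration/validation dataloaders.
+
+CalibrationImageNet is a torchvision ImageFolder whose root must also
+contain a val_map.txt file with one tab-separated "<image name>\t<label>"
+pair per line, e.g. (file names illustrative, \t denotes a tab):
+
+    ILSVRC2012_val_00000001.JPEG\t65
+    ILSVRC2012_val_00000002.JPEG\t970
+
+getdataloader(dataset_dir, step, batch_size) yields at most
+step * batch_size samples (capped at 50000 by total_sample); a negative
+step iterates the whole validation set.
+"""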
+
+import os
+
+import torch
+import torchvision.datasets
+from torch.utils.data import DataLoader
+from torchvision import models
+from torchvision import transforms as T
+
+
+class CalibrationImageNet(torchvision.datasets.ImageFolder):
+    def __init__(self, *args, **kwargs):
+        super(CalibrationImageNet, self).__init__(*args, **kwargs)
+        img2label_path = os.path.join(self.root, "val_map.txt")
+        if not os.path.exists(img2label_path):
+            raise FileNotFoundError(f"Label file `{img2label_path}` not found.")
+
+        self.img2label_map = self.make_img2label_map(img2label_path)
+
+    def make_img2label_map(self, path):
+        with open(path) as f:
+            lines = f.readlines()
+
+        img2label_map = dict()
+        for line in lines:
+            line = line.lstrip().rstrip().split("\t")
+            if len(line) != 2:
+                continue
+            img_name, label = line
+            img_name = img_name.strip()
+            if img_name in [None, ""]:
+                continue
+            label = int(label.strip())
+            img2label_map[img_name] = label
+        return img2label_map
+
+    def __getitem__(self, index):
+        path, target = self.samples[index]
+        sample = self.loader(path)
+        if self.transform is not None:
+            sample = self.transform(sample)
+        # if self.target_transform is not None:
+        #     target = self.target_transform(target)
+        img_name = os.path.basename(path)
+        target = self.img2label_map[img_name]
+
+        return sample, target
+
+
+def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0):
+    dataset = CalibrationImageNet(
+        data_path,
+        transform=T.Compose(
+            [
+                T.Resize(256),
+                T.CenterCrop(img_sz),
+                T.ToTensor(),
+                T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+            ]
+        ),
+    )
+
+    calibration_dataset = dataset
+    if num_samples is not None:
+        calibration_dataset = torch.utils.data.Subset(
+            dataset, indices=range(num_samples)
+        )
+
+    calibration_dataloader = DataLoader(
+        calibration_dataset,
+        shuffle=False,
+        batch_size=batch_size,
+        drop_last=False,
+        num_workers=workers,
+    )
+
+    verify_dataloader = DataLoader(
+        dataset,
+        shuffle=False,
+        batch_size=batch_size,
+        drop_last=False,
+        num_workers=workers,
+    )
+
+    return calibration_dataloader, verify_dataloader
+
+
+def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000):
+    num_samples = min(total_sample, step * batch_size)
+    if step < 0:
+        num_samples = None
+    calibration_dataloader, _ = create_dataloaders(
+        dataset_dir,
+        img_sz=img_sz,
+        batch_size=batch_size,
+        workers=workers,
+        num_samples=num_samples,
+    )
+    return calibration_dataloader
\ No newline at end of file
diff --git a/models/cv/classification/efficientnet_b2/ixrt/common.py b/models/cv/classification/efficientnet_b2/ixrt/common.py
new file mode 100644
index 00000000..69bc5bd6
--- /dev/null
+++ b/models/cv/classification/efficientnet_b2/ixrt/common.py
@@ -0,0 +1,81 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
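+
+"""Shared helpers: engine loading, I/O bindings, top-k accuracy.
+
+get_io_bindings allocates one device buffer per engine binding via
+cudart.cudaMalloc and returns (inputs, outputs, allocations), where each
+binding is a dict with index/name/dtype/shape/allocation/nbytes. A typical
+round trip (sketch mirroring inference.py; host_out is a caller-provided
+numpy array):
+
+    (err,) = cudart.cudaMemcpy(inputs[0]["allocation"], batch, batch.nbytes,
+                               cudart.cudaMemcpyKind.cudaMemcpyHostToDevice)
+    context.execute_v2(allocations)
+    (err,) = cudart.cudaMemcpy(host_out, outputs[0]["allocation"],
+                               outputs[0]["nbytes"],
+                               cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost)
+
+Callers are responsible for releasing each allocation with cudart.cudaFree.
+"""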
+
+import os
+import cv2
+import glob
+import torch
+import tensorrt
+import numpy as np
+from cuda import cuda, cudart
+
+def eval_batch(batch_score, batch_label):
+    batch_score = torch.from_numpy(batch_score).to(torch.float32)
+    values, indices = batch_score.topk(5)
+    top1, top5 = 0, 0
+    for idx, label in enumerate(batch_label):
+
+        if label == indices[idx][0]:
+            top1 += 1
+        if label in indices[idx]:
+            top5 += 1
+    return top1, top5
+
+def create_engine_context(engine_path, logger):
+    with open(engine_path, "rb") as f:
+        runtime = tensorrt.Runtime(logger)
+        assert runtime
+        engine = runtime.deserialize_cuda_engine(f.read())
+        assert engine
+        context = engine.create_execution_context()
+        assert context
+
+    return engine, context
+
+def get_io_bindings(engine):
+    # Setup I/O bindings
+    inputs = []
+    outputs = []
+    allocations = []
+
+    for i in range(engine.num_bindings):
+        is_input = False
+        if engine.binding_is_input(i):
+            is_input = True
+        name = engine.get_binding_name(i)
+        dtype = engine.get_binding_dtype(i)
+        shape = engine.get_binding_shape(i)
+        if is_input:
+            batch_size = shape[0]
+        size = np.dtype(tensorrt.nptype(dtype)).itemsize
+        for s in shape:
+            size *= s
+        err, allocation = cudart.cudaMalloc(size)
+        assert err == cudart.cudaError_t.cudaSuccess
+        binding = {
+            "index": i,
+            "name": name,
+            "dtype": np.dtype(tensorrt.nptype(dtype)),
+            "shape": list(shape),
+            "allocation": allocation,
+            "nbytes" : size
+        }
+        print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}")
+        allocations.append(allocation)
+        if engine.binding_is_input(i):
+            inputs.append(binding)
+        else:
+            outputs.append(binding)
+    return inputs, outputs, allocations
diff --git a/models/cv/classification/efficientnet_b2/ixrt/config/EFFICIENTNET_B2_CONFIG b/models/cv/classification/efficientnet_b2/ixrt/config/EFFICIENTNET_B2_CONFIG
new file mode 100644
index 00000000..2fe5087c
--- /dev/null
+++ b/models/cv/classification/efficientnet_b2/ixrt/config/EFFICIENTNET_B2_CONFIG
@@ -0,0 +1,34 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+# IMGSIZE : model input height/width
+# MODEL_NAME : basename for the generated onnx/engine files
+# ORIGINE_MODEL : original onnx file name
+IMGSIZE=224
+MODEL_NAME=EfficientNet_b2
+ORIGINE_MODEL=efficientnet_b2.onnx
+
+# QUANT CONFIG (takes effect only when PRECISION is int8)
+    # QUANT_OBSERVER : quantization strategy, one of [hist_percentile, percentile, minmax, entropy, ema]
+    # QUANT_BATCHSIZE : batch size of the calibration dataloader; best kept identical to the batch size in the onnx, since some ops (e.g. Reshape) may otherwise infer wrong shapes
+    # QUANT_STEP : number of calibration steps
+    # QUANT_SEED : random seed, keeps quantization results reproducible
+    # QUANT_EXIST_ONNX : set this when a quantized onnx from another source should be used
+QUANT_OBSERVER=hist_percentile
+QUANT_BATCHSIZE=1
+QUANT_STEP=32
+QUANT_SEED=42
+DISABLE_QUANT_LIST=
+QUANT_EXIST_ONNX=
diff --git a/models/cv/classification/efficientnet_b2/ixrt/export.py b/models/cv/classification/efficientnet_b2/ixrt/export.py
new file mode 100644
index 00000000..2056b473
--- /dev/null
+++ b/models/cv/classification/efficientnet_b2/ixrt/export.py
@@ -0,0 +1,61 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import torch
+import torchvision
+import argparse
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument("--weight",
+                        type=str,
+                        required=True,
+                        help="pytorch model weight.")
+
+    parser.add_argument("--output",
+                        type=str,
+                        required=True,
+                        help="export onnx model path.")
+
+    args = parser.parse_args()
+    return args
+
+def main():
+    args = parse_args()
+
+    model = torchvision.models.efficientnet_b2()
+    model.load_state_dict(torch.load(args.weight))
+    model.eval()
+
+    input_names = ['input']
+    output_names = ['output']
+    # dynamic_axes is intentionally None: the batch dim is rewritten later by modify_batchsize.py
+    dummy_input = torch.randn(1, 3, 224, 224)
+
+    torch.onnx.export(
+        model,
+        dummy_input,
+        args.output,
+        input_names = input_names,
+        dynamic_axes = None,
+        output_names = output_names,
+        opset_version=13
+    )
+
+    print("Export onnx model successfully!")
+
+if __name__ == "__main__":
+    main()
diff --git a/models/cv/classification/efficientnet_b2/ixrt/inference.py b/models/cv/classification/efficientnet_b2/ixrt/inference.py
new file mode 100644
index 00000000..4afba6bc
--- /dev/null
+++ b/models/cv/classification/efficientnet_b2/ixrt/inference.py
@@ -0,0 +1,172 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
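+
+"""ACC/FPS test driver for the serialized engine.
+
+In FPS mode throughput is computed as
+
+    fps = min(50000, loop_count * bsz) / forward_time
+
+so with the performance script defaults (LOOP_COUNT=20, BSZ=32) a run whose
+timed loop takes 0.4 s would report 640 / 0.4 = 1600 FPS (numbers
+illustrative). In ACC mode the ImageNet top-1/top-5 counters are
+accumulated batch by batch and checked against --acc_target.
+"""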
+ +import argparse +import json +import os +import re +import time +from tqdm import tqdm + +import cv2 +import numpy as np +from cuda import cuda, cudart +import torch +import tensorrt + +from calibration_dataset import getdataloader +from common import eval_batch, create_engine_context, get_io_bindings + +def main(config): + dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) + + host_mem = tensorrt.IHostMemory + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + + # Load Engine && I/O bindings + engine, context = create_engine_context(config.engine_file, logger) + inputs, outputs, allocations = get_io_bindings(engine) + + # Warm up + if config.warm_up > 0: + print("\nWarm Start.") + for i in range(config.warm_up): + context.execute_v2(allocations) + print("Warm Done.") + + # Inference + if config.test_mode == "FPS": + torch.cuda.synchronize() + start_time = time.time() + + for i in range(config.loop_count): + context.execute_v2(allocations) + + torch.cuda.synchronize() + end_time = time.time() + forward_time = end_time - start_time + + num_samples = 50000 + if config.loop_count * config.bsz < num_samples: + num_samples = config.loop_count * config.bsz + fps = num_samples / forward_time + + print("FPS : ", fps) + print(f"Performance Check : Test {fps} >= target {config.fps_target}") + if fps >= config.fps_target: + print("pass!") + exit() + else: + print("failed!") + exit(1) + + elif config.test_mode == "ACC": + + ## Prepare the output data + output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) + print(f"output shape : {output.shape} output type : {output.dtype}") + + total_sample = 0 + acc_top1, acc_top5 = 0, 0 + + with tqdm(total= len(dataloader)) as _tqdm: + for idx, (batch_data, batch_label) in enumerate(dataloader): + batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) + batch_data = np.ascontiguousarray(batch_data) + total_sample += batch_data.shape[0] + (err,) = cudart.cudaMemcpy( + inputs[0]["allocation"], + batch_data, + batch_data.nbytes, + cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, + ) + assert err == cudart.cudaError_t.cudaSuccess + # cuda.memcpy_htod(inputs[0]["allocation"], batch_data) + context.execute_v2(allocations) + (err,) = cudart.cudaMemcpy( + output, + outputs[0]["allocation"], + outputs[0]["nbytes"], + cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost, + ) + assert err == cudart.cudaError_t.cudaSuccess + # cuda.memcpy_dtoh(output, outputs[0]["allocation"]) + + # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model + if len(output.shape) == 4: + output = output.squeeze(axis=(2,3)) + + batch_top1, batch_top5 = eval_batch(output, batch_label) + acc_top1 += batch_top1 + acc_top5 += batch_top5 + + _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), + acc_5='{:.4f}'.format(acc_top5/total_sample)) + _tqdm.update(1) + err, = cudart.cudaFree(inputs[0]["allocation"]) + assert err == cudart.cudaError_t.cudaSuccess + err, = cudart.cudaFree(outputs[0]["allocation"]) + assert err == cudart.cudaError_t.cudaSuccess + print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") + print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") + acc1 = acc_top1/total_sample + print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") + if acc1 >= config.acc_target: + print("pass!") + exit() + else: + print("failed!") + exit(1) + +def parse_config(): + parser = argparse.ArgumentParser() + parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") + 
parser.add_argument( + "--engine_file", + type=str, + help="engine file path" + ) + parser.add_argument( + "--datasets_dir", + type=str, + default="", + help="ImageNet dir", + ) + parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") + parser.add_argument("--bsz", type=int, default=32, help="test batch size") + parser.add_argument( + "--imgsz", + "--img", + "--img-size", + type=int, + default=224, + help="inference size h,w", + ) + parser.add_argument("--use_async", action="store_true") + parser.add_argument( + "--device", type=int, default=0, help="cuda device, i.e. 0 or 0,1,2,3,4" + ) + parser.add_argument("--fps_target", type=float, default=-1.0) + parser.add_argument("--acc_target", type=float, default=-1.0) + parser.add_argument("--loop_count", type=int, default=-1) + + config = parser.parse_args() + return config + +if __name__ == "__main__": + config = parse_config() + main(config) diff --git a/models/cv/classification/efficientnet_b2/ixrt/modify_batchsize.py b/models/cv/classification/efficientnet_b2/ixrt/modify_batchsize.py new file mode 100644 index 00000000..4ac42a30 --- /dev/null +++ b/models/cv/classification/efficientnet_b2/ixrt/modify_batchsize.py @@ -0,0 +1,57 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import onnx +import argparse + +def change_input_dim(model, bsz): + batch_size = bsz + + # The following code changes the first dimension of every input to be batch_size + # Modify as appropriate ... note that this requires all inputs to + # have the same batch_size + inputs = model.graph.input + for input in inputs: + # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. + # Add checks as needed. + dim1 = input.type.tensor_type.shape.dim[0] + # update dim to be a symbolic value + if isinstance(batch_size, str): + # set dynamic batch size + dim1.dim_param = batch_size + elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): + # set given batch size + dim1.dim_value = int(batch_size) + else: + # set batch size of 1 + dim1.dim_value = 1 + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--batch_size", type=int) + parser.add_argument("--origin_model", type=str) + parser.add_argument("--output_model", type=str) + args = parser.parse_args() + return args + +args = parse_args() +model = onnx.load(args.origin_model) +change_input_dim(model, args.batch_size) +onnx.save(model, args.output_model) + + + + + diff --git a/models/cv/classification/efficientnet_b2/ixrt/scripts/infer_efficientnet_b1_fp16_accuracy.sh b/models/cv/classification/efficientnet_b2/ixrt/scripts/infer_efficientnet_b1_fp16_accuracy.sh new file mode 100644 index 00000000..48707017 --- /dev/null +++ b/models/cv/classification/efficientnet_b2/ixrt/scripts/infer_efficientnet_b1_fp16_accuracy.sh @@ -0,0 +1,119 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. 
+# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +TGT=-1 +WARM_UP=0 +LOOP_COUNT=-1 +RUN_MODE=ACC +PRECISION=float16 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +PROJ_DIR=${PROJ_DIR:-"."} +DATASETS_DIR="${DATASETS_DIR}" +CHECKPOINTS_DIR="${PROJ_DIR}" +RUN_DIR="${PROJ_DIR}" +CONFIG_DIR="${PROJ_DIR}/config/EFFICIENTNET_B2_CONFIG" +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +step=0 +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx + +# Simplify Model +let step++ +echo; +echo [STEP ${step}] : Simplify Model +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ + --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${FINAL_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --engine_file=${ENGINE_FILE} \ + --datasets_dir=${DATASETS_DIR} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --acc_target ${TGT} \ + --bsz ${BSZ}; check_status + +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b2/ixrt/scripts/infer_efficientnet_b1_fp16_performance.sh b/models/cv/classification/efficientnet_b2/ixrt/scripts/infer_efficientnet_b1_fp16_performance.sh new file mode 100644 index 00000000..cd0ece77 --- /dev/null +++ b/models/cv/classification/efficientnet_b2/ixrt/scripts/infer_efficientnet_b1_fp16_performance.sh @@ -0,0 +1,119 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. 
+# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +TGT=-1 +WARM_UP=3 +LOOP_COUNT=20 +RUN_MODE=FPS +PRECISION=float16 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +PROJ_DIR=${PROJ_DIR:-"."} +DATASETS_DIR="${DATASETS_DIR}" +CHECKPOINTS_DIR="${PROJ_DIR}" +RUN_DIR="${PROJ_DIR}" +CONFIG_DIR="${PROJ_DIR}/config/EFFICIENTNET_B2_CONFIG" +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +step=0 +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx + +# Simplify Model +let step++ +echo; +echo [STEP ${step}] : Simplify Model +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ + --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${FINAL_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --engine_file=${ENGINE_FILE} \ + --datasets_dir=${DATASETS_DIR} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --fps_target ${TGT} \ + --bsz ${BSZ}; check_status + +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b2/ixrt/simplify_model.py b/models/cv/classification/efficientnet_b2/ixrt/simplify_model.py new file mode 100644 index 00000000..4d53a474 --- /dev/null +++ b/models/cv/classification/efficientnet_b2/ixrt/simplify_model.py @@ -0,0 +1,41 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import onnx +import argparse +from onnxsim import simplify + +# Simplify +def simplify_model(args): + onnx_model = onnx.load(args.origin_model) + model_simp, check = simplify(onnx_model) + model_simp = onnx.shape_inference.infer_shapes(model_simp) + onnx.save(model_simp, args.output_model) + print(" Simplify onnx Done.") + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--origin_model", type=str) + parser.add_argument("--output_model", type=str) + parser.add_argument("--reshape", action="store_true") + args = parser.parse_args() + return args + +args = parse_args() +simplify_model(args) + + + + -- Gitee From f01083bbd5a9c250cca6a345afd6875e41fb7711 Mon Sep 17 00:00:00 2001 From: "xinchi.tian" Date: Thu, 17 Oct 2024 13:59:25 +0800 Subject: [PATCH 8/8] Use cuda-python --- models/cv/detection/yolov8/ixrt/README.md | 2 +- models/cv/detection/yolov8/ixrt/common.py | 6 ++++-- models/cv/detection/yolov8/ixrt/inference.py | 18 +++++++++++++----- 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/models/cv/detection/yolov8/ixrt/README.md b/models/cv/detection/yolov8/ixrt/README.md index 07558edf..96d637e0 100644 --- a/models/cv/detection/yolov8/ixrt/README.md +++ b/models/cv/detection/yolov8/ixrt/README.md @@ -20,7 +20,7 @@ pip3 install onnx pip3 install onnxsim pip3 install pycocotools pip3 install ultralytics -pip3 install pycuda +pip3 install cuda-python ``` ### Download diff --git a/models/cv/detection/yolov8/ixrt/common.py b/models/cv/detection/yolov8/ixrt/common.py index dc3c2766..3f28ccbc 100644 --- a/models/cv/detection/yolov8/ixrt/common.py +++ b/models/cv/detection/yolov8/ixrt/common.py @@ -20,7 +20,7 @@ import numpy as np from tqdm import tqdm import tensorrt -import pycuda.driver as cuda +from cuda import cuda, cudart def load_class_names(namesfile): @@ -101,13 +101,15 @@ def setup_io_bindings(engine, context): size = np.dtype(tensorrt.nptype(dtype)).itemsize for s in shape: size *= s - allocation = cuda.mem_alloc(size) + err, allocation = cudart.cudaMalloc(size) + assert err == cudart.cudaError_t.cudaSuccess binding = { "index": i, "name": name, "dtype": np.dtype(tensorrt.nptype(dtype)), "shape": list(shape), "allocation": allocation, + "nbytes": size } # print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") allocations.append(allocation) diff --git a/models/cv/detection/yolov8/ixrt/inference.py b/models/cv/detection/yolov8/ixrt/inference.py index d83b0136..9abc2142 100644 --- a/models/cv/detection/yolov8/ixrt/inference.py +++ b/models/cv/detection/yolov8/ixrt/inference.py @@ -19,8 +19,7 @@ import argparse import time import tensorrt from tensorrt import Dims -import pycuda.autoinit -import pycuda.driver as cuda +from cuda import cuda, cudart import torch import numpy as np from tqdm import tqdm @@ -157,7 +156,8 @@ class IxRT_Validator(DetectionValidator): context.set_binding_shape(input_idx, Dims(data_shape)) inputs, outputs, allocations = 
setup_io_bindings(engine, context) - cuda.memcpy_htod(inputs[0]["allocation"], batch_data) + err, = cuda.cuMemcpyHtoD(inputs[0]["allocation"], batch_data, batch_data.nbytes) + assert(err == cuda.CUresult.CUDA_SUCCESS) # Prepare the output data output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) @@ -167,7 +167,15 @@ class IxRT_Validator(DetectionValidator): end_time = time.time() forward_time += end_time - start_time - cuda.memcpy_dtoh(output, outputs[0]["allocation"]) + err, = cuda.cuMemcpyDtoH(output, outputs[0]["allocation"], outputs[0]["nbytes"]) + assert(err == cuda.CUresult.CUDA_SUCCESS) + + for alloc in allocations: + if not alloc: + continue + (err,) = cudart.cudaFree(alloc) + assert err == cudart.cudaError_t.cudaSuccess + if pad_batch: output = output[:origin_size] @@ -176,7 +184,7 @@ class IxRT_Validator(DetectionValidator): preds = self.postprocess([outputs]) self.update_metrics(preds, batch) - + if config.perf_only: fps = num_samples / forward_time return fps -- Gitee