From c9a6863d68825428ae6031b2f15f055dd1aca68e Mon Sep 17 00:00:00 2001
From: "xinchi.tian" <xinchi.tian@iluvatar.com>
Date: Tue, 23 Apr 2024 14:57:19 +0800
Subject: [PATCH 1/5] Add EfficientnetV2 in IXRT

link #I9FP0V
Add EfficientnetV2 in IXRT

Signed-off-by: xinchi.tian <xinchi.tian@iluvatar.com>
---
 .../efficientnet_v2/ixrt/README.md            |  67 ++
 .../efficientnet_v2/ixrt/build_engine.py      | 106 ++
 .../efficientnet_v2/ixrt/build_i8_engine.py   | 113 +++
 .../ixrt/calibration_dataset.py               | 113 +++
 .../efficientnet_v2/ixrt/common.py            |  79 ++
 .../ixrt/config/EFFICIENTNET_V2T_CONFIG       |  34 +
 .../efficientnet_v2/ixrt/export_onnx.py       | 950 ++++++++++++++++++
 .../efficientnet_v2/ixrt/inference.py         | 158 +++
 .../efficientnet_v2/ixrt/modify_batchsize.py  |  57 ++
 .../efficientnet_v2/ixrt/quant.py             | 167 +++
 .../efficientnet_v2/ixrt/refine_model.py      | 291 ++++++
 .../infer_efficientnet_fp16_accuracy.sh       | 145 +++
 .../infer_efficientnet_fp16_performance.sh    | 145 +++
 .../infer_efficientnet_int8_accuracy.sh       | 146 +++
 .../infer_efficientnet_int8_performance.sh    | 145 +++
 .../efficientnet_v2/ixrt/simplify_model.py    |  41 +
 16 files changed, 2757 insertions(+)
 create mode 100755 models/cv/classification/efficientnet_v2/ixrt/README.md
 create mode 100755 models/cv/classification/efficientnet_v2/ixrt/build_engine.py
 create mode 100644 models/cv/classification/efficientnet_v2/ixrt/build_i8_engine.py
 create mode 100644 models/cv/classification/efficientnet_v2/ixrt/calibration_dataset.py
 create mode 100644 models/cv/classification/efficientnet_v2/ixrt/common.py
 create mode 100644 models/cv/classification/efficientnet_v2/ixrt/config/EFFICIENTNET_V2T_CONFIG
 create mode 100755 models/cv/classification/efficientnet_v2/ixrt/export_onnx.py
 create mode 100644 models/cv/classification/efficientnet_v2/ixrt/inference.py
 create mode 100644 models/cv/classification/efficientnet_v2/ixrt/modify_batchsize.py
 create mode 100644 models/cv/classification/efficientnet_v2/ixrt/quant.py
 create mode 100644 models/cv/classification/efficientnet_v2/ixrt/refine_model.py
 create mode 100755 models/cv/classification/efficientnet_v2/ixrt/scripts/infer_efficientnet_fp16_accuracy.sh
 create mode 100755 models/cv/classification/efficientnet_v2/ixrt/scripts/infer_efficientnet_fp16_performance.sh
 create mode 100755 models/cv/classification/efficientnet_v2/ixrt/scripts/infer_efficientnet_int8_accuracy.sh
 create mode 100755 models/cv/classification/efficientnet_v2/ixrt/scripts/infer_efficientnet_int8_performance.sh
 create mode 100644 models/cv/classification/efficientnet_v2/ixrt/simplify_model.py

diff --git a/models/cv/classification/efficientnet_v2/ixrt/README.md b/models/cv/classification/efficientnet_v2/ixrt/README.md
new file mode 100755
index 00000000..d06cff37
--- /dev/null
+++ b/models/cv/classification/efficientnet_v2/ixrt/README.md
@@ -0,0 +1,67 @@
+# EfficientnetV2
+
+## Description
+EfficientNetV2 is an improved version of the EfficientNet architecture proposed by Google, aiming to enhance model performance and efficiency. Unlike the original EfficientNet, EfficientNetV2 features a simplified design and incorporates a series of enhancement strategies to further boost performance.
+
+## Setup
+
+### Install
+```bash
+yum install mesa-libGL
+pip3 install tqdm
+pip3 install onnx
+pip3 install onnxsim
+pip3 install tabulate
+pip3 install timm
+pip3 install ppq
+pip3 install protobuf==3.20.0
+```
+
+### Download
+Pretrained model: <https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/efficientnetv2_t_agc-3620981a.pth>
+
+Dataset: download the ImageNet validation set from <https://www.image-net.org/download.php>.
+
+### Model Conversion
+```bash
+mkdir checkpoints
+git clone https://github.com/huggingface/pytorch-image-models.git
+cp /Path/to/ixrt/export_onnx.py pytorch-image-models/timm/models
+cd pytorch-image-models/timm/models
+python3 export_onnx.py --origin_model /path/to/efficientnetv2_t_agc-3620981a.pth --output_model ../../../checkpoints/efficientnet.onnx
+```
+
+## Inference
+```bash
+export PROJ_DIR=/Path/to/efficientnet_v2/ixrt
+export DATASETS_DIR=/path/to/imagenet_val/
+export CHECKPOINTS_DIR=./checkpoints
+export RUN_DIR=/Path/to/efficientnet_v2/ixrt
+export CONFIG_DIR=/Path/to/config/EFFICIENTNET_V2T_CONFIG
+export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
+```
+### FP16
+
+```bash
+# Accuracy
+bash scripts/infer_efficientnet_fp16_accuracy.sh
+# Performance
+bash scripts/infer_efficientnet_fp16_performance.sh
+```
+
+### INT8
+```bash
+# Accuracy
+bash scripts/infer_efficientnet_int8_accuracy.sh
+# Performance
+bash scripts/infer_efficientnet_int8_performance.sh
+```
+
+
+
+## Results
+
+Model          | BatchSize | Precision |   FPS    | Top-1(%) | Top-5(%)
+---------------|-----------|-----------|----------|----------|--------
+EfficientnetV2 |    32     |   FP16    | 1882.87  |  82.14   | 96.16
+EfficientnetV2 |    32     |   INT8    | 2595.96  |  81.50   | 95.96
diff --git a/models/cv/classification/efficientnet_v2/ixrt/build_engine.py b/models/cv/classification/efficientnet_v2/ixrt/build_engine.py
new file mode 100755
index 00000000..41e6af8d
--- /dev/null
+++ b/models/cv/classification/efficientnet_v2/ixrt/build_engine.py
@@ -0,0 +1,106 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+import os
+import cv2
+import argparse
+import numpy as np
+
+import torch
+import tensorrt
+from calibration_dataset import getdataloader
+import cuda.cudart as cudart
+
+def assertSuccess(err):
+    assert(err == cudart.cudaError_t.cudaSuccess)
+
+class EngineCalibrator(tensorrt.IInt8EntropyCalibrator2):
+    """INT8 entropy calibrator that stages each batch from `getdataloader` in one reused device buffer."""
+    def __init__(self, cache_file, datasets_dir, loop_count=10, bsz=1, img_sz=224):
+        super().__init__()
+        self.cache_file = cache_file
+        self.image_batcher  = getdataloader(datasets_dir, loop_count, batch_size=bsz, img_sz=img_sz)
+        self.batch_generator = iter(self.image_batcher)
+        # cudaMalloc takes bytes; get_batch copies batch.nbytes (assumes float32 batches - TODO confirm).
+        size = img_sz * img_sz * 3 * bsz * np.dtype(np.float32).itemsize
+        err, self.batch_allocation = cudart.cudaMalloc(size)
+        assertSuccess(err)
+
+    def __del__(self):  # releases the buffer allocated in __init__
+        err,= cudart.cudaFree(self.batch_allocation)
+        assertSuccess(err)
+
+    def get_batch_size(self):  # batch size of the underlying dataloader
+        return self.image_batcher.batch_size
+
+    def get_batch(self, names):  # returns [device pointer], or None once the loader is exhausted
+        try:
+            batch, _ = next(self.batch_generator)
+            batch = np.ascontiguousarray(batch.numpy())
+            err, = cudart.cudaMemcpy(self.batch_allocation,
+                                     batch,
+                                     batch.nbytes,
+                                     cudart.cudaMemcpyKind.cudaMemcpyHostToDevice)
+            assertSuccess(err)  # do not calibrate on a buffer the copy failed to fill
+            return [int(self.batch_allocation)]
+        except StopIteration:
+            return None
+
+    def read_calibration_cache(self):  # returns the cache bytes, or None when no cache file exists
+        if os.path.exists(self.cache_file):
+            with open(self.cache_file, "rb") as f:
+                return f.read()
+
+    def write_calibration_cache(self, cache):  # overwrites the cache file with `cache`
+        with open(self.cache_file, "wb") as f:
+            f.write(cache)
+
+def main(config):
+    """Build an engine from `config.model` (INT8 with calibration if precision is "int8", else FP16) into `config.engine`."""
+    IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.VERBOSE)
+    builder = tensorrt.Builder(IXRT_LOGGER)
+    EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
+    network = builder.create_network(EXPLICIT_BATCH)
+    build_config = builder.create_builder_config()
+    parser = tensorrt.OnnxParser(network, IXRT_LOGGER)
+    parser.parse_from_file(config.model)
+    precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16
+    print("precision : ", precision)
+    build_config.set_flag(precision)
+    if config.precision == "int8":
+        build_config.int8_calibrator = EngineCalibrator("int8_cache", config.datasets_dir)
+    plan = builder.build_serialized_network(network, build_config)
+    if plan is None:  # check before open() so a failed build does not leave an empty engine file
+        raise RuntimeError(f"Failed to build engine from {config.model}")
+    with open(config.engine, "wb") as f:
+        f.write(plan)
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--model", type=str)
+    parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8",
+            help="The precision of datatype")
+    parser.add_argument("--engine", type=str, default=None)
+    parser.add_argument(
+        "--datasets_dir",
+        type=str,
+        default="",
+        help="ImageNet dir",
+    )
+    args = parser.parse_args()
+    return args
+
+if __name__ == "__main__":
+    args = parse_args()
+    main(args)
\ No newline at end of file
diff --git a/models/cv/classification/efficientnet_v2/ixrt/build_i8_engine.py b/models/cv/classification/efficientnet_v2/ixrt/build_i8_engine.py
new file mode 100644
index 00000000..6e356260
--- /dev/null
+++ b/models/cv/classification/efficientnet_v2/ixrt/build_i8_engine.py
@@ -0,0 +1,113 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+
+import argparse
+import json
+import os
+
+import tensorrt
+import tensorrt as trt
+
+TRT_LOGGER = trt.Logger(tensorrt.Logger.VERBOSE)
+
+EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
+
+
+def GiB(val):
+    return val * 1 << 30
+
+
+def json_load(filename):
+    with open(filename) as json_file:
+        data = json.load(json_file)
+    return data
+
+
+def setDynamicRange(network, json_file):
+    """Apply symmetric dynamic ranges from the "act_quant_info" table in `json_file`.
+
+    Network inputs found in the table are printed; layer outputs missing from
+    it are printed in green and get no range assigned here.
+    """
+    ranges = json_load(json_file)["act_quant_info"]
+
+    # Network inputs.
+    for in_idx in range(network.num_inputs):
+        net_input = network.get_input(in_idx)
+        if net_input.name in ranges:
+            print(net_input.name)
+            bound = abs(ranges[net_input.name])
+            net_input.dynamic_range = (-bound, bound)
+
+    # Outputs of every layer.
+    for layer_idx in range(network.num_layers):
+        layer = network.get_layer(layer_idx)
+
+        for out_idx in range(layer.num_outputs):
+            out_tensor = layer.get_output(out_idx)
+            if out_tensor.name not in ranges:
+                print("\033[1;32m%s\033[0m" % out_tensor.name)
+                continue
+            bound = abs(ranges[out_tensor.name])
+            out_tensor.dynamic_range = (-bound, bound)
+
+
+def build_engine(onnx_file, json_file, engine_file):
+    """Build an INT8 engine from `onnx_file` with the ranges in `json_file`; write it to `engine_file`.
+
+    Returns None after printing the parser errors when the ONNX file cannot be parsed.
+    """
+    builder = trt.Builder(TRT_LOGGER)
+    network = builder.create_network(EXPLICIT_BATCH)
+    config = builder.create_builder_config()
+
+    # If it is a dynamic onnx model , you need to add the following.
+    # profile = builder.create_optimization_profile()
+    # profile.set_shape("input_name", (batch, channels, min_h, min_w), (batch, channels, opt_h, opt_w), (batch, channels, max_h, max_w))
+    # config.add_optimization_profile(profile)
+    parser = trt.OnnxParser(network, TRT_LOGGER)
+    # config.max_workspace_size = GiB(1)
+    if not os.path.exists(onnx_file):
+        quit("ONNX file {} not found".format(onnx_file))
+    with open(onnx_file, "rb") as model:
+        if not parser.parse(model.read()):
+            print("ERROR: Failed to parse the ONNX file.")
+            for error in range(parser.num_errors):
+                print(parser.get_error(error))
+            return None
+    config.set_flag(trt.BuilderFlag.INT8)
+    setDynamicRange(network, json_file)
+    engine = builder.build_engine(network, config)
+    if engine is None:  # fail before the output file is created or truncated
+        raise RuntimeError("Failed to build engine from {}".format(onnx_file))
+    serialized = engine.serialize()
+    with open(engine_file, "wb") as f:
+        f.write(serialized)
+
+
+if __name__ == "__main__":
+    # Add plugins if needed
+    # import ctypes
+    # ctypes.CDLL("libmmdeploy_tensorrt_ops.so")
+    parser = argparse.ArgumentParser(
+        description="Writing qparams to onnx to convert tensorrt engine."
+    )
+    parser.add_argument("--onnx", type=str, default=None)
+    parser.add_argument("--qparam_json", type=str, default=None)
+    parser.add_argument("--engine", type=str, default=None)
+    arg = parser.parse_args()
+
+    build_engine(arg.onnx, arg.qparam_json, arg.engine)
+    print("\033[1;32mgenerate %s\033[0m" % arg.engine)
\ No newline at end of file
diff --git a/models/cv/classification/efficientnet_v2/ixrt/calibration_dataset.py b/models/cv/classification/efficientnet_v2/ixrt/calibration_dataset.py
new file mode 100644
index 00000000..d7525d51
--- /dev/null
+++ b/models/cv/classification/efficientnet_v2/ixrt/calibration_dataset.py
@@ -0,0 +1,113 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+
+import os
+
+import torch
+import torchvision.datasets
+from torch.utils.data import DataLoader
+from torchvision import models
+from torchvision import transforms as T
+
+
+class CalibrationImageNet(torchvision.datasets.ImageFolder):
+    def __init__(self, *args, **kwargs):
+        super(CalibrationImageNet, self).__init__(*args, **kwargs)
+        img2label_path = os.path.join(self.root, "val_map.txt")
+        if not os.path.exists(img2label_path):
+            raise FileNotFoundError(f"Not found label file `{img2label_path}`.")
+
+        self.img2label_map = self.make_img2label_map(img2label_path)
+
+    def make_img2label_map(self, path):
+        with open(path) as f:
+            lines = f.readlines()
+
+        img2lable_map = dict()
+        for line in lines:
+            line = line.lstrip().rstrip().split("\t")
+            if len(line) != 2:
+                continue
+            img_name, label = line
+            img_name = img_name.strip()
+            if img_name in [None, ""]:
+                continue
+            label = int(label.strip())
+            img2lable_map[img_name] = label
+        return img2lable_map
+
+    def __getitem__(self, index):
+        path, target = self.samples[index]
+        sample = self.loader(path)
+        if self.transform is not None:
+            sample = self.transform(sample)
+        # if self.target_transform is not None:
+        #     target = self.target_transform(target)
+        img_name = os.path.basename(path)
+        target = self.img2label_map[img_name]
+
+        return sample, target
+
+
+def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0):
+    """Return (calibration_dataloader, verify_dataloader) built from the image folder at `data_path`."""
+    dataset = CalibrationImageNet(
+        data_path,
+        transform=T.Compose(
+            [
+                T.Resize(256),
+                T.CenterCrop(img_sz),
+                T.ToTensor(),
+                T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+            ]
+        ),
+    )
+    calibration_dataset = dataset
+    if num_samples is not None:
+        calibration_dataset = torch.utils.data.Subset(
+            dataset, indices=range(min(num_samples, len(dataset)))  # clamp: avoid IndexError on small datasets
+        )
+
+    calibration_dataloader = DataLoader(
+        calibration_dataset,
+        shuffle=False,
+        batch_size=batch_size,
+        drop_last=False,
+        num_workers=workers,
+    )
+
+    verify_dataloader = DataLoader(
+        dataset,
+        shuffle=False,
+        batch_size=batch_size,
+        drop_last=False,
+        num_workers=workers,
+    )
+
+    return calibration_dataloader, verify_dataloader
+
+
+def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000):
+    num_samples = min(total_sample, step * batch_size)
+    if step < 0:
+        num_samples = None
+    calibration_dataloader, _ = create_dataloaders(
+        dataset_dir,
+        img_sz=img_sz,
+        batch_size=batch_size,
+        workers=workers,
+        num_samples=num_samples,
+    )
+    return calibration_dataloader
\ No newline at end of file
diff --git a/models/cv/classification/efficientnet_v2/ixrt/common.py b/models/cv/classification/efficientnet_v2/ixrt/common.py
new file mode 100644
index 00000000..abdc147c
--- /dev/null
+++ b/models/cv/classification/efficientnet_v2/ixrt/common.py
@@ -0,0 +1,79 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+
+import os
+import cv2
+import glob
+import torch
+import tensorrt
+import numpy as np
+import pycuda.driver as cuda
+
+def eval_batch(batch_score, batch_label):
+    """Count top-1 and top-5 hits of the score rows in `batch_score` against `batch_label`."""
+    # from_numpy(...).float() avoids the copy-construct UserWarning raised by torch.tensor(tensor).
+    indices = torch.from_numpy(batch_score).float().topk(5).indices
+    top1, top5 = 0, 0
+    for idx, label in enumerate(batch_label):
+        if label == indices[idx][0]:
+            top1 += 1
+        if label in indices[idx]:
+            top5 += 1
+    return top1, top5
+
+def create_engine_context(engine_path, logger):
+    with open(engine_path, "rb") as f:
+        runtime = tensorrt.Runtime(logger)
+        assert runtime
+        engine = runtime.deserialize_cuda_engine(f.read())
+        assert engine
+        context = engine.create_execution_context()
+        assert context
+
+    return engine, context
+
+def get_io_bindings(engine):
+    """Allocate one device buffer per engine binding.
+
+    Returns (inputs, outputs, allocations): `inputs`/`outputs` are lists of
+    dicts with keys "index", "name", "dtype", "shape" and "allocation";
+    `allocations` holds every buffer in binding order.
+    """
+    inputs = []
+    outputs = []
+    allocations = []
+    for i in range(engine.num_bindings):
+        name = engine.get_binding_name(i)
+        np_dtype = np.dtype(tensorrt.nptype(engine.get_binding_dtype(i)))
+        shape = engine.get_binding_shape(i)
+        # Buffer size in bytes: element size times the product of all dimensions.
+        size = np_dtype.itemsize
+        for s in shape:
+            size *= s
+        allocation = cuda.mem_alloc(size)
+        binding = {
+            "index": i,
+            "name": name,
+            "dtype": np_dtype,
+            "shape": list(shape),
+            "allocation": allocation,
+        }
+        print(f"binding {i}, name : {name}  dtype : {np_dtype}  shape : {list(shape)}")
+        allocations.append(allocation)
+        if engine.binding_is_input(i):
+            inputs.append(binding)
+        else:
+            outputs.append(binding)
+    return inputs, outputs, allocations
diff --git a/models/cv/classification/efficientnet_v2/ixrt/config/EFFICIENTNET_V2T_CONFIG b/models/cv/classification/efficientnet_v2/ixrt/config/EFFICIENTNET_V2T_CONFIG
new file mode 100644
index 00000000..b9e40159
--- /dev/null
+++ b/models/cv/classification/efficientnet_v2/ixrt/config/EFFICIENTNET_V2T_CONFIG
@@ -0,0 +1,34 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+
+# IMGSIZE : 模型输入hw大小
+# MODEL_NAME : 生成onnx/engine的basename
+# ORIGINE_MODEL : 原始onnx文件名称
+IMGSIZE=288
+MODEL_NAME=EfficientNetv2_t
+ORIGINE_MODEL=efficientnet.onnx
+
+# QUANT CONFIG (仅PRECISION为int8时生效)
+    # QUANT_OBSERVER : 量化策略，可选 [hist_percentile, percentile, minmax, entropy, ema]
+    # QUANT_BATCHSIZE : 量化时组dataloader的batchsize, 最好和onnx中的batchsize保持一致，有些op可能推导shape错误(比如Reshape)
+    # QUANT_STEP : 量化步数
+    # QUANT_SEED : 随机种子 保证量化结果可复现
+    # QUANT_EXIST_ONNX : 如果有其他来源的量化模型则填写
+QUANT_OBSERVER=hist_percentile
+QUANT_BATCHSIZE=32
+QUANT_STEP=32
+QUANT_SEED=42
+DISABLE_QUANT_LIST=
+QUANT_EXIST_ONNX=
diff --git a/models/cv/classification/efficientnet_v2/ixrt/export_onnx.py b/models/cv/classification/efficientnet_v2/ixrt/export_onnx.py
new file mode 100755
index 00000000..2ada1df1
--- /dev/null
+++ b/models/cv/classification/efficientnet_v2/ixrt/export_onnx.py
@@ -0,0 +1,950 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+
+from functools import partial
+from typing import List
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD
+from _efficientnet_blocks import SqueezeExcite
+from _efficientnet_builder import EfficientNetBuilder, decode_arch_def, efficientnet_init_weights,\
+    round_channels, resolve_bn_args, resolve_act_layer, BN_EPS_TF_DEFAULT
+from features import FeatureInfo, FeatureHooks
+from helpers import build_model_with_cfg, pretrained_cfg_for_features, checkpoint_seq
+from layers import create_conv2d, create_classifier, get_norm_act_layer, EvoNorm2dS0, GroupNormAct
+from registry import register_model
+import argparse
+import ssl
+
+# NOTE(review): the assignment below swaps in an unverified SSL context as the process-wide
+# default, which turns off HTTPS certificate checks - confirm this is really required.
+ssl._create_default_https_context = ssl._create_unverified_context
+
+__all__ = ['EfficientNet', 'EfficientNetFeatures']
+
+
+def _cfg(url='', **kwargs):
+    return {
+        'url': url, 'num_classes': 1000, 'input_size': (3, 288, 288), 'pool_size': (7, 7),
+        'crop_pct': 0.875, 'interpolation': 'bicubic',
+        'mean': IMAGENET_DEFAULT_MEAN, 'std': IMAGENET_DEFAULT_STD,
+        'first_conv': 'conv_stem', 'classifier': 'classifier',
+        **kwargs
+    }
+
+
+default_cfgs = {
+    'efficientnetv2_rw_t': _cfg(
+        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/efficientnetv2_t_agc-3620981a.pth',
+        input_size=(3, 224, 224), test_input_size=(3, 288, 288), pool_size=(7, 7), crop_pct=1.0),
+    'gc_efficientnetv2_rw_t': _cfg(
+        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/gc_efficientnetv2_rw_t_agc-927a0bde.pth',
+        input_size=(3, 224, 224), test_input_size=(3, 288, 288), pool_size=(7, 7), crop_pct=1.0),
+    'efficientnetv2_rw_s': _cfg(
+        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/efficientnet_v2s_ra2_288-a6477665.pth',
+        input_size=(3, 288, 288), test_input_size=(3, 384, 384), pool_size=(9, 9), crop_pct=1.0),
+    'efficientnetv2_rw_m': _cfg(
+        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/efficientnetv2_rw_m_agc-3d90cb1e.pth',
+        input_size=(3, 320, 320), test_input_size=(3, 416, 416), pool_size=(10, 10), crop_pct=1.0),
+
+    'efficientnetv2_s': _cfg(
+        url='',
+        input_size=(3, 288, 288), test_input_size=(3, 384, 384), pool_size=(9, 9), crop_pct=1.0),
+    'efficientnetv2_m': _cfg(
+        url='',
+        input_size=(3, 320, 320), test_input_size=(3, 416, 416), pool_size=(10, 10), crop_pct=1.0),
+    'efficientnetv2_l': _cfg(
+        url='',
+        input_size=(3, 384, 384), test_input_size=(3, 480, 480), pool_size=(12, 12), crop_pct=1.0),
+    'efficientnetv2_xl': _cfg(
+        url='',
+        input_size=(3, 384, 384), test_input_size=(3, 512, 512), pool_size=(12, 12), crop_pct=1.0),
+}
+
+
+class EfficientNet(nn.Module):
+    """ EfficientNet
+
+    A flexible and performant PyTorch implementation of efficient network architectures, including:
+      * EfficientNet-V2 Small, Medium, Large, XL & B0-B3
+      * EfficientNet B0-B8, L2
+      * EfficientNet-EdgeTPU
+      * EfficientNet-CondConv
+      * MixNet S, M, L, XL
+      * MnasNet A1, B1, and small
+      * MobileNet-V2
+      * FBNet C
+      * Single-Path NAS Pixel1
+      * TinyNet
+    """
+
+    def __init__(
+            self, block_args, num_classes=1000, num_features=1280, in_chans=3, stem_size=32, fix_stem=False,
+            output_stride=32, pad_type='', round_chs_fn=round_channels, act_layer=None, norm_layer=None,
+            se_layer=None, drop_rate=0., drop_path_rate=0., global_pool='avg'):
+        super(EfficientNet, self).__init__()
+        act_layer = act_layer or nn.ReLU
+        norm_layer = norm_layer or nn.BatchNorm2d
+        norm_act_layer = get_norm_act_layer(norm_layer, act_layer)
+        se_layer = se_layer or SqueezeExcite
+        self.num_classes = num_classes
+        self.num_features = num_features
+        self.drop_rate = drop_rate
+        self.grad_checkpointing = False
+
+        # Stem
+        if not fix_stem:
+            stem_size = round_chs_fn(stem_size)
+        self.conv_stem = create_conv2d(in_chans, stem_size, 3, stride=2, padding=pad_type)
+        self.bn1 = norm_act_layer(stem_size, inplace=True)
+
+        # Middle stages (IR/ER/DS Blocks)
+        builder = EfficientNetBuilder(
+            output_stride=output_stride, pad_type=pad_type, round_chs_fn=round_chs_fn,
+            act_layer=act_layer, norm_layer=norm_layer, se_layer=se_layer, drop_path_rate=drop_path_rate)
+        self.blocks = nn.Sequential(*builder(stem_size, block_args))
+        self.feature_info = builder.features
+        head_chs = builder.in_chs
+
+        # Head + Pooling
+        self.conv_head = create_conv2d(head_chs, self.num_features, 1, padding=pad_type)
+        self.bn2 = norm_act_layer(self.num_features, inplace=True)
+        self.global_pool, self.classifier = create_classifier(
+            self.num_features, self.num_classes, pool_type=global_pool)
+
+        efficientnet_init_weights(self)
+
+    def as_sequential(self):
+        layers = [self.conv_stem, self.bn1]
+        layers.extend(self.blocks)
+        layers.extend([self.conv_head, self.bn2, self.global_pool])
+        layers.extend([nn.Dropout(self.drop_rate), self.classifier])
+        return nn.Sequential(*layers)
+
+    @torch.jit.ignore
+    def group_matcher(self, coarse=False):
+        return dict(
+            stem=r'^conv_stem|bn1',
+            blocks=[
+                (r'^blocks\.(\d+)' if coarse else r'^blocks\.(\d+)\.(\d+)', None),
+                (r'conv_head|bn2', (99999,))
+            ]
+        )
+
+    @torch.jit.ignore
+    def set_grad_checkpointing(self, enable=True):
+        self.grad_checkpointing = enable
+
+    @torch.jit.ignore
+    def get_classifier(self):
+        return self.classifier
+
+    def reset_classifier(self, num_classes, global_pool='avg'):
+        self.num_classes = num_classes
+        self.global_pool, self.classifier = create_classifier(
+            self.num_features, self.num_classes, pool_type=global_pool)
+
+    def forward_features(self, x):
+        x = self.conv_stem(x)
+        x = self.bn1(x)
+        if self.grad_checkpointing and not torch.jit.is_scripting():
+            x = checkpoint_seq(self.blocks, x, flatten=True)
+        else:
+            x = self.blocks(x)
+        x = self.conv_head(x)
+        x = self.bn2(x)
+        return x
+
+    def forward_head(self, x, pre_logits: bool = False):
+        x = self.global_pool(x)
+        if self.drop_rate > 0.:
+            x = F.dropout(x, p=self.drop_rate, training=self.training)
+        return x if pre_logits else self.classifier(x)
+
+    def forward(self, x):
+        x = self.forward_features(x)
+        x = self.forward_head(x)
+        return x
+
+
+class EfficientNetFeatures(nn.Module):
+    """EfficientNet variant that returns intermediate feature maps.
+
+    Builds only the stem and the block stages and returns the outputs selected by
+    ``out_indices`` as a list, e.g. for use as a segmentation / detection backbone.
+    """
+
+    def __init__(
+            self, block_args, out_indices=(0, 1, 2, 3, 4), feature_location='bottleneck', in_chans=3,
+            stem_size=32, fix_stem=False, output_stride=32, pad_type='', round_chs_fn=round_channels,
+            act_layer=None, norm_layer=None, se_layer=None, drop_rate=0., drop_path_rate=0.):
+        super().__init__()
+        # None selects the default layer types
+        norm_layer = norm_layer or nn.BatchNorm2d
+        act_layer = act_layer or nn.ReLU
+        se_layer = se_layer or SqueezeExcite
+        norm_act_layer = get_norm_act_layer(norm_layer, act_layer)
+        self.drop_rate = drop_rate
+
+        # Stem: the width goes through round_chs_fn unless fix_stem is set
+        stem_chs = stem_size if fix_stem else round_chs_fn(stem_size)
+        self.conv_stem = create_conv2d(in_chans, stem_chs, 3, stride=2, padding=pad_type)
+        self.bn1 = norm_act_layer(stem_chs, inplace=True)
+
+        # Middle stages (IR/ER/DS blocks) come from the arch builder
+        stage_builder = EfficientNetBuilder(
+            output_stride=output_stride, pad_type=pad_type, round_chs_fn=round_chs_fn,
+            act_layer=act_layer, norm_layer=norm_layer, se_layer=se_layer, drop_path_rate=drop_path_rate,
+            feature_location=feature_location)
+        self.blocks = nn.Sequential(*stage_builder(stem_chs, block_args))
+        self.feature_info = FeatureInfo(stage_builder.features, out_indices)
+        self._stage_out_idx = {info['stage']: idx for idx, info in enumerate(self.feature_info) if idx in out_indices}
+
+        efficientnet_init_weights(self)
+
+        # Feature hooks are registered for every feature_location except 'bottleneck'
+        self.feature_hooks = None
+        if feature_location != 'bottleneck':
+            hook_specs = self.feature_info.get_dicts(keys=('module', 'hook_type'))
+            self.feature_hooks = FeatureHooks(hook_specs, self.named_modules())
+
+    def forward(self, x) -> List[torch.Tensor]:
+        """Return the selected feature maps as a list of tensors."""
+        x = self.bn1(self.conv_stem(x))
+        if self.feature_hooks is not None:
+            # hook mode: run all blocks, then fetch the outputs from the hooks helper
+            self.blocks(x)
+            captured = self.feature_hooks.get_output(x.device)
+            return list(captured.values())
+        features = []
+        if 0 in self._stage_out_idx:
+            features.append(x)  # stem output
+        for idx, block in enumerate(self.blocks):
+            x = block(x)
+            if idx + 1 in self._stage_out_idx:
+                features.append(x)
+        return features
+
+
+def _create_effnet(variant, pretrained=False, **kwargs):
+    """Build an EfficientNet, or EfficientNetFeatures when ``features_only`` is passed truthy."""
+    features_only = bool(kwargs.pop('features_only', False))
+    if features_only:
+        # these head-only names go in as kwargs_filter; EfficientNetFeatures.__init__ accepts none of them
+        model_cls = EfficientNetFeatures
+        kwargs_filter = ('num_classes', 'num_features', 'head_conv', 'global_pool')
+    else:
+        model_cls, kwargs_filter = EfficientNet, None
+    model = build_model_with_cfg(
+        model_cls, variant, pretrained,
+        pretrained_strict=not features_only,
+        kwargs_filter=kwargs_filter, **kwargs)
+    if features_only:
+        model.default_cfg = pretrained_cfg_for_features(model.default_cfg)
+    return model
+
+
+def _gen_mnasnet_a1(variant, channel_multiplier=1.0, pretrained=False, **kwargs):
+    """Build the MnasNet-A1 architecture.
+
+    Paper: https://arxiv.org/pdf/1807.11626.pdf.
+    Ref impl: https://github.com/tensorflow/tpu/tree/master/models/official/mnasnet
+
+    Args:
+      channel_multiplier: scale factor applied to each layer's channel count.
+    """
+    stage_defs = [
+        # stage 0 (112x112 input)
+        ['ds_r1_k3_s1_e1_c16_noskip'],
+        # stage 1 (112x112 input)
+        ['ir_r2_k3_s2_e6_c24'],
+        # stage 2 (56x56 input)
+        ['ir_r3_k5_s2_e3_c40_se0.25'],
+        # stage 3 (28x28 input)
+        ['ir_r4_k3_s2_e6_c80'],
+        # stage 4 (14x14 input)
+        ['ir_r2_k3_s1_e6_c112_se0.25'],
+        # stage 5 (14x14 input)
+        ['ir_r3_k5_s2_e6_c160_se0.25'],
+        # stage 6 (7x7 input)
+        ['ir_r1_k3_s1_e6_c320'],
+    ]
+    # norm_layer: an explicit kwarg wins; otherwise BatchNorm2d bound with resolve_bn_args(kwargs)
+    net_cfg = dict(
+        block_args=decode_arch_def(stage_defs),
+        stem_size=32,
+        round_chs_fn=partial(round_channels, multiplier=channel_multiplier),
+        norm_layer=kwargs.pop('norm_layer', None) or partial(nn.BatchNorm2d, **resolve_bn_args(kwargs)),
+        **kwargs
+    )
+    return _create_effnet(variant, pretrained, **net_cfg)
+
+
+def _gen_mnasnet_b1(variant, channel_multiplier=1.0, pretrained=False, **kwargs):
+    """Build the MnasNet-B1 architecture.
+
+    Paper: https://arxiv.org/pdf/1807.11626.pdf.
+    Ref impl: https://github.com/tensorflow/tpu/tree/master/models/official/mnasnet
+
+    Args:
+      channel_multiplier: scale factor applied to each layer's channel count.
+    """
+    stage_defs = [
+        # stage 0 (112x112 input)
+        ['ds_r1_k3_s1_c16_noskip'],
+        # stage 1 (112x112 input)
+        ['ir_r3_k3_s2_e3_c24'],
+        # stage 2 (56x56 input)
+        ['ir_r3_k5_s2_e3_c40'],
+        # stage 3 (28x28 input)
+        ['ir_r3_k5_s2_e6_c80'],
+        # stage 4 (14x14 input)
+        ['ir_r2_k3_s1_e6_c96'],
+        # stage 5 (14x14 input)
+        ['ir_r4_k5_s2_e6_c192'],
+        # stage 6 (7x7 input)
+        ['ir_r1_k3_s1_e6_c320_noskip']
+    ]
+    # norm_layer: an explicit kwarg wins; otherwise BatchNorm2d bound with resolve_bn_args(kwargs)
+    net_cfg = dict(
+        block_args=decode_arch_def(stage_defs),
+        stem_size=32,
+        round_chs_fn=partial(round_channels, multiplier=channel_multiplier),
+        norm_layer=kwargs.pop('norm_layer', None) or partial(nn.BatchNorm2d, **resolve_bn_args(kwargs)),
+        **kwargs
+    )
+    return _create_effnet(variant, pretrained, **net_cfg)
+
+
+def _gen_mnasnet_small(variant, channel_multiplier=1.0, pretrained=False, **kwargs):
+    """Build the MnasNet-Small architecture.
+
+    Paper: https://arxiv.org/pdf/1807.11626.pdf.
+    Ref impl: https://github.com/tensorflow/tpu/tree/master/models/official/mnasnet
+
+    Args:
+      channel_multiplier: scale factor applied to each layer's channel count.
+    """
+    stage_defs = [
+        ['ds_r1_k3_s1_c8'],
+        ['ir_r1_k3_s2_e3_c16'],
+        ['ir_r2_k3_s2_e6_c16'],
+        ['ir_r4_k5_s2_e6_c32_se0.25'],
+        ['ir_r3_k3_s1_e6_c32_se0.25'],
+        ['ir_r3_k5_s2_e6_c88_se0.25'],
+        ['ir_r1_k3_s1_e6_c144']
+    ]
+    # norm_layer: an explicit kwarg wins; otherwise BatchNorm2d bound with resolve_bn_args(kwargs)
+    net_cfg = dict(
+        block_args=decode_arch_def(stage_defs),
+        stem_size=8,
+        round_chs_fn=partial(round_channels, multiplier=channel_multiplier),
+        norm_layer=kwargs.pop('norm_layer', None) or partial(nn.BatchNorm2d, **resolve_bn_args(kwargs)),
+        **kwargs
+    )
+    return _create_effnet(variant, pretrained, **net_cfg)
+
+
+def _gen_mobilenet_v2(
+        variant, channel_multiplier=1.0, depth_multiplier=1.0, fix_stem_head=False, pretrained=False, **kwargs):
+    """Build a MobileNet-V2 network.
+    Paper: https://arxiv.org/abs/1801.04381
+    Ref impl: https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet_v2.py
+    """
+    stage_defs = [
+        ['ds_r1_k3_s1_c16'],
+        ['ir_r2_k3_s2_e6_c24'],
+        ['ir_r3_k3_s2_e6_c32'],
+        ['ir_r4_k3_s2_e6_c64'],
+        ['ir_r3_k3_s1_e6_c96'],
+        ['ir_r3_k3_s2_e6_c160'],
+        ['ir_r1_k3_s1_e6_c320'],
+    ]
+    scale_chs = partial(round_channels, multiplier=channel_multiplier)
+    # fix_stem_head pins the head width to 1280; otherwise it is max(1280, rounded 1280)
+    net_cfg = dict(
+        block_args=decode_arch_def(stage_defs, depth_multiplier=depth_multiplier, fix_first_last=fix_stem_head),
+        num_features=1280 if fix_stem_head else max(1280, scale_chs(1280)),
+        stem_size=32,
+        fix_stem=fix_stem_head,
+        round_chs_fn=scale_chs,
+        norm_layer=kwargs.pop('norm_layer', None) or partial(nn.BatchNorm2d, **resolve_bn_args(kwargs)),
+        act_layer=resolve_act_layer(kwargs, 'relu6'),
+        **kwargs
+    )
+    return _create_effnet(variant, pretrained, **net_cfg)
+
+
+def _gen_fbnetc(variant, channel_multiplier=1.0, pretrained=False, **kwargs):
+    """Build the FBNet-C architecture.
+
+        Paper: https://arxiv.org/abs/1812.03443
+        Ref Impl: https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/modeling/backbone/fbnet_modeldef.py
+
+        NOTE: the reference impl does not contain the 'C' variant built here (that one was derived
+        from the paper); it was only used to confirm some building block details.
+    """
+    stage_defs = [
+        ['ir_r1_k3_s1_e1_c16'],
+        ['ir_r1_k3_s2_e6_c24', 'ir_r2_k3_s1_e1_c24'],
+        ['ir_r1_k5_s2_e6_c32', 'ir_r1_k5_s1_e3_c32', 'ir_r1_k5_s1_e6_c32', 'ir_r1_k3_s1_e6_c32'],
+        ['ir_r1_k5_s2_e6_c64', 'ir_r1_k5_s1_e3_c64', 'ir_r2_k5_s1_e6_c64'],
+        ['ir_r3_k5_s1_e6_c112', 'ir_r1_k5_s1_e3_c112'],
+        ['ir_r4_k5_s2_e6_c184'],
+        ['ir_r1_k3_s1_e6_c352'],
+    ]
+    # norm_layer: an explicit kwarg wins; otherwise BatchNorm2d bound with resolve_bn_args(kwargs)
+    net_cfg = dict(
+        block_args=decode_arch_def(stage_defs),
+        stem_size=16,
+        num_features=1984,  # value suggested by the paper, which is not 100% clear on it
+        round_chs_fn=partial(round_channels, multiplier=channel_multiplier),
+        norm_layer=kwargs.pop('norm_layer', None) or partial(nn.BatchNorm2d, **resolve_bn_args(kwargs)),
+        **kwargs
+    )
+    return _create_effnet(variant, pretrained, **net_cfg)
+
+
+def _gen_spnasnet(variant, channel_multiplier=1.0, pretrained=False, **kwargs):
+    """Build the Single-Path NAS model found by a search targeting the Pixel1 phone.
+
+    Paper: https://arxiv.org/abs/1904.02877
+
+    Args:
+      channel_multiplier: scale factor applied to each layer's channel count.
+    """
+    stage_defs = [
+        # stage 0 (112x112 input)
+        ['ds_r1_k3_s1_c16_noskip'],
+        # stage 1 (112x112 input)
+        ['ir_r3_k3_s2_e3_c24'],
+        # stage 2 (56x56 input)
+        ['ir_r1_k5_s2_e6_c40', 'ir_r3_k3_s1_e3_c40'],
+        # stage 3 (28x28 input)
+        ['ir_r1_k5_s2_e6_c80', 'ir_r3_k3_s1_e3_c80'],
+        # stage 4 (14x14 input)
+        ['ir_r1_k5_s1_e6_c96', 'ir_r3_k5_s1_e3_c96'],
+        # stage 5 (14x14 input)
+        ['ir_r4_k5_s2_e6_c192'],
+        # stage 6 (7x7 input)
+        ['ir_r1_k3_s1_e6_c320_noskip']
+    ]
+    # norm_layer: an explicit kwarg wins; otherwise BatchNorm2d bound with resolve_bn_args(kwargs)
+    net_cfg = dict(
+        block_args=decode_arch_def(stage_defs),
+        stem_size=32,
+        round_chs_fn=partial(round_channels, multiplier=channel_multiplier),
+        norm_layer=kwargs.pop('norm_layer', None) or partial(nn.BatchNorm2d, **resolve_bn_args(kwargs)),
+        **kwargs
+    )
+    return _create_effnet(variant, pretrained, **net_cfg)
+
+
+def _gen_efficientnet(
+        variant, channel_multiplier=1.0, depth_multiplier=1.0, channel_divisor=8,
+        group_size=None, pretrained=False, **kwargs):
+    """Build an EfficientNet model.
+
+    Paper: https://arxiv.org/abs/1905.11946
+    Ref impl: https://github.com/tensorflow/tpu/blob/master/models/official/efficientnet/efficientnet_model.py
+
+    Reference EfficientNet scaling parameters
+    name: (channel_multiplier, depth_multiplier, resolution, dropout_rate)
+    'efficientnet-b0': (1.0, 1.0, 224, 0.2),
+    'efficientnet-b1': (1.0, 1.1, 240, 0.2),
+    'efficientnet-b2': (1.1, 1.2, 260, 0.3),
+    'efficientnet-b3': (1.2, 1.4, 300, 0.3),
+    'efficientnet-b4': (1.4, 1.8, 380, 0.4),
+    'efficientnet-b5': (1.6, 2.2, 456, 0.4),
+    'efficientnet-b6': (1.8, 2.6, 528, 0.5),
+    'efficientnet-b7': (2.0, 3.1, 600, 0.5),
+    'efficientnet-b8': (2.2, 3.6, 672, 0.5),
+    'efficientnet-l2': (4.3, 5.3, 800, 0.5),
+
+    Args:
+      channel_multiplier: scale factor for the number of channels per layer
+      depth_multiplier: scale factor for the number of block repeats per stage
+
+    """
+    stage_defs = [
+        ['ds_r1_k3_s1_e1_c16_se0.25'],
+        ['ir_r2_k3_s2_e6_c24_se0.25'],
+        ['ir_r2_k5_s2_e6_c40_se0.25'],
+        ['ir_r3_k3_s2_e6_c80_se0.25'],
+        ['ir_r3_k5_s1_e6_c112_se0.25'],
+        ['ir_r4_k5_s2_e6_c192_se0.25'],
+        ['ir_r1_k3_s1_e6_c320_se0.25'],
+    ]
+    scale_chs = partial(round_channels, multiplier=channel_multiplier, divisor=channel_divisor)
+    # norm_layer: an explicit kwarg wins; otherwise BatchNorm2d bound with resolve_bn_args(kwargs)
+    net_cfg = dict(
+        block_args=decode_arch_def(stage_defs, depth_multiplier, group_size=group_size),
+        num_features=scale_chs(1280),
+        stem_size=32,
+        round_chs_fn=scale_chs,
+        act_layer=resolve_act_layer(kwargs, 'swish'),
+        norm_layer=kwargs.pop('norm_layer', None) or partial(nn.BatchNorm2d, **resolve_bn_args(kwargs)),
+        **kwargs,
+    )
+    return _create_effnet(variant, pretrained, **net_cfg)
+
+
+def _gen_efficientnet_edge(
+        variant, channel_multiplier=1.0, depth_multiplier=1.0, group_size=None, pretrained=False, **kwargs):
+    """Build an EfficientNet-EdgeTPU model.
+
+    Ref impl: https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet/edgetpu
+    """
+
+    stage_defs = [
+        # NOTE: the `fc` option in the first entry overrides a mismatch between stem channels
+        # and block input channels that the other models do not have
+        ['er_r1_k3_s1_e4_c24_fc24_noskip'],
+        ['er_r2_k3_s2_e8_c32'],
+        ['er_r4_k3_s2_e8_c48'],
+        ['ir_r5_k5_s2_e8_c96'],
+        ['ir_r4_k5_s1_e8_c144'],
+        ['ir_r2_k5_s2_e8_c192'],
+    ]
+    scale_chs = partial(round_channels, multiplier=channel_multiplier)
+    # norm_layer: an explicit kwarg wins; otherwise BatchNorm2d bound with resolve_bn_args(kwargs)
+    net_cfg = dict(
+        block_args=decode_arch_def(stage_defs, depth_multiplier, group_size=group_size),
+        num_features=scale_chs(1280),
+        stem_size=32,
+        round_chs_fn=scale_chs,
+        norm_layer=kwargs.pop('norm_layer', None) or partial(nn.BatchNorm2d, **resolve_bn_args(kwargs)),
+        act_layer=resolve_act_layer(kwargs, 'relu'),
+        **kwargs,
+    )
+    return _create_effnet(variant, pretrained, **net_cfg)
+
+
+def _gen_efficientnet_condconv(
+        variant, channel_multiplier=1.0, depth_multiplier=1.0, experts_multiplier=1, pretrained=False, **kwargs):
+    """Build an EfficientNet-CondConv model.
+
+    Ref impl: https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet/condconv
+    """
+    stage_defs = [
+        ['ds_r1_k3_s1_e1_c16_se0.25'],
+        ['ir_r2_k3_s2_e6_c24_se0.25'],
+        ['ir_r2_k5_s2_e6_c40_se0.25'],
+        ['ir_r3_k3_s2_e6_c80_se0.25'],
+        ['ir_r3_k5_s1_e6_c112_se0.25_cc4'],
+        ['ir_r4_k5_s2_e6_c192_se0.25_cc4'],
+        ['ir_r1_k3_s1_e6_c320_se0.25_cc4'],
+    ]
+    # NOTE: unlike the official impl, a `cc<x>` option gives the base number of experts per stage and
+    # experts_multiplier scales it per model, the same way the depth/channel multipliers work
+    scale_chs = partial(round_channels, multiplier=channel_multiplier)
+    # norm_layer: an explicit kwarg wins; otherwise BatchNorm2d bound with resolve_bn_args(kwargs)
+    net_cfg = dict(
+        block_args=decode_arch_def(stage_defs, depth_multiplier, experts_multiplier=experts_multiplier),
+        num_features=scale_chs(1280),
+        stem_size=32,
+        round_chs_fn=scale_chs,
+        norm_layer=kwargs.pop('norm_layer', None) or partial(nn.BatchNorm2d, **resolve_bn_args(kwargs)),
+        act_layer=resolve_act_layer(kwargs, 'swish'),
+        **kwargs,
+    )
+    return _create_effnet(variant, pretrained, **net_cfg)
+
+
+def _gen_efficientnet_lite(variant, channel_multiplier=1.0, depth_multiplier=1.0, pretrained=False, **kwargs):
+    """Build an EfficientNet-Lite model.
+
+    Paper: https://arxiv.org/abs/1905.11946
+    Ref impl: https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet/lite
+
+    Reference EfficientNet-Lite scaling parameters
+    name: (channel_multiplier, depth_multiplier, resolution, dropout_rate)
+      'efficientnet-lite0': (1.0, 1.0, 224, 0.2),
+      'efficientnet-lite1': (1.0, 1.1, 240, 0.2),
+      'efficientnet-lite2': (1.1, 1.2, 260, 0.3),
+      'efficientnet-lite3': (1.2, 1.4, 280, 0.3),
+      'efficientnet-lite4': (1.4, 1.8, 300, 0.3),
+
+    Args:
+      channel_multiplier: scale factor for the number of channels per layer
+      depth_multiplier: scale factor for the number of block repeats per stage
+    """
+    stage_defs = [
+        ['ds_r1_k3_s1_e1_c16'],
+        ['ir_r2_k3_s2_e6_c24'],
+        ['ir_r2_k5_s2_e6_c40'],
+        ['ir_r3_k3_s2_e6_c80'],
+        ['ir_r3_k5_s1_e6_c112'],
+        ['ir_r4_k5_s2_e6_c192'],
+        ['ir_r1_k3_s1_e6_c320'],
+    ]
+    # stem (fix_stem) and head (num_features=1280) widths are passed unscaled
+    net_cfg = dict(
+        block_args=decode_arch_def(stage_defs, depth_multiplier, fix_first_last=True),
+        num_features=1280,
+        stem_size=32,
+        fix_stem=True,
+        round_chs_fn=partial(round_channels, multiplier=channel_multiplier),
+        act_layer=resolve_act_layer(kwargs, 'relu6'),
+        norm_layer=kwargs.pop('norm_layer', None) or partial(nn.BatchNorm2d, **resolve_bn_args(kwargs)),
+        **kwargs,
+    )
+    return _create_effnet(variant, pretrained, **net_cfg)
+
+
+def _gen_efficientnetv2_base(
+        variant, channel_multiplier=1.0, depth_multiplier=1.0, pretrained=False, **kwargs):
+    """Build an EfficientNet-V2 base model.
+
+    Paper: `EfficientNetV2: Smaller Models and Faster Training` - https://arxiv.org/abs/2104.00298
+    Ref impl: https://github.com/google/automl/tree/master/efficientnetv2
+    """
+    stage_defs = [
+        ['cn_r1_k3_s1_e1_c16_skip'],
+        ['er_r2_k3_s2_e4_c32'],
+        ['er_r2_k3_s2_e4_c48'],
+        ['ir_r3_k3_s2_e4_c96_se0.25'],
+        ['ir_r5_k3_s1_e6_c112_se0.25'],
+        ['ir_r8_k3_s2_e6_c192_se0.25'],
+    ]
+    scale_chs = partial(round_channels, multiplier=channel_multiplier, round_limit=0.)
+    # norm_layer: an explicit kwarg wins; otherwise BatchNorm2d bound with resolve_bn_args(kwargs)
+    net_cfg = dict(
+        block_args=decode_arch_def(stage_defs, depth_multiplier),
+        num_features=scale_chs(1280),
+        stem_size=32,
+        round_chs_fn=scale_chs,
+        norm_layer=kwargs.pop('norm_layer', None) or partial(nn.BatchNorm2d, **resolve_bn_args(kwargs)),
+        act_layer=resolve_act_layer(kwargs, 'silu'),
+        **kwargs,
+    )
+    return _create_effnet(variant, pretrained, **net_cfg)
+
+
+def _gen_efficientnetv2_s(
+        variant, channel_multiplier=1.0, depth_multiplier=1.0, group_size=None, rw=False, pretrained=False, **kwargs):
+    """Build an EfficientNet-V2 Small model.
+
+    Paper: `EfficientNetV2: Smaller Models and Faster Training` - https://arxiv.org/abs/2104.00298
+    Ref impl: https://github.com/google/automl/tree/master/efficientnetv2
+
+    NOTE: ``rw=True`` switches to the author's initial V2 small definition, which was written
+        before the reference implementation was released.
+    """
+    stage_defs = [
+        ['cn_r2_k3_s1_e1_c24_skip'],
+        ['er_r4_k3_s2_e4_c48'],
+        ['er_r4_k3_s2_e4_c64'],
+        ['ir_r6_k3_s2_e4_c128_se0.25'],
+        ['ir_r9_k3_s1_e6_c160_se0.25'],
+        ['ir_r15_k3_s2_e6_c256_se0.25'],
+    ]
+    head_chs = 1280
+    if rw:
+        # initial variant, based on the paper figure; differs from the official release
+        stage_defs[0] = ['er_r2_k3_s1_e1_c24']
+        stage_defs[-1] = ['ir_r15_k3_s2_e6_c272_se0.25']
+        head_chs = 1792
+
+    scale_chs = partial(round_channels, multiplier=channel_multiplier)
+    # norm_layer: an explicit kwarg wins; otherwise BatchNorm2d bound with resolve_bn_args(kwargs)
+    net_cfg = dict(
+        block_args=decode_arch_def(stage_defs, depth_multiplier, group_size=group_size),
+        num_features=scale_chs(head_chs),
+        stem_size=24,
+        round_chs_fn=scale_chs,
+        norm_layer=kwargs.pop('norm_layer', None) or partial(nn.BatchNorm2d, **resolve_bn_args(kwargs)),
+        act_layer=resolve_act_layer(kwargs, 'silu'),
+        **kwargs,
+    )
+    return _create_effnet(variant, pretrained, **net_cfg)
+
+
+def _gen_efficientnetv2_m(variant, channel_multiplier=1.0, depth_multiplier=1.0, pretrained=False, **kwargs):
+    """Build an EfficientNet-V2 Medium model.
+
+    Paper: `EfficientNetV2: Smaller Models and Faster Training` - https://arxiv.org/abs/2104.00298
+    Ref impl: https://github.com/google/automl/tree/master/efficientnetv2
+    """
+
+    stage_defs = [
+        ['cn_r3_k3_s1_e1_c24_skip'],
+        ['er_r5_k3_s2_e4_c48'],
+        ['er_r5_k3_s2_e4_c80'],
+        ['ir_r7_k3_s2_e4_c160_se0.25'],
+        ['ir_r14_k3_s1_e6_c176_se0.25'],
+        ['ir_r18_k3_s2_e6_c304_se0.25'],
+        ['ir_r5_k3_s1_e6_c512_se0.25'],
+    ]
+
+    # norm_layer: an explicit kwarg wins; otherwise BatchNorm2d bound with resolve_bn_args(kwargs)
+    net_cfg = dict(
+        block_args=decode_arch_def(stage_defs, depth_multiplier),
+        num_features=1280,
+        stem_size=24,
+        round_chs_fn=partial(round_channels, multiplier=channel_multiplier),
+        norm_layer=kwargs.pop('norm_layer', None) or partial(nn.BatchNorm2d, **resolve_bn_args(kwargs)),
+        act_layer=resolve_act_layer(kwargs, 'silu'),
+        **kwargs,
+    )
+    return _create_effnet(variant, pretrained, **net_cfg)
+
+
+def _gen_efficientnetv2_l(variant, channel_multiplier=1.0, depth_multiplier=1.0, pretrained=False, **kwargs):
+    """Build an EfficientNet-V2 Large model.
+
+    Paper: `EfficientNetV2: Smaller Models and Faster Training` - https://arxiv.org/abs/2104.00298
+    Ref impl: https://github.com/google/automl/tree/master/efficientnetv2
+    """
+
+    stage_defs = [
+        ['cn_r4_k3_s1_e1_c32_skip'],
+        ['er_r7_k3_s2_e4_c64'],
+        ['er_r7_k3_s2_e4_c96'],
+        ['ir_r10_k3_s2_e4_c192_se0.25'],
+        ['ir_r19_k3_s1_e6_c224_se0.25'],
+        ['ir_r25_k3_s2_e6_c384_se0.25'],
+        ['ir_r7_k3_s1_e6_c640_se0.25'],
+    ]
+
+    # norm_layer: an explicit kwarg wins; otherwise BatchNorm2d bound with resolve_bn_args(kwargs)
+    net_cfg = dict(
+        block_args=decode_arch_def(stage_defs, depth_multiplier),
+        num_features=1280,
+        stem_size=32,
+        round_chs_fn=partial(round_channels, multiplier=channel_multiplier),
+        norm_layer=kwargs.pop('norm_layer', None) or partial(nn.BatchNorm2d, **resolve_bn_args(kwargs)),
+        act_layer=resolve_act_layer(kwargs, 'silu'),
+        **kwargs,
+    )
+    return _create_effnet(variant, pretrained, **net_cfg)
+
+
+def _gen_efficientnetv2_xl(variant, channel_multiplier=1.0, depth_multiplier=1.0, pretrained=False, **kwargs):
+    """Build an EfficientNet-V2 Xtra-Large model.
+
+    Paper: `EfficientNetV2: Smaller Models and Faster Training` - https://arxiv.org/abs/2104.00298
+    Ref impl: https://github.com/google/automl/tree/master/efficientnetv2
+    """
+
+    stage_defs = [
+        ['cn_r4_k3_s1_e1_c32_skip'],
+        ['er_r8_k3_s2_e4_c64'],
+        ['er_r8_k3_s2_e4_c96'],
+        ['ir_r16_k3_s2_e4_c192_se0.25'],
+        ['ir_r24_k3_s1_e6_c256_se0.25'],
+        ['ir_r32_k3_s2_e6_c512_se0.25'],
+        ['ir_r8_k3_s1_e6_c640_se0.25'],
+    ]
+
+    # norm_layer: an explicit kwarg wins; otherwise BatchNorm2d bound with resolve_bn_args(kwargs)
+    net_cfg = dict(
+        block_args=decode_arch_def(stage_defs, depth_multiplier),
+        num_features=1280,
+        stem_size=32,
+        round_chs_fn=partial(round_channels, multiplier=channel_multiplier),
+        norm_layer=kwargs.pop('norm_layer', None) or partial(nn.BatchNorm2d, **resolve_bn_args(kwargs)),
+        act_layer=resolve_act_layer(kwargs, 'silu'),
+        **kwargs,
+    )
+    return _create_effnet(variant, pretrained, **net_cfg)
+
+
+def _gen_mixnet_s(variant, channel_multiplier=1.0, pretrained=False, **kwargs):
+    """Build a MixNet Small model.
+
+    Paper: https://arxiv.org/abs/1907.09595
+    Ref impl: https://github.com/tensorflow/tpu/tree/master/models/official/mnasnet/mixnet
+    """
+    stage_defs = [
+        # stage 0 (112x112 input)
+        ['ds_r1_k3_s1_e1_c16'],  # relu
+        # stage 1 (112x112 input)
+        ['ir_r1_k3_a1.1_p1.1_s2_e6_c24', 'ir_r1_k3_a1.1_p1.1_s1_e3_c24'],  # relu
+        # stage 2 (56x56 input)
+        ['ir_r1_k3.5.7_s2_e6_c40_se0.5_nsw', 'ir_r3_k3.5_a1.1_p1.1_s1_e6_c40_se0.5_nsw'],  # swish
+        # stage 3 (28x28 input)
+        ['ir_r1_k3.5.7_p1.1_s2_e6_c80_se0.25_nsw', 'ir_r2_k3.5_p1.1_s1_e6_c80_se0.25_nsw'],  # swish
+        # stage 4 (14x14 input)
+        ['ir_r1_k3.5.7_a1.1_p1.1_s1_e6_c120_se0.5_nsw', 'ir_r2_k3.5.7.9_a1.1_p1.1_s1_e3_c120_se0.5_nsw'],  # swish
+        # stage 5 (14x14 input)
+        ['ir_r1_k3.5.7.9.11_s2_e6_c200_se0.5_nsw', 'ir_r2_k3.5.7.9_p1.1_s1_e6_c200_se0.5_nsw'],  # swish
+        # 7x7 output
+    ]
+    # norm_layer: an explicit kwarg wins; otherwise BatchNorm2d bound with resolve_bn_args(kwargs)
+    net_cfg = dict(
+        block_args=decode_arch_def(stage_defs),
+        num_features=1536,
+        stem_size=16,
+        round_chs_fn=partial(round_channels, multiplier=channel_multiplier),
+        norm_layer=kwargs.pop('norm_layer', None) or partial(nn.BatchNorm2d, **resolve_bn_args(kwargs)),
+        **kwargs
+    )
+    return _create_effnet(variant, pretrained, **net_cfg)
+
+
+def _gen_mixnet_m(variant, channel_multiplier=1.0, depth_multiplier=1.0, pretrained=False, **kwargs):
+    """Build a MixNet Medium-Large model.
+
+    Paper: https://arxiv.org/abs/1907.09595
+    Ref impl: https://github.com/tensorflow/tpu/tree/master/models/official/mnasnet/mixnet
+    """
+    stage_defs = [
+        # stage 0 (112x112 input)
+        ['ds_r1_k3_s1_e1_c24'],  # relu
+        # stage 1 (112x112 input)
+        ['ir_r1_k3.5.7_a1.1_p1.1_s2_e6_c32', 'ir_r1_k3_a1.1_p1.1_s1_e3_c32'],  # relu
+        # stage 2 (56x56 input)
+        ['ir_r1_k3.5.7.9_s2_e6_c40_se0.5_nsw', 'ir_r3_k3.5_a1.1_p1.1_s1_e6_c40_se0.5_nsw'],  # swish
+        # stage 3 (28x28 input)
+        ['ir_r1_k3.5.7_s2_e6_c80_se0.25_nsw', 'ir_r3_k3.5.7.9_a1.1_p1.1_s1_e6_c80_se0.25_nsw'],  # swish
+        # stage 4 (14x14 input)
+        ['ir_r1_k3_s1_e6_c120_se0.5_nsw', 'ir_r3_k3.5.7.9_a1.1_p1.1_s1_e3_c120_se0.5_nsw'],  # swish
+        # stage 5 (14x14 input)
+        ['ir_r1_k3.5.7.9_s2_e6_c200_se0.5_nsw', 'ir_r3_k3.5.7.9_p1.1_s1_e6_c200_se0.5_nsw'],  # swish
+        # 7x7 output
+    ]
+    # norm_layer: an explicit kwarg wins; otherwise BatchNorm2d bound with resolve_bn_args(kwargs)
+    net_cfg = dict(
+        block_args=decode_arch_def(stage_defs, depth_multiplier, depth_trunc='round'),
+        num_features=1536,
+        stem_size=24,
+        round_chs_fn=partial(round_channels, multiplier=channel_multiplier),
+        norm_layer=kwargs.pop('norm_layer', None) or partial(nn.BatchNorm2d, **resolve_bn_args(kwargs)),
+        **kwargs
+    )
+    return _create_effnet(variant, pretrained, **net_cfg)
+
+
+def _gen_tinynet(
+    variant, model_width=1.0, depth_multiplier=1.0, pretrained=False, **kwargs
+):
+    """Build a TinyNet model.
+    """
+    stage_defs = [
+        ['ds_r1_k3_s1_e1_c16_se0.25'], ['ir_r2_k3_s2_e6_c24_se0.25'],
+        ['ir_r2_k5_s2_e6_c40_se0.25'], ['ir_r3_k3_s2_e6_c80_se0.25'],
+        ['ir_r3_k5_s1_e6_c112_se0.25'], ['ir_r4_k5_s2_e6_c192_se0.25'],
+        ['ir_r1_k3_s1_e6_c320_se0.25'],
+    ]
+    # the head width never drops below 1280, whatever model_width is
+    net_cfg = dict(
+        block_args=decode_arch_def(stage_defs, depth_multiplier, depth_trunc='round'),
+        num_features=max(1280, round_channels(1280, model_width, 8, None)),
+        stem_size=32,
+        fix_stem=True,
+        round_chs_fn=partial(round_channels, multiplier=model_width),
+        act_layer=resolve_act_layer(kwargs, 'swish'),
+        norm_layer=kwargs.pop('norm_layer', None) or partial(nn.BatchNorm2d, **resolve_bn_args(kwargs)),
+        **kwargs,
+    )
+    return _create_effnet(variant, pretrained, **net_cfg)
+
+
+
+@register_model
+def efficientnetv2_rw_t(pretrained=False, **kwargs):
+    """EfficientNet-V2 Tiny: a custom variant (there is no tiny model in the paper)."""
+    # scaled down from the V2 Small definition; note rw=False despite the `rw` in the name
+    scaling = dict(channel_multiplier=0.8, depth_multiplier=0.9, rw=False)
+    return _gen_efficientnetv2_s('efficientnetv2_rw_t', pretrained=pretrained, **scaling, **kwargs)
+
+
+@register_model
+def gc_efficientnetv2_rw_t(pretrained=False, **kwargs):
+    """EfficientNet-V2 Tiny with Global Context attention (custom variant, no tiny model in the paper)."""
+    # V2 Small definition scaled by 0.8 (channels) / 0.9 (depth), with se_layer='gc'
+    scaling = dict(channel_multiplier=0.8, depth_multiplier=0.9, rw=False)
+    return _gen_efficientnetv2_s(
+        'gc_efficientnetv2_rw_t', se_layer='gc', pretrained=pretrained, **scaling, **kwargs)
+
+
+@register_model
+def efficientnetv2_rw_s(pretrained=False, **kwargs):
+    """EfficientNet-V2 Small, RW variant.
+    NOTE: this is the initial definition (written before the official code release), with some differences.
+    See efficientnetv2_s and tf_efficientnetv2_s for versions matching the official one (PyTorch vs TF padding).
+    """
+    variant = 'efficientnetv2_rw_s'
+    return _gen_efficientnetv2_s(variant, rw=True, pretrained=pretrained, **kwargs)
+
+
+@register_model
+def efficientnetv2_rw_m(pretrained=False, **kwargs):
+    """EfficientNet-V2 Medium, RW variant."""
+    # six depth multipliers (presumably one per stage of the V2 Small definition): 4 x 1.2, then 2 x 1.6
+    stage_depths = (1.2,) * 4 + (1.6,) * 2
+    return _gen_efficientnetv2_s(
+        'efficientnetv2_rw_m', channel_multiplier=1.2, depth_multiplier=stage_depths, rw=True,
+        pretrained=pretrained, **kwargs)
+
+
+@register_model
+def efficientnetv2_s(pretrained=False, **kwargs):
+    """Registered entry point for EfficientNet-V2 Small."""
+    variant = 'efficientnetv2_s'
+    return _gen_efficientnetv2_s(variant, pretrained=pretrained, **kwargs)
+
+
+@register_model
+def efficientnetv2_m(pretrained=False, **kwargs):
+    """Registered entry point for EfficientNet-V2 Medium."""
+    variant = 'efficientnetv2_m'
+    return _gen_efficientnetv2_m(variant, pretrained=pretrained, **kwargs)
+
+
+@register_model
+def efficientnetv2_l(pretrained=False, **kwargs):
+    """Registered entry point for EfficientNet-V2 Large."""
+    variant = 'efficientnetv2_l'
+    return _gen_efficientnetv2_l(variant, pretrained=pretrained, **kwargs)
+
+
+@register_model
+def efficientnetv2_xl(pretrained=False, **kwargs):
+    """Registered entry point for EfficientNet-V2 Xtra-Large."""
+    variant = 'efficientnetv2_xl'
+    return _gen_efficientnetv2_xl(variant, pretrained=pretrained, **kwargs)
+
+
+def parse_args():
+    """Parse the CLI options; ``--output_model`` (path of the ONNX file to write) is mandatory."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--output_model", type=str, required=True, help="path of the ONNX file to write")
+    return parser.parse_args()
+
+if __name__ == '__main__':
+    # Export efficientnetv2_rw_t (1000 classes) to ONNX, traced with a fixed 32x3x288x288 CUDA input.
+    args = parse_args()
+    model = efficientnetv2_rw_t(num_classes=1000, pretrained='imagenet')
+    model.cuda()
+    model.eval()
+    dummy_input = torch.randn(32, 3, 288, 288, device='cuda')  # not named `input`: keeps the builtin usable
+    torch.onnx.export(
+        model,
+        dummy_input,
+        args.output_model,
+        export_params=True,
+        opset_version=11,
+        do_constant_folding=True,
+        input_names=['input'],
+        output_names=['output'],
+    )
+    print(" ")
+    print('Model has been converted to ONNX')
+    print("exit")
\ No newline at end of file
diff --git a/models/cv/classification/efficientnet_v2/ixrt/inference.py b/models/cv/classification/efficientnet_v2/ixrt/inference.py
new file mode 100644
index 00000000..62ec18b3
--- /dev/null
+++ b/models/cv/classification/efficientnet_v2/ixrt/inference.py
@@ -0,0 +1,158 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import argparse
+import json
+import os
+import re
+import time
+from tqdm import tqdm
+
+import cv2
+import numpy as np
+import pycuda.autoinit
+import pycuda.driver as cuda
+import torch
+import tensorrt
+
+from calibration_dataset import getdataloader
+from common import eval_batch, create_engine_context, get_io_bindings
+
+def main(config):
+    """Run the FPS benchmark or the top-1/top-5 accuracy check on a serialized engine.
+
+    Prints the result and ends the process via SystemExit: status 0 when the measured
+    value reaches ``config.fps_target`` / ``config.acc_target``, status 1 otherwise.
+
+    Raises:
+        ValueError: if ``config.test_mode`` is neither "FPS" nor "ACC".
+    """
+    dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz)
+    logger = tensorrt.Logger(tensorrt.Logger.ERROR)
+
+    # Load Engine && I/O bindings
+    engine, context = create_engine_context(config.engine_file, logger)
+    inputs, outputs, allocations = get_io_bindings(engine)
+
+    # Warm up
+    if config.warm_up > 0:
+        print("\nWarm Start.")
+        for _ in range(config.warm_up):
+            context.execute_v2(allocations)
+        print("Warm Done.")
+
+    # Inference
+    if config.test_mode == "FPS":
+        torch.cuda.synchronize()
+        start_time = time.time()
+        for _ in range(config.loop_count):
+            context.execute_v2(allocations)
+        torch.cuda.synchronize()
+        forward_time = time.time() - start_time
+
+        # Every timed iteration executes one batch, so all loop_count * bsz samples count;
+        # capping the numerator while timing every iteration would under-report FPS.
+        num_samples = config.loop_count * config.bsz
+        fps = num_samples / forward_time
+
+        print("FPS : ", fps)
+        print(f"Performance Check : Test {fps} >= target {config.fps_target}")
+        passed = fps >= config.fps_target
+        print("pass!" if passed else "failed!")
+        raise SystemExit(0 if passed else 1)
+
+    elif config.test_mode == "ACC":
+        # Host buffer shaped and typed like the engine's first output binding
+        output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"])
+        print(f"output shape : {output.shape} output type : {output.dtype}")
+
+        total_sample = 0
+        acc_top1, acc_top5 = 0, 0
+
+        with tqdm(total=len(dataloader)) as _tqdm:
+            for batch_data, batch_label in dataloader:
+                batch_data = batch_data.numpy().astype(inputs[0]["dtype"])
+                batch_data = np.ascontiguousarray(batch_data)
+                total_sample += batch_data.shape[0]
+
+                cuda.memcpy_htod(inputs[0]["allocation"], batch_data)
+                context.execute_v2(allocations)
+                cuda.memcpy_dtoh(output, outputs[0]["allocation"])
+
+                # Drop trailing 1x1 spatial dims ([N, C, 1, 1] -> [N, C]); a separate name keeps
+                # ``output`` bound to the full host buffer that memcpy_dtoh fills.
+                logits = output.squeeze(axis=(2, 3)) if output.ndim == 4 else output
+
+                batch_top1, batch_top5 = eval_batch(logits, batch_label)
+                acc_top1 += batch_top1
+                acc_top5 += batch_top5
+
+                _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample),
+                                    acc_5='{:.4f}'.format(acc_top5/total_sample))
+                _tqdm.update(1)
+
+        print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}")
+        print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}")
+        acc1 = acc_top1/total_sample
+        print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}")
+        passed = acc1 >= config.acc_target
+        print("pass!" if passed else "failed!")
+        raise SystemExit(0 if passed else 1)
+
+    else:
+        # Without this branch a mistyped mode ran no test at all and still exited with status 0
+        raise ValueError(f"unsupported test_mode {config.test_mode!r}; expected 'FPS' or 'ACC'")
+
+def parse_config():
+    """Parse the command-line options of the inference script.
+
+    Returns:
+        argparse.Namespace holding the options defined below.
+    """
+    parser = argparse.ArgumentParser()
+    # main() dispatches on exactly these two mode strings
+    parser.add_argument("--test_mode", type=str, default="FPS", help="FPS or ACC")
+    parser.add_argument(
+        "--engine_file", type=str, required=True, help="engine file path"
+    )
+    parser.add_argument(
+        "--datasets_dir", type=str, default="", help="ImageNet dir"
+    )
+    parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times")
+    parser.add_argument("--bsz", type=int, default=32, help="test batch size")
+    parser.add_argument(
+        "--imgsz",
+        "--img",
+        "--img-size",
+        type=int,
+        default=224,
+        help="inference size h,w",
+    )
+    parser.add_argument("--use_async", action="store_true")
+    parser.add_argument(
+        "--device", type=int, default=0, help="cuda device, i.e. 0 or 0,1,2,3,4"
+    )
+    parser.add_argument("--fps_target", type=float, default=-1.0)
+    parser.add_argument("--acc_target", type=float, default=-1.0)
+    parser.add_argument("--loop_count", type=int, default=-1)
+
+    return parser.parse_args()
+
+if __name__ == "__main__":  # run only when executed as a script, not on import
+    config = parse_config()  # options parsed from the command line
+    main(config)  # main() calls exit() with a pass/fail status in its FPS and ACC branches
diff --git a/models/cv/classification/efficientnet_v2/ixrt/modify_batchsize.py b/models/cv/classification/efficientnet_v2/ixrt/modify_batchsize.py
new file mode 100644
index 00000000..4ac42a30
--- /dev/null
+++ b/models/cv/classification/efficientnet_v2/ixrt/modify_batchsize.py
@@ -0,0 +1,57 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+
+import onnx
+import argparse
+
+def change_input_dim(model, bsz):
+    """Overwrite the first dimension of every graph input of ``model`` in place.
+
+    ``bsz`` given as an int or an all-digit string sets a fixed batch size; any
+    other string becomes a symbolic (dynamic) dimension name; every other value
+    falls back to a batch size of 1.
+
+    No shape checks are made: each input is assumed to be a tensor whose shape
+    has at least one dimension, and all inputs receive the same batch size.
+    """
+    # Decide "numeric" BEFORE the generic string test. The previous order tested
+    # `str` first, so a digit string such as "8" was stored as a symbolic name
+    # and the digit-string half of the fixed-size condition could never match.
+    numeric = isinstance(bsz, int) or (isinstance(bsz, str) and bsz.isdigit())
+    for graph_input in model.graph.input:
+        dim1 = graph_input.type.tensor_type.shape.dim[0]
+        if numeric:
+            dim1.dim_value = int(bsz)  # fixed batch size
+        elif isinstance(bsz, str):
+            dim1.dim_param = bsz  # dynamic batch size under this symbolic name
+        else:
+            dim1.dim_value = 1  # unsupported value: default to a batch of 1
+
+def parse_args():
+    """Parse --batch_size (int), --origin_model and --output_model (str) from ``sys.argv``."""
+    cli = argparse.ArgumentParser()
+    options = (("--batch_size", int), ("--origin_model", str), ("--output_model", str))
+    for flag, kind in options:
+        cli.add_argument(flag, type=kind)
+    return cli.parse_args()
+
+args = parse_args()  # runs at import time: this module has no __main__ guard
+model = onnx.load(args.origin_model)
+change_input_dim(model, args.batch_size)  # rewrites the first dim of every graph input in place
+onnx.save(model, args.output_model)
+
+    
+
+
+
diff --git a/models/cv/classification/efficientnet_v2/ixrt/quant.py b/models/cv/classification/efficientnet_v2/ixrt/quant.py
new file mode 100644
index 00000000..6c06eba2
--- /dev/null
+++ b/models/cv/classification/efficientnet_v2/ixrt/quant.py
@@ -0,0 +1,167 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+
+"""Highly automated entry script for PPQ quantization; package your model and data as described below.
+
+In the automated API, quantization parameters are passed through a QuantizationSetting object.
+
+This file shows you how to quantize your network with PPQ.
+    You should prepare your model and calibration dataset as follows:
+
+    ~/working/model.onnx                          <--  your model
+    ~/working/data/*.npy or ~/working/data/*.bin  <--  your dataset
+
+If you are using a Caffe model:
+    ~/working/model.caffemodel  <--  your model
+    ~/working/model.prototxt    <--  your model
+
+### MAKE SURE YOUR INPUT LAYOUT IS [N, C, H, W] or [C, H, W] ###
+
+The quantized model is written to <save_dir>/quantized_<model_name>.onnx, with quant_cfg.json next to it.
+"""
+from ppq import *
+from ppq.api import *
+import os
+from calibration_dataset import getdataloader
+import argparse
+import random
+import numpy as np
+import torch
+
+def setseed(seed=42):
+    """Seed the `random`, NumPy and PyTorch generators, in that order, with ``seed``."""
+    for seeder in (random.seed, np.random.seed, torch.manual_seed):
+        seeder(seed)
+
+
+def parse_args():
+    """Parse the quantization options from ``sys.argv``, print them and return the namespace."""
+    observers = ["hist_percentile", "percentile", "minmax", "entropy", "ema"]
+    cli = argparse.ArgumentParser()
+    cli.add_argument("--model_name", type=str)
+    cli.add_argument("--model", type=str)
+    cli.add_argument("--dataset_dir", type=str, default="imagenet_val")
+    cli.add_argument("--observer", type=str, choices=observers, default="hist_percentile")
+    cli.add_argument("--disable_quant_names", nargs='*', type=str)
+    cli.add_argument("--save_dir", type=str, help="save path", default=None)
+    for flag, default in (("--bsz", 32), ("--step", 20), ("--seed", 42), ("--imgsz", 288)):
+        cli.add_argument(flag, type=int, default=default)
+    parsed = cli.parse_args()
+    # Echo the parsed configuration to stdout.
+    print("Quant config:", parsed)
+    print(parsed.disable_quant_names)
+    return parsed
+
+
+config = parse_args()
+setseed(config.seed)  # apply --seed: setseed() was defined but never called, so the option had no effect
+# modify configuration below:
+WORKING_DIRECTORY = 'checkpoints'  # choose your working directory
+TARGET_PLATFORM = TargetPlatform.TRT_INT8  # choose your target platform
+MODEL_TYPE = NetworkFramework.ONNX  # or NetworkFramework.CAFFE
+INPUT_LAYOUT = 'chw'  # input data layout, chw or hwc
+NETWORK_INPUTSHAPE = [config.bsz, 3, config.imgsz, config.imgsz]  # follows --bsz/--imgsz like the dataloader; the defaults give the former [32, 3, 288, 288]
+EXECUTING_DEVICE = 'cuda'  # 'cuda' or 'cpu'.
+REQUIRE_ANALYSE = False
+TRAINING_YOUR_NETWORK = False  # whether the network should be fine-tuned
+# -------------------------------------------------------------------
+# Load your model file; PPQ parses the onnx or caffe model file into its own format.
+# If you are using a framework such as pytorch or tensorflow, export the model to onnx first;
+# torch.onnx.export is enough. If exporting your torch model fails, feel free to contact us.
+# -------------------------------------------------------------------
+graph = None
+if MODEL_TYPE == NetworkFramework.ONNX:
+    graph = load_onnx_graph(onnx_import_file=config.model)
+if MODEL_TYPE == NetworkFramework.CAFFE:
+    graph = load_caffe_graph(
+        caffemodel_path=os.path.join(WORKING_DIRECTORY, 'model.caffemodel'),
+        prototxt_path=os.path.join(WORKING_DIRECTORY, 'model.prototxt'))
+assert graph is not None, 'Graph Loading Error, Check your input again.'
+
+# -------------------------------------------------------------------
+# The SETTING object controls PPQ's quantization logic; it mainly describes graph fusion, dispatching and quantization detail policies.
+# When the quantization error of your network is too high, modify attributes of the SETTING object to apply specific optimizations.
+# -------------------------------------------------------------------
+QS = QuantizationSettingFactory.default_setting()
+
+# -------------------------------------------------------------------
+# The following shows how to use a finetuning pass to improve quantization accuracy.
+# PPQ provides more than ten algorithms to help you recover accuracy.
+# Each of them is switched on with QS.xxxx = True.
+# Enable them as needed; do not turn all of them on at once, that easily goes wrong.
+# -------------------------------------------------------------------
+if TRAINING_YOUR_NETWORK:
+    QS.lsq_optimization = True  # start the retraining pass to reduce quantization error
+    QS.lsq_optimization_setting.steps = 500  # retraining steps, affects the run time; 500 steps take roughly a few minutes
+    QS.lsq_optimization_setting.collecting_device = 'cuda'  # where cached data is kept: 'cuda' is the GPU; switch to 'cpu' if GPU memory runs out
+
+
+
+dataloader = getdataloader(config.dataset_dir, config.step, config.bsz, img_sz=config.imgsz)
+# ENABLE CUDA KERNEL speeds quantization up by 3x ~ 10x, but it cannot be compiled without the matching build environment.
+# Either install that environment, or quantize without the CUDA kernel by removing `with ENABLE_CUDA_KERNEL():`.
+with ENABLE_CUDA_KERNEL():
+    print('网络正量化中，根据你的量化配置，这将需要一段时间:')
+    quantized = quantize_native_model(
+        setting=QS,  # the setting object controls the standard quantization logic
+        model=graph,
+        calib_dataloader=dataloader,
+        calib_steps=config.step,
+        input_shape=NETWORK_INPUTSHAPE,  # use this argument when the network has a single input
+        inputs=None,
+        # for a network with several inputs use this argument instead, i.e. input_shape=None, inputs=[torch.zeros(1,3,224,224), torch.zeros(1,3,224,224)]
+        collate_fn=lambda x: x[0].to(EXECUTING_DEVICE),  # same role as the torch dataloader collate_fn: preprocesses each batch;
+        # you may of course rely on the torch dataloader's own collate_fn and set this one to None
+        platform=TARGET_PLATFORM,
+        device=EXECUTING_DEVICE,
+        do_quantize=True)
+
+    # -------------------------------------------------------------------
+    # To run the quantized network and obtain results, create an executor.
+    # The executor behaves like a torch.Module; use it to get the execution results.
+    # Note that this must be done before export.
+    # -------------------------------------------------------------------
+    executor = TorchExecutor(graph=quantized, device=EXECUTING_DEVICE)
+    # output = executor.forward(input)
+
+    # -------------------------------------------------------------------
+    # PPQ measures quantization error as the inverse of the signal-to-noise ratio, i.e. noise energy / signal energy.
+    # An error of 0.1 means the quantization noise carries about 10% of the overall signal energy.
+    # Note that graphwise_error_analyse measures the accumulated error.
+    # The last layers of a network usually show a large accumulated error, caused jointly by all preceding layers.
+    # Use layerwise_error_analyse to locate the source of the error layer by layer.
+    # -------------------------------------------------------------------
+    print('正计算网络量化误差(SNR)，最后一层的误差应小于 0.1 以保证量化精度:')
+    reports = graphwise_error_analyse(
+        graph=quantized, running_device=EXECUTING_DEVICE, steps=32,
+        dataloader=dataloader, collate_fn=lambda x: x[0].to(EXECUTING_DEVICE))
+    for op, snr in reports.items():
+        if snr > 0.1: ppq_warning(f'层 {op} 的累计量化误差显著，请考虑进行优化')
+
+    if REQUIRE_ANALYSE:
+        print('正计算逐层量化误差(SNR)，每一层的独立量化误差应小于 0.1 以保证量化精度:')
+        layerwise_error_analyse(graph=quantized, running_device=EXECUTING_DEVICE,
+                                interested_outputs=None,  # collate_fn below now takes x[0] like the two calls above (it was x.to)
+                                dataloader=dataloader, collate_fn=lambda x: x[0].to(EXECUTING_DEVICE))
+
+    # -------------------------------------------------------------------
+    # Export the quantized model with export_ppq_graph.
+    # PPQ adapts the model format to the export platform you selected.
+    # -------------------------------------------------------------------
+    print('网络量化结束，正在生成目标文件:')
+    export_ppq_graph(
+        graph=quantized, platform=TARGET_PLATFORM,  # NOTE(review): --save_dir defaults to None, which os.path.join rejects
+        graph_save_to=os.path.join(config.save_dir, f"quantized_{config.model_name}.onnx"),
+        config_save_to=os.path.join(config.save_dir, 'quant_cfg.json'))
\ No newline at end of file
diff --git a/models/cv/classification/efficientnet_v2/ixrt/refine_model.py b/models/cv/classification/efficientnet_v2/ixrt/refine_model.py
new file mode 100644
index 00000000..000ee4dc
--- /dev/null
+++ b/models/cv/classification/efficientnet_v2/ixrt/refine_model.py
@@ -0,0 +1,291 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+
+import os
+import argparse
+import dataclasses
+
+import torch
+import onnx
+
+from refine_utils.matmul_to_gemm_pass import FusedGemmPass
+from refine_utils.linear_pass import FusedLinearPass
+
+from refine_utils.common import *
+
+def get_constant_input_name_of_operator(graph: Graph, operator: Operator):
+    """Return the name of the last constant input of ``operator``, or None.
+
+    An input counts as constant when the graph contains it, it is a leaf
+    variable, and the variable it names carries a value.
+    """
+    found = None
+    for name in operator.inputs:
+        if not (graph.containe_var(name) and graph.is_leaf_variable(name)):
+            continue
+        if graph.get_variable(name).value is not None:
+            found = name  # no break: a later constant input replaces an earlier one
+    return found
+
+class FuseLayerNormPass(BasePass):
+    """Fuse the ReduceMean/Sub/Pow/ReduceMean/Add/Sqrt/Div/Mul/Add chain into one LayerNormalization operator."""
+
+    def process(self, graph: Graph) -> Graph:
+        """Search ``graph`` for the operator sequence and fuse each match; returns ``graph``."""
+        self.transform = GraphTransform(graph)
+        find_sequence_subgraph(
+            graph,
+            [OP.REDUCE_MEAN, OP.SUB, OP.POW, OP.REDUCE_MEAN, OP.ADD, OP.SQRT, OP.DIV, OP.MUL, OP.ADD],
+            self.fuse_layer_norm,
+            strict=False
+        )
+        return graph
+
+    def fuse_layer_norm(self, graph: Graph, pattern: PatternGraph):
+        """Replace one matched sequence with a LayerNormalization operator.
+
+        The match is left untouched when its wiring does not pass the checks
+        below, or when epsilon, scale or bias cannot be found as constants.
+        """
+        nodes = pattern.nodes
+        # The first REDUCE_MEAN and the SUB must read the same input.
+        if nodes[0].operator.inputs[0] != nodes[1].operator.inputs[0]:
+            return
+
+        # The POW and the DIV must read the same input.
+        if nodes[2].operator.inputs[0] != nodes[6].operator.inputs[0]:
+            return
+
+        # Outputs of the checked operators must not feed more than one operator.
+        for node in [nodes[0]] + nodes[2:-1]:
+            if len(graph.get_next_operators(node.operator)) > 1:
+                return
+
+        # Epsilon is the constant leaf input of the ADD at index 4.
+        eps = None
+        for name in nodes[4].operator.inputs:
+            input_var = graph.get_variable(name)
+            if input_var.value is not None and graph.is_leaf_variable(name):
+                eps = to_py_type(input_var.value)
+
+        scale = get_constant_input_name_of_operator(graph, nodes[-2].operator)
+        bias = get_constant_input_name_of_operator(graph, nodes[-1].operator)
+        # Give up BEFORE mutating the graph; the operators used to be deleted first.
+        if eps is None or scale is None or bias is None:
+            return
+
+        self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator)
+        layer_norm_op = self.transform.make_operator(
+            op_type="LayerNormalization",
+            inputs=[nodes[0].operator.inputs[0], scale, bias],
+            outputs=[nodes[-1].operator.outputs[0]],
+            axis=nodes[0].operator.attributes.axes,
+            epsilon=eps,
+        )
+        self.transform.add_operator(layer_norm_op)
+
+class FusedGeluPass(BasePass):
+    """Fuse the Div -> Erf -> Add -> Mul -> Mul operator sequence into one GELU operator."""
+
+    def process(self, graph: Graph) -> Graph:
+        """Search ``graph`` for the sequence and fuse each match; returns ``graph``."""
+        self.transform = GraphTransform(graph)
+        find_sequence_subgraph(
+            graph, pattern=[OP.DIV, OP.ERF, OP.ADD, OP.MUL, OP.MUL], callback=self.fuse_gelu, strict=True
+        )
+        return graph
+
+    def fuse_gelu(self, graph: Graph, pattern: PatternGraph):
+        """Replace one matched sequence with a GELU operator, or leave it untouched."""
+        nodes = pattern.nodes
+        div_op, mul_op, last_op = nodes[0].operator, nodes[3].operator, nodes[-1].operator
+
+        # A DIV without a producer cannot be fused; indexing [0] would fail on an empty result.
+        prev_ops = self.transform.get_previous_operators(div_op)
+        if not prev_ops:
+            return
+
+        # The producer must feed exactly two operators: the DIV and the first MUL.
+        next_ops = self.transform.get_next_operators(prev_ops[0])
+        if len(next_ops) != 2 or div_op not in next_ops or mul_op not in next_ops:
+            return
+
+        # GELU input = first name shared by the first MUL and the DIV; with none, return before deleting anything.
+        gelu_op_input = next((name for name in mul_op.inputs if name in div_op.inputs), None)
+        if gelu_op_input is None:
+            return
+
+        self.transform.delete_operators_between_op_op(div_op, last_op)
+        gelu_op = self.transform.make_operator(op_type=OP.GELU, inputs=[gelu_op_input], outputs=[last_op.outputs[0]])
+        self.transform.add_operator(gelu_op)
+
+@dataclasses.dataclass
+class NormalizeAttr(BaseOperatorAttr):  # attribute record; in this file it only appears as the `attr` annotation of GeluOperator.call
+    p: float = 2.0  # presumably the norm order -- TODO confirm
+    epsilon: float = 1e-12
+    axis: int = 1
+
+
+@registe_operator(OP.GELU)
+class GeluOperator(BaseOperator):
+    # Operator registered under OP.GELU; provides execution, ONNX conversion and quantization hooks.
+    def call(
+        self,
+        executor,
+        operator: Operator,
+        inputs: List,
+        attr: NormalizeAttr,
+    ):
+        return F.gelu(inputs[0])  # only the first input is used; executor, operator and attr are ignored
+
+    def convert_onnx_operator(
+        self, ir_graph: Graph, onnx_graph: onnx.GraphProto, node: onnx.NodeProto
+    ) -> Operator:
+        return default_converter(ir_graph, onnx_graph, node, attr_cls=attr.EmptyAttr)  # `attr` here is a module-level name, not the `call` parameter
+
+    def quantize(
+        self,
+        graph: Graph,
+        op: Operator,
+        operator_observer_config: QuantOperatorObserverConfig,
+        quant_outputs: bool = False,
+    ):
+        return quant_single_input_operator(graph, op, operator_observer_config, quant_outputs=quant_outputs)  # delegates to the single-input helper
+
+
+
+class ClearUnsedVariables(BasePass):
+    """Delete leaf variables that no operator consumes, then quantization parameters of absent variables."""
+
+    def process(self, graph: Graph) -> Graph:
+        """Prune ``graph`` in place and return it."""
+        # Both loops walk a snapshot because entries are removed while scanning.
+        for candidate in list(graph.variables):
+            consumers = graph.get_dst_operators(candidate)
+            if len(consumers) == 0 and graph.is_leaf_variable(candidate):
+                graph.delete_variable(candidate)
+
+        for name in list(graph.quant_parameters.keys()):
+            if not graph.containe_var(name):
+                graph.quant_parameters.pop(name)
+        return graph
+
+class FormatLayerNorm(BasePass):
+    """Collapse a tuple/list `axis` attribute of operators whose type contains "LayerNorm" to its first element."""
+
+    def process(self, graph: Graph) -> Graph:
+        layer_norms = (op for op in graph.operators.values() if "LayerNorm" in op.op_type)
+        for layer_norm_op in layer_norms:
+            self.format_layer_norm(graph, layer_norm_op)
+        return graph
+
+    def format_layer_norm(self, graph, operator):
+        attrs = operator.attributes
+        if hasattr(attrs, "axis") and isinstance(attrs.axis, (tuple, list)):
+            attrs.axis = attrs.axis[0]
+
+class FormatReshape(BasePass):
+    """Convert the value of the second input of every Reshape operator to an int64 tensor."""
+
+    def process(self, graph: Graph) -> Graph:
+        reshapes = (op for op in graph.operators.values() if op.op_type == "Reshape")
+        for reshape_op in reshapes:
+            self.format_reshape(graph, reshape_op)
+        return graph
+
+    def format_reshape(self, graph, operator):
+        target = graph.get_variable(operator.inputs[1])  # input 1 is the shape operand
+        target.value = torch.tensor(target.value, dtype=torch.int64)
+
+class FormatScalar(BasePass):
+    """Reshape 0-d values of variables feeding Mul/Add/Gather operators into 1-element tensors."""
+
+    def process(self, graph: Graph):
+        """Scan every variable of ``graph``; only the type of its first consumer is checked."""
+        scalar_sensitive = [OP.MUL, OP.ADD, OP.GATHER]
+        for variable in graph.variables.values():
+            consumers = graph.get_dst_operators(variable)
+            if len(consumers) == 0 or consumers[0].op_type not in scalar_sensitive:
+                continue
+
+            value = variable.value
+            if value is None or value.ndim != 0:
+                continue
+
+            variable.value = value.reshape(1)
+            print(f"Reshape scalar to tensor for {variable.name}.")
+        return graph
+
+class RenamePass(BasePass):
+    """Rename operators ("/" -> "#") and variables ("/" -> ".", "Output"/"output" -> "out")."""
+
+    def process(self, graph:Graph):
+        """Rename every operator first, then every variable, and return ``graph``."""
+        # Names are snapshotted up front so renaming never runs against a live key view.
+        for op_name in list(graph.operators.keys()):
+            graph.rename_operator(op_name, op_name.replace("/", "#"))
+
+        for var_name in list(graph.variables.keys()):
+            renamed = var_name
+            for old, new in (("/", "."), ("Output", "out"), ("output", "out")):
+                renamed = renamed.replace(old, new)
+            # `rename_vaiable` (sic) is the method name the graph object is called with.
+            graph.rename_vaiable(
+                var_name, renamed, with_variables=True, with_operator_outputs=True
+            )
+
+        return graph
+
+def create_pipeline(example_inputs):  # NOTE: `example_inputs` is accepted but not used in this function
+    return PassSequence(
+        # FuseLayerNormPass(),
+        FusedGeluPass(),  # the only pass currently enabled; the others are commented out
+
+        # ClearUnsedVariables(),
+        # FormatLayerNorm(),
+        # FormatReshape(),
+        # FormatScalar(),
+        # RenamePass()
+    )
+
+def parse_args():
+    """Parse the source/destination ONNX paths and the example-input batch and image sizes."""
+    cli = argparse.ArgumentParser()
+    for path_flag in ("--onnx_path", "--dst_onnx_path"):
+        cli.add_argument(path_flag, type=str)
+
+    for flag, default, text in (("--bsz", 8, "Batch size"), ("--imgsz", 224, "Image size")):
+        cli.add_argument(flag, type=int, default=default, help=text)
+
+    parsed = cli.parse_args()
+    return parsed
+
+
+if __name__ == "__main__":
+    args = parse_args()
+
+    example_inputs = torch.randn(args.bsz, 3, args.imgsz, args.imgsz)  # random tensor of shape (bsz, 3, imgsz, imgsz)
+
+    refine_pipline = Pipeline(
+        create_source(f"{args.onnx_path}", example_inputs=example_inputs),
+        create_pipeline(example_inputs),
+        create_target(
+            f"{args.dst_onnx_path}",
+            example_inputs=example_inputs,
+        )
+    )
+    refine_pipline.run()
+
+    print(f"refine the model, input shape={example_inputs.shape}")
diff --git a/models/cv/classification/efficientnet_v2/ixrt/scripts/infer_efficientnet_fp16_accuracy.sh b/models/cv/classification/efficientnet_v2/ixrt/scripts/infer_efficientnet_fp16_accuracy.sh
new file mode 100755
index 00000000..e62cc5d7
--- /dev/null
+++ b/models/cv/classification/efficientnet_v2/ixrt/scripts/infer_efficientnet_fp16_accuracy.sh
@@ -0,0 +1,145 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+
+EXIT_STATUS=0
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then  # status of the first command of the most recently run pipeline
+    EXIT_STATUS=1
+    fi
+}
+
+# Run parameters (defaults; --bs and --tgt below override BSZ and TGT)
+BSZ=32
+TGT=-1
+WARM_UP=0
+LOOP_COUNT=-1
+RUN_MODE=ACC
+PRECISION=float16
+
+# Parse "--bs <value>" and "--tgt <value>": index always points at the word after the current one
+index=0
+options=$@
+arguments=($options)
+for argument in $options
+do
+    index=`expr $index + 1`
+    case $argument in
+      --bs) BSZ=${arguments[index]};;
+      --tgt) TGT=${arguments[index]};;
+    esac
+done
+
+source ${CONFIG_DIR}  # CONFIG_DIR is not assigned in this script; it has to come from the environment
+ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}  # prepend the checkpoints directory to the existing value
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo CONFIG_DIR : ${CONFIG_DIR}
+echo ====================== Model Info ======================
+echo Model Name : ${MODEL_NAME}
+echo Model Input Name : ${MODEL_INPUT_NAME}
+echo Model Output Name : ${MODEL_OUTPUT_NAME}
+echo Onnx Path : ${ORIGINE_MODEL}
+
+step=0  # counter printed in the [STEP n] banners
+SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
+
+# Simplify Model (skipped when ${SIM_MODEL} already exists)
+let step++
+echo;
+echo [STEP ${step}] : Simplify Model
+if [ -f ${SIM_MODEL} ];then
+    echo "  "Simplify Model, ${SIM_MODEL} has been existed
+else
+    python3 ${RUN_DIR}/simplify_model.py \
+    --origin_model $ORIGINE_MODEL    \
+    --output_model ${SIM_MODEL}
+    echo "  "Generate ${SIM_MODEL}
+fi
+
+# Quant Model (runs only when PRECISION is int8; an existing quantized ONNX is reused)
+if [ $PRECISION == "int8" ];then
+    let step++
+    echo;
+    echo [STEP ${step}] : Quant Model
+    if [[ -z ${QUANT_EXIST_ONNX} ]];then
+        QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx
+    fi
+    if [[ -f ${QUANT_EXIST_ONNX} ]];then
+        SIM_MODEL=${QUANT_EXIST_ONNX}
+        echo "  "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed
+    else
+        python3 ${RUN_DIR}/quant.py            \
+            --model ${SIM_MODEL}               \
+            --model_name ${MODEL_NAME}         \
+            --dataset_dir ${DATASETS_DIR}      \
+            --observer ${QUANT_OBSERVER}       \
+            --disable_quant_names ${DISABLE_QUANT_LIST[@]} \
+            --save_dir $CHECKPOINTS_DIR        \
+            --bsz   ${QUANT_BATCHSIZE}         \
+            --step  ${QUANT_STEP}              \
+            --seed  ${QUANT_SEED}              \
+            --imgsz ${IMGSIZE}
+        SIM_MODEL=${QUANT_EXIST_ONNX}
+        echo "  "Generate ${SIM_MODEL}
+    fi
+fi
+
+# Change Batchsize (skipped when ${FINAL_MODEL} already exists)
+let step++
+echo;
+echo [STEP ${step}] : Change Batchsize
+FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx
+if [ -f $FINAL_MODEL ];then
+    echo "  "Change Batchsize Skip, $FINAL_MODEL has been existed
+else
+    python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
+        --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL}
+    echo "  "Generate ${FINAL_MODEL}
+fi
+
+# Build Engine (skipped when ${ENGINE_FILE} already exists)
+let step++
+echo;
+echo [STEP ${step}] : Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
+if [ -f $ENGINE_FILE ];then
+    echo "  "Build Engine Skip, $ENGINE_FILE has been existed
+else
+    python3 ${RUN_DIR}/build_engine.py          \
+        --precision ${PRECISION}                \
+        --model ${FINAL_MODEL}                    \
+        --engine ${ENGINE_FILE}
+    echo "  "Generate Engine ${ENGINE_FILE}
+fi
+
+# Inference (TGT is passed as the accuracy target; a failing run sets EXIT_STATUS through check_status)
+let step++
+echo;
+echo [STEP ${step}] : Inference
+python3 ${RUN_DIR}/inference.py     \
+    --engine_file=${ENGINE_FILE}    \
+    --datasets_dir=${DATASETS_DIR}  \
+    --imgsz=${IMGSIZE}              \
+    --warm_up=${WARM_UP}            \
+    --loop_count ${LOOP_COUNT}      \
+    --test_mode ${RUN_MODE}         \
+    --acc_target ${TGT}             \
+    --bsz ${BSZ}; check_status
+
+exit ${EXIT_STATUS}
\ No newline at end of file
diff --git a/models/cv/classification/efficientnet_v2/ixrt/scripts/infer_efficientnet_fp16_performance.sh b/models/cv/classification/efficientnet_v2/ixrt/scripts/infer_efficientnet_fp16_performance.sh
new file mode 100755
index 00000000..05c9986f
--- /dev/null
+++ b/models/cv/classification/efficientnet_v2/ixrt/scripts/infer_efficientnet_fp16_performance.sh
@@ -0,0 +1,145 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+
+EXIT_STATUS=0
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then  # status of the first command of the most recently run pipeline
+    EXIT_STATUS=1
+    fi
+}
+
+# Run parameters (defaults; --bs and --tgt below override BSZ and TGT)
+BSZ=32
+TGT=-1
+WARM_UP=3
+LOOP_COUNT=20
+RUN_MODE=FPS
+PRECISION=float16
+
+# Parse "--bs <value>" and "--tgt <value>": index always points at the word after the current one
+index=0
+options=$@
+arguments=($options)
+for argument in $options
+do
+    index=`expr $index + 1`
+    case $argument in
+      --bs) BSZ=${arguments[index]};;
+      --tgt) TGT=${arguments[index]};;
+    esac
+done
+
+source ${CONFIG_DIR}  # CONFIG_DIR is not assigned in this script; it has to come from the environment
+ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}  # prepend the checkpoints directory to the existing value
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo CONFIG_DIR : ${CONFIG_DIR}
+echo ====================== Model Info ======================
+echo Model Name : ${MODEL_NAME}
+echo Model Input Name : ${MODEL_INPUT_NAME}
+echo Model Output Name : ${MODEL_OUTPUT_NAME}
+echo Onnx Path : ${ORIGINE_MODEL}
+
+step=0  # counter printed in the [STEP n] banners
+SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
+
+# Simplify Model (skipped when ${SIM_MODEL} already exists)
+let step++
+echo;
+echo [STEP ${step}] : Simplify Model
+if [ -f ${SIM_MODEL} ];then
+    echo "  "Simplify Model, ${SIM_MODEL} has been existed
+else
+    python3 ${RUN_DIR}/simplify_model.py \
+    --origin_model $ORIGINE_MODEL    \
+    --output_model ${SIM_MODEL}
+    echo "  "Generate ${SIM_MODEL}
+fi
+
+# Quant Model (runs only when PRECISION is int8; an existing quantized ONNX is reused)
+if [ $PRECISION == "int8" ];then
+    let step++
+    echo;
+    echo [STEP ${step}] : Quant Model
+    if [[ -z ${QUANT_EXIST_ONNX} ]];then
+        QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx
+    fi
+    if [[ -f ${QUANT_EXIST_ONNX} ]];then
+        SIM_MODEL=${QUANT_EXIST_ONNX}
+        echo "  "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed
+    else
+        python3 ${RUN_DIR}/quant.py            \
+            --model ${SIM_MODEL}               \
+            --model_name ${MODEL_NAME}         \
+            --dataset_dir ${DATASETS_DIR}      \
+            --observer ${QUANT_OBSERVER}       \
+            --disable_quant_names ${DISABLE_QUANT_LIST[@]} \
+            --save_dir $CHECKPOINTS_DIR        \
+            --bsz   ${QUANT_BATCHSIZE}         \
+            --step  ${QUANT_STEP}              \
+            --seed  ${QUANT_SEED}              \
+            --imgsz ${IMGSIZE}
+        SIM_MODEL=${QUANT_EXIST_ONNX}
+        echo "  "Generate ${SIM_MODEL}
+    fi
+fi
+
+# Change Batchsize (skipped when ${FINAL_MODEL} already exists)
+let step++
+echo;
+echo [STEP ${step}] : Change Batchsize
+FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx
+if [ -f $FINAL_MODEL ];then
+    echo "  "Change Batchsize Skip, $FINAL_MODEL has been existed
+else
+    python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
+        --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL}
+    echo "  "Generate ${FINAL_MODEL}
+fi
+
+# Build Engine (skipped when ${ENGINE_FILE} already exists)
+let step++
+echo;
+echo [STEP ${step}] : Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
+if [ -f $ENGINE_FILE ];then
+    echo "  "Build Engine Skip, $ENGINE_FILE has been existed
+else
+    python3 ${RUN_DIR}/build_engine.py          \
+        --precision ${PRECISION}                \
+        --model ${FINAL_MODEL}                    \
+        --engine ${ENGINE_FILE}
+    echo "  "Generate Engine ${ENGINE_FILE}
+fi
+
+# Inference (TGT is passed as the FPS target; a failing run sets EXIT_STATUS through check_status)
+let step++
+echo;
+echo [STEP ${step}] : Inference
+python3 ${RUN_DIR}/inference.py     \
+    --engine_file=${ENGINE_FILE}    \
+    --datasets_dir=${DATASETS_DIR}  \
+    --imgsz=${IMGSIZE}              \
+    --warm_up=${WARM_UP}            \
+    --loop_count ${LOOP_COUNT}      \
+    --test_mode ${RUN_MODE}         \
+    --fps_target ${TGT}             \
+    --bsz ${BSZ}; check_status
+
+exit ${EXIT_STATUS}
\ No newline at end of file
diff --git a/models/cv/classification/efficientnet_v2/ixrt/scripts/infer_efficientnet_int8_accuracy.sh b/models/cv/classification/efficientnet_v2/ixrt/scripts/infer_efficientnet_int8_accuracy.sh
new file mode 100755
index 00000000..a58f44b3
--- /dev/null
+++ b/models/cv/classification/efficientnet_v2/ixrt/scripts/infer_efficientnet_int8_accuracy.sh
@@ -0,0 +1,146 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+
+set -x  # trace every command as it runs
+EXIT_STATUS=0
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then  # status of the first command of the most recently run pipeline
+    EXIT_STATUS=1
+    fi
+}
+
+# Run parameters (defaults; --bs and --tgt below override BSZ and TGT)
+BSZ=32
+TGT=-1
+WARM_UP=0
+LOOP_COUNT=-1
+RUN_MODE=ACC
+PRECISION=int8
+
+# Parse "--bs <value>" and "--tgt <value>": index always points at the word after the current one
+index=0
+options=$@
+arguments=($options)
+for argument in $options
+do
+    index=`expr $index + 1`
+    case $argument in
+      --bs) BSZ=${arguments[index]};;
+      --tgt) TGT=${arguments[index]};;
+    esac
+done
+
+source ${CONFIG_DIR}  # CONFIG_DIR is not assigned in this script; it has to come from the environment
+echo ${QUANT_OBSERVER}
+ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}  # prepend the checkpoints directory to the existing value
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo CONFIG_DIR : ${CONFIG_DIR}
+echo ====================== Model Info ======================
+echo Model Name : ${MODEL_NAME}
+echo Model Input Name : ${MODEL_INPUT_NAME}
+echo Model Output Name : ${MODEL_OUTPUT_NAME}
+echo Onnx Path : ${ORIGINE_MODEL}
+
+step=0  # counter printed in the [STEP n] banners
+SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
+
+# Simplify Model (skipped when ${SIM_MODEL} already exists)
+let step++
+ echo [STEP ${step}] : Simplify Model
+ if [ -f ${SIM_MODEL} ];then
+     echo "  "Simplify Model, ${SIM_MODEL} has been existed
+ else
+     python3 ${RUN_DIR}/simplify_model.py \
+     --origin_model $ORIGINE_MODEL    \
+     --output_model ${SIM_MODEL}
+     echo "  "Generate ${SIM_MODEL}
+ fi
+
+# Quant Model (runs only when PRECISION is int8; an existing quantized ONNX is reused)
+if [ $PRECISION == "int8" ];then
+    let step++
+    echo;
+    echo [STEP ${step}] : Quant Model
+    if [[ -z ${QUANT_EXIST_ONNX} ]];then
+        QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx
+    fi
+    if [[ -f ${QUANT_EXIST_ONNX} ]];then
+        SIM_MODEL=${QUANT_EXIST_ONNX}
+        echo "  "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed
+    else
+        python3 ${RUN_DIR}/quant.py            \
+            --model ${SIM_MODEL}               \
+            --model_name ${MODEL_NAME}         \
+            --dataset_dir ${DATASETS_DIR}      \
+            --observer ${QUANT_OBSERVER}       \
+            --disable_quant_names ${DISABLE_QUANT_LIST[@]} \
+            --save_dir $CHECKPOINTS_DIR        \
+            --bsz   ${QUANT_BATCHSIZE}         \
+            --step  ${QUANT_STEP}              \
+            --seed  ${QUANT_SEED}              \
+            --imgsz ${IMGSIZE}
+        SIM_MODEL=${QUANT_EXIST_ONNX}
+        echo "  "Generate ${SIM_MODEL}
+    fi
+fi
+
+ # Change Batchsize (skipped when ${FINAL_MODEL} already exists)
+ let step++
+ echo;
+ echo [STEP ${step}] : Change Batchsize
+ FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx
+ if [ -f $FINAL_MODEL ];then
+     echo "  "Change Batchsize Skip, $FINAL_MODEL has been existed
+ else
+     python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
+         --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL}
+     echo "  "Generate ${FINAL_MODEL}
+ fi
+
+ # Build Engine (skipped when ${ENGINE_FILE} already exists; reads quant_cfg.json from the checkpoints directory)
+ let step++
+ echo;
+ echo [STEP ${step}] : Build Engine
+ ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
+ if [ -f $ENGINE_FILE ];then
+     echo "  "Build Engine Skip, $ENGINE_FILE has been existed
+ else
+     python3 ${RUN_DIR}/build_i8_engine.py          \
+         --onnx ${FINAL_MODEL}                    \
+         --qparam_json ${CHECKPOINTS_DIR}/quant_cfg.json \
+         --engine ${ENGINE_FILE}
+     echo "  "Generate Engine ${ENGINE_FILE}
+ fi
+
+# Inference (the step counter is advanced again so this banner no longer repeats the Build Engine number)
+let step++
+echo;
+echo [STEP ${step}] : Inference
+python3 ${RUN_DIR}/inference.py     \
+    --engine_file=${ENGINE_FILE}    \
+    --datasets_dir=${DATASETS_DIR}  \
+    --imgsz=${IMGSIZE}              \
+    --warm_up=${WARM_UP}            \
+    --loop_count ${LOOP_COUNT}      \
+    --test_mode ${RUN_MODE}         \
+    --acc_target ${TGT}             \
+    --bsz ${BSZ}; check_status
+
+exit ${EXIT_STATUS}
\ No newline at end of file
diff --git a/models/cv/classification/efficientnet_v2/ixrt/scripts/infer_efficientnet_int8_performance.sh b/models/cv/classification/efficientnet_v2/ixrt/scripts/infer_efficientnet_int8_performance.sh
new file mode 100755
index 00000000..07872405
--- /dev/null
+++ b/models/cv/classification/efficientnet_v2/ixrt/scripts/infer_efficientnet_int8_performance.sh
@@ -0,0 +1,145 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+
+EXIT_STATUS=0
+# Latch EXIT_STATUS to 1 when the first command of the most recently run pipeline exited non-zero.
+check_status()
+{
+    [[ ${PIPESTATUS[0]} -eq 0 ]] && return
+    EXIT_STATUS=1
+}
+
+# Run parameters
+BSZ=32
+TGT=-1
+WARM_UP=3
+LOOP_COUNT=20
+RUN_MODE=FPS
+PRECISION=int8
+
+# Update arguments
+index=0
+options=$@
+arguments=($options)
+for argument in $options
+do
+    index=`expr $index + 1`
+    case $argument in
+      --bs) BSZ=${arguments[index]};;
+      --tgt) TGT=${arguments[index]};;
+    esac
+done
+
+source ${CONFIG_DIR}
+echo ${QUANT_OBSERVER}
+ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo CONFIG_DIR : ${CONFIG_DIR}
+echo ====================== Model Info ======================
+echo Model Name : ${MODEL_NAME}
+echo Model Input Name : ${MODEL_INPUT_NAME}
+echo Model Output Name : ${MODEL_OUTPUT_NAME}
+echo Onnx Path : ${ORIGINE_MODEL}
+
+step=0
+SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
+
+# Simplify Model
+let step++
+ echo [STEP ${step}] : Simplify Model
+ if [ -f ${SIM_MODEL} ];then
+     echo "  "Simplify Model, ${SIM_MODEL} has been existed
+ else
+     python3 ${RUN_DIR}/simplify_model.py \
+     --origin_model $ORIGINE_MODEL    \
+     --output_model ${SIM_MODEL}
+     echo "  "Generate ${SIM_MODEL}
+ fi
+
+# Quant Model
+if [ $PRECISION == "int8" ];then
+    let step++
+    echo;
+    echo [STEP ${step}] : Quant Model
+    if [[ -z ${QUANT_EXIST_ONNX} ]];then
+        QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx
+    fi
+    if [[ -f ${QUANT_EXIST_ONNX} ]];then
+        SIM_MODEL=${QUANT_EXIST_ONNX}
+        echo "  "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed
+    else
+        python3 ${RUN_DIR}/quant.py            \
+            --model ${SIM_MODEL}               \
+            --model_name ${MODEL_NAME}         \
+            --dataset_dir ${DATASETS_DIR}      \
+            --observer ${QUANT_OBSERVER}       \
+            --disable_quant_names ${DISABLE_QUANT_LIST[@]} \
+            --save_dir $CHECKPOINTS_DIR        \
+            --bsz   ${QUANT_BATCHSIZE}         \
+            --step  ${QUANT_STEP}              \
+            --seed  ${QUANT_SEED}              \
+            --imgsz ${IMGSIZE}
+        SIM_MODEL=${QUANT_EXIST_ONNX}
+        echo "  "Generate ${SIM_MODEL}
+    fi
+fi
+
+ # Change Batchsize
+ let step++
+ echo;
+ echo [STEP ${step}] : Change Batchsize
+ FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx
+ if [ -f $FINAL_MODEL ];then
+     echo "  "Change Batchsize Skip, $FINAL_MODEL has been existed
+ else
+     python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
+         --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL}
+     echo "  "Generate ${FINAL_MODEL}
+ fi
+
+ # Build Engine
+ let step++
+ echo;
+ echo [STEP ${step}] : Build Engine
+ ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
+ if [ -f $ENGINE_FILE ];then
+     echo "  "Build Engine Skip, $ENGINE_FILE has been existed
+ else
+     python3 ${RUN_DIR}/build_i8_engine.py          \
+         --onnx ${FINAL_MODEL}                    \
+         --qparam_json ${CHECKPOINTS_DIR}/quant_cfg.json \
+         --engine ${ENGINE_FILE}
+     echo "  "Generate Engine ${ENGINE_FILE}
+ fi
+
+# Inference (advance the step counter so this stage is not printed with the Build Engine step number)
+let step++
+echo;
+echo [STEP ${step}] : Inference
+python3 ${RUN_DIR}/inference.py     \
+    --engine_file=${ENGINE_FILE}    \
+    --datasets_dir=${DATASETS_DIR}  \
+    --imgsz=${IMGSIZE}              \
+    --warm_up=${WARM_UP}            \
+    --loop_count ${LOOP_COUNT}      \
+    --test_mode ${RUN_MODE}         \
+    --acc_target ${TGT}             \
+    --bsz ${BSZ}; check_status
+
+exit ${EXIT_STATUS}
\ No newline at end of file
diff --git a/models/cv/classification/efficientnet_v2/ixrt/simplify_model.py b/models/cv/classification/efficientnet_v2/ixrt/simplify_model.py
new file mode 100644
index 00000000..4d53a474
--- /dev/null
+++ b/models/cv/classification/efficientnet_v2/ixrt/simplify_model.py
@@ -0,0 +1,41 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+
+import onnx
+import argparse
+from onnxsim import simplify
+
+def simplify_model(args):
+    """Simplify the ONNX model at args.origin_model and save the result to args.output_model."""
+    onnx_model = onnx.load(args.origin_model)
+    model_simp, check = simplify(onnx_model)
+    if not check:  # do not save a simplified graph that onnxsim could not validate
+        raise RuntimeError("Simplified ONNX model could not be validated")
+    model_simp = onnx.shape_inference.infer_shapes(model_simp)
+    onnx.save(model_simp, args.output_model)
+    print("  Simplify onnx Done.")
+
+
+def parse_args():
+    """Parse the command-line options of this script."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--origin_model", type=str, required=True)
+    parser.add_argument("--output_model", type=str, required=True)
+    parser.add_argument("--reshape", action="store_true")  # parsed but not used by simplify_model
+    return parser.parse_args()
+
+
+if __name__ == "__main__":
+    simplify_model(parse_args())
-- 
Gitee


From 32fc2f09f39aa8ac1fa7fe35b2c09d1e3e4b412f Mon Sep 17 00:00:00 2001
From: tianxi-yi <xinchi.tian@iluvatar.com>
Date: Wed, 22 May 2024 13:33:35 +0800
Subject: [PATCH 2/5] fix README.md with Model Conversion

---
 .../cv/classification/efficientnet_v2/ixrt/README.md |  4 ++--
 .../efficientnet_v2/ixrt/export_onnx.py              | 12 ++++++------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/models/cv/classification/efficientnet_v2/ixrt/README.md b/models/cv/classification/efficientnet_v2/ixrt/README.md
index d06cff37..2e5228f1 100755
--- a/models/cv/classification/efficientnet_v2/ixrt/README.md
+++ b/models/cv/classification/efficientnet_v2/ixrt/README.md
@@ -27,8 +27,8 @@ Dataset: <https://www.image-net.org/download.php> to download the validation dat
 mkdir checkpoints
 git clone https://github.com/huggingface/pytorch-image-models.git
 cp /Path/to/ixrt/export_onnx.py pytorch-image-models/timm/models
-cd pytorch-image-models/timm/models
-python3 export_onnx.py --origin_model /path/to/efficientnetv2_t_agc-3620981a.pth --output_model checkpoints/efficientnet.onnx
+cd pytorch-image-models/timm
+python3 -m models.export_onnx --output_model checkpoints/efficientnet.onnx
 ```
 
 ## Inference
diff --git a/models/cv/classification/efficientnet_v2/ixrt/export_onnx.py b/models/cv/classification/efficientnet_v2/ixrt/export_onnx.py
index 2ada1df1..4af35a04 100755
--- a/models/cv/classification/efficientnet_v2/ixrt/export_onnx.py
+++ b/models/cv/classification/efficientnet_v2/ixrt/export_onnx.py
@@ -21,13 +21,13 @@ import torch.nn as nn
 import torch.nn.functional as F
 
 from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD
-from _efficientnet_blocks import SqueezeExcite
-from _efficientnet_builder import EfficientNetBuilder, decode_arch_def, efficientnet_init_weights,\
+from ._efficientnet_blocks import SqueezeExcite
+from ._efficientnet_builder import EfficientNetBuilder, decode_arch_def, efficientnet_init_weights,\
     round_channels, resolve_bn_args, resolve_act_layer, BN_EPS_TF_DEFAULT
-from features import FeatureInfo, FeatureHooks
-from helpers import build_model_with_cfg, pretrained_cfg_for_features, checkpoint_seq
-from layers import create_conv2d, create_classifier, get_norm_act_layer, EvoNorm2dS0, GroupNormAct
-from registry import register_model
+from .features import FeatureInfo, FeatureHooks
+from .helpers import build_model_with_cfg, pretrained_cfg_for_features, checkpoint_seq
+from .layers import create_conv2d, create_classifier, get_norm_act_layer, EvoNorm2dS0, GroupNormAct
+from .registry import register_model
 import argparse
 import ssl
 
-- 
Gitee


From 8d7e1722e9f52480a1d39e0f6bab68b353266e00 Mon Sep 17 00:00:00 2001
From: tianxi-yi <xinchi.tian@iluvatar.com>
Date: Wed, 22 May 2024 15:36:18 +0800
Subject: [PATCH 3/5] Add _builder.py and fix README.md

---
 .../efficientnet_v2/ixrt/README.md            |   4 +
 .../efficientnet_v2/ixrt/_builder.py          | 480 ++++++++++++++++++
 2 files changed, 484 insertions(+)
 create mode 100755 models/cv/classification/efficientnet_v2/ixrt/_builder.py

diff --git a/models/cv/classification/efficientnet_v2/ixrt/README.md b/models/cv/classification/efficientnet_v2/ixrt/README.md
index 2e5228f1..5071a1ab 100755
--- a/models/cv/classification/efficientnet_v2/ixrt/README.md
+++ b/models/cv/classification/efficientnet_v2/ixrt/README.md
@@ -27,7 +27,11 @@ Dataset: <https://www.image-net.org/download.php> to download the validation dat
 mkdir checkpoints
 git clone https://github.com/huggingface/pytorch-image-models.git
 cp /Path/to/ixrt/export_onnx.py pytorch-image-models/timm/models
+cd pytorch-image-models/timm/models
+rm _builder.py
+mv /Path/ixrt/_builder.py pytorch-image-models/timm/models
 cd pytorch-image-models/timm
+mv /Path/to/efficientnetv2_t_agc-3620981a.pth /root/.cache/torch/hub/checkpoints/
 python3 -m models.export_onnx --output_model checkpoints/efficientnet.onnx
 ```
 
diff --git a/models/cv/classification/efficientnet_v2/ixrt/_builder.py b/models/cv/classification/efficientnet_v2/ixrt/_builder.py
new file mode 100755
index 00000000..7246c0d5
--- /dev/null
+++ b/models/cv/classification/efficientnet_v2/ixrt/_builder.py
@@ -0,0 +1,480 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+
+import dataclasses
+import logging
+import os
+from copy import deepcopy
+from typing import Any, Callable, Dict, List, Optional, Tuple
+
+from torch import nn as nn
+from torch.hub import load_state_dict_from_url
+
+from timm.models._features import FeatureListNet, FeatureDictNet, FeatureHookNet, FeatureGetterNet
+from timm.models._features_fx import FeatureGraphNet
+from timm.models._helpers import load_state_dict
+from timm.models._hub import has_hf_hub, download_cached_file, check_cached_file, load_state_dict_from_hf,\
+    load_custom_from_hf
+from timm.models._manipulate import adapt_input_conv
+from timm.models._pretrained import PretrainedCfg
+from timm.models._prune import adapt_model_from_file
+from timm.models._registry import get_pretrained_cfg
+
+_logger = logging.getLogger(__name__)
+
+# Global variables for rarely used pretrained checkpoint download progress and hash check.
+# Use set_pretrained_download_progress / set_pretrained_check_hash functions to toggle.
+_DOWNLOAD_PROGRESS = False
+_CHECK_HASH = False
+_USE_OLD_CACHE = int(os.environ.get('TIMM_USE_OLD_CACHE', 0)) > 0
+
+__all__ = ['set_pretrained_download_progress', 'set_pretrained_check_hash', 'load_custom_pretrained', 'load_pretrained',
+           'pretrained_cfg_for_features', 'resolve_pretrained_cfg', 'build_model_with_cfg']
+
+
+def _resolve_pretrained_source(pretrained_cfg):  # returns the tuple (load_from, pretrained_loc)
+    cfg_source = pretrained_cfg.get('source', '')
+    pretrained_url = pretrained_cfg.get('url', None)
+    pretrained_file = pretrained_cfg.get('file', None)
+    pretrained_sd = pretrained_cfg.get('state_dict', None)
+    hf_hub_id = pretrained_cfg.get('hf_hub_id', None)
+
+    # resolve where to load pretrained weights from; both values stay '' when no branch below matches
+    load_from = ''
+    pretrained_loc = ''
+    if cfg_source == 'hf-hub' and has_hf_hub(necessary=True):
+        # hf-hub specified as source via model identifier
+        load_from = 'hf-hub'
+        assert hf_hub_id
+        pretrained_loc = hf_hub_id
+    else:
+        # default source == timm or unspecified
+        if pretrained_sd:
+            # direct state_dict pass through is the highest priority
+            load_from = 'state_dict'
+            pretrained_loc = pretrained_sd
+            assert isinstance(pretrained_loc, dict)
+        elif pretrained_file:
+            # file load override is the second-highest priority if set
+            load_from = 'file'
+            pretrained_loc = pretrained_file
+        else:
+            old_cache_valid = False
+            if _USE_OLD_CACHE:
+                # prioritized old cached weights if exists and env var enabled
+                old_cache_valid = check_cached_file(pretrained_url) if pretrained_url else False
+            if not old_cache_valid and hf_hub_id and has_hf_hub(necessary=True):
+                # hf-hub available as alternate weight source in default_cfg
+                load_from = 'hf-hub'
+                pretrained_loc = hf_hub_id
+            elif pretrained_url:
+                load_from = 'url'
+                pretrained_loc = pretrained_url
+
+    if load_from == 'hf-hub' and pretrained_cfg.get('hf_hub_filename', None):
+        # if a filename override is set, return tuple for location w/ (hub_id, filename)
+        pretrained_loc = pretrained_loc, pretrained_cfg['hf_hub_filename']
+    return load_from, pretrained_loc
+
+
+def set_pretrained_download_progress(enable=True):
+    """ Set the module-level flag passed as `progress` to the pretrained weight download calls (globally). """
+    global _DOWNLOAD_PROGRESS
+    _DOWNLOAD_PROGRESS = enable
+
+
+def set_pretrained_check_hash(enable=True):
+    """ Set the module-level flag passed as `check_hash` to the pretrained weight download calls (globally). """
+    global _CHECK_HASH
+    _CHECK_HASH = enable
+
+
+def load_custom_pretrained(
+        model: nn.Module,
+        pretrained_cfg: Optional[Dict] = None,
+        load_fn: Optional[Callable] = None,
+):
+    r"""Loads a custom (read non .pth) weight file
+
+    Downloads checkpoint file into cache-dir like torch.hub based loaders, but calls
+    a passed in custom load fn, or the `load_pretrained` model member fn.
+
+    If the object is already present in `model_dir`, it's deserialized and returned.
+    The default value of `model_dir` is ``<hub_dir>/checkpoints`` where
+    `hub_dir` is the directory returned by :func:`~torch.hub.get_dir`.
+
+    Args:
+        model: The instantiated model to load weights into
+        pretrained_cfg (dict): Default pretrained model cfg
+        load_fn: An external standalone fn that loads weights into provided model, otherwise a fn named
+            'load_pretrained' on the model will be called if it exists
+    """
+    pretrained_cfg = pretrained_cfg or getattr(model, 'pretrained_cfg', None)
+    if not pretrained_cfg:
+        _logger.warning("Invalid pretrained config, cannot load weights.")
+        return
+
+    load_from, pretrained_loc = _resolve_pretrained_source(pretrained_cfg)
+    if not load_from:
+        _logger.warning("No pretrained weights exist for this model. Using random initialization.")
+        return
+    if load_from == 'hf-hub':  # NOTE(review): only warns; the load below still runs with the hub id - confirm
+        _logger.warning("Hugging Face hub not currently supported for custom load pretrained models.")
+    elif load_from == 'url':
+        pretrained_loc = download_cached_file(
+            pretrained_loc,
+            check_hash=_CHECK_HASH,
+            progress=_DOWNLOAD_PROGRESS,
+        )
+
+    if load_fn is not None:
+        load_fn(model, pretrained_loc)
+    elif hasattr(model, 'load_pretrained'):
+        model.load_pretrained(pretrained_loc)
+    else:
+        _logger.warning("Valid function to load pretrained weights is not available, using random initialization.")
+
+
+def load_pretrained(
+        model: nn.Module,
+        pretrained_cfg: Optional[Dict] = None,
+        num_classes: int = 1000,
+        in_chans: int = 3,
+        filter_fn: Optional[Callable] = None,
+        strict: bool = True,
+):
+    """ Load pretrained checkpoint
+
+    Only explicit 'state_dict' / 'file' sources are honored; any other source is replaced by the pinned release URL.
+
+    Args:
+        model (nn.Module) : PyTorch model module
+        pretrained_cfg (Optional[Dict]): configuration for pretrained weights / target dataset
+        num_classes (int): num_classes for target model
+        in_chans (int): in_chans for target model
+        filter_fn (Optional[Callable]): state_dict filter fn for load (takes state_dict, model as args)
+        strict (bool): strict load of checkpoint
+
+    Raises:
+        RuntimeError: if neither `pretrained_cfg` nor `model.pretrained_cfg` provides a config
+    """
+    pretrained_cfg = pretrained_cfg or getattr(model, 'pretrained_cfg', None)
+    if not pretrained_cfg:
+        raise RuntimeError("Invalid pretrained config, cannot load weights. Use `pretrained=False` for random init.")
+
+    load_from, pretrained_loc = _resolve_pretrained_source(pretrained_cfg)
+    if load_from not in ('state_dict', 'file'):
+        # pin the EfficientNetV2-T release checkpoint, but keep explicit state_dict / file overrides usable
+        load_from = 'url'
+        pretrained_loc = 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/efficientnetv2_t_agc-3620981a.pth'
+    if load_from == 'state_dict':
+        _logger.info(f'Loading pretrained weights from state dict')
+        state_dict = pretrained_loc  # pretrained_loc is the actual state dict for this override
+    elif load_from == 'file':
+        _logger.info(f'Loading pretrained weights from file ({pretrained_loc})')
+        if pretrained_cfg.get('custom_load', False):
+            model.load_pretrained(pretrained_loc)
+            return
+        state_dict = load_state_dict(pretrained_loc)
+    elif load_from == 'url':
+        _logger.info(f'Loading pretrained weights from url ({pretrained_loc})')
+        if pretrained_cfg.get('custom_load', False):
+            pretrained_loc = download_cached_file(pretrained_loc, progress=_DOWNLOAD_PROGRESS, check_hash=_CHECK_HASH)
+            # load the file that was just downloaded/cached instead of a machine-specific absolute path
+            model.load_pretrained(pretrained_loc)
+            return
+        state_dict = load_state_dict_from_url(
+            pretrained_loc,
+            map_location='cpu',
+            progress=_DOWNLOAD_PROGRESS,
+            check_hash=_CHECK_HASH,
+        )
+    elif load_from == 'hf-hub':  # kept for parity with upstream timm; not reachable while the URL is pinned above
+        _logger.info(f'Loading pretrained weights from Hugging Face hub ({pretrained_loc})')
+        if isinstance(pretrained_loc, (list, tuple)):
+            custom_load = pretrained_cfg.get('custom_load', False)
+            if isinstance(custom_load, str) and custom_load == 'hf':
+                load_custom_from_hf(*pretrained_loc, model)
+                return
+            state_dict = load_state_dict_from_hf(*pretrained_loc)
+        else:
+            state_dict = load_state_dict_from_hf(pretrained_loc)
+    else:
+        model_name = pretrained_cfg.get('architecture', 'this model')
+        raise RuntimeError(f"No pretrained weights exist for {model_name}. Use `pretrained=False` for random init.")
+
+    if filter_fn is not None:
+        try:
+            state_dict = filter_fn(state_dict, model)
+        except TypeError:
+            # for backwards compat with filter fn that take one arg
+            state_dict = filter_fn(state_dict)
+
+    input_convs = pretrained_cfg.get('first_conv', None)
+    if input_convs is not None and in_chans != 3:
+        if isinstance(input_convs, str):
+            input_convs = (input_convs,)
+        for input_conv_name in input_convs:
+            weight_name = input_conv_name + '.weight'
+            try:
+                state_dict[weight_name] = adapt_input_conv(in_chans, state_dict[weight_name])
+                _logger.info(
+                    f'Converted input conv {input_conv_name} pretrained weights from 3 to {in_chans} channel(s)')
+            except NotImplementedError:
+                del state_dict[weight_name]
+                strict = False
+                _logger.warning(
+                    f'Unable to convert pretrained {input_conv_name} weights, using random init for this layer.')
+
+    classifiers = pretrained_cfg.get('classifier', None)
+    label_offset = pretrained_cfg.get('label_offset', 0)
+    if classifiers is not None:
+        if isinstance(classifiers, str):
+            classifiers = (classifiers,)
+        if num_classes != pretrained_cfg['num_classes']:
+            for classifier_name in classifiers:
+                # completely discard fully connected if model num_classes doesn't match pretrained weights
+                state_dict.pop(classifier_name + '.weight', None)
+                state_dict.pop(classifier_name + '.bias', None)
+            strict = False
+        elif label_offset > 0:
+            for classifier_name in classifiers:
+                # special case for pretrained weights with an extra background class in pretrained weights
+                classifier_weight = state_dict[classifier_name + '.weight']
+                state_dict[classifier_name + '.weight'] = classifier_weight[label_offset:]
+                classifier_bias = state_dict[classifier_name + '.bias']
+                state_dict[classifier_name + '.bias'] = classifier_bias[label_offset:]
+
+    load_result = model.load_state_dict(state_dict, strict=strict)
+    if load_result.missing_keys:
+        _logger.info(
+            f'Missing keys ({", ".join(load_result.missing_keys)}) discovered while loading pretrained weights.'
+            f' This is expected if model is being adapted.')
+    if load_result.unexpected_keys:
+        _logger.warning(
+            f'Unexpected keys ({", ".join(load_result.unexpected_keys)}) found while loading pretrained weights.'
+            f' This may be expected if model is being adapted.')
+
+
+def pretrained_cfg_for_features(pretrained_cfg):
+    """ Return a deep copy of `pretrained_cfg` with the classification-head entries removed. """
+    features_cfg = deepcopy(pretrained_cfg)
+    # default pretrained cfg fields that don't have much relevance for feature backbone
+    for head_key in ('num_classes', 'classifier', 'global_pool'):  # add default final pool size?
+        features_cfg.pop(head_key, None)
+    return features_cfg
+
+
+def _filter_kwargs(kwargs, names):
+    """ Remove every key listed in `names` from `kwargs` in place; does nothing if either one is empty. """
+    if kwargs and names:
+        for key in names:
+            kwargs.pop(key, None)
+
+
+def _update_default_model_kwargs(pretrained_cfg, kwargs, kwargs_filter):
+    """ Update the default_cfg and kwargs before passing to model
+
+    Args:
+        pretrained_cfg: input pretrained cfg (updated in-place)
+        kwargs: keyword args passed to model build fn (updated in-place)
+        kwargs_filter: keyword arg keys that must be removed before model __init__
+    """
+    # Set model __init__ args that can be determined by default_cfg (if not already passed as kwargs)
+    default_kwarg_names = ('num_classes', 'global_pool', 'in_chans')
+    if pretrained_cfg.get('fixed_input_size', False):
+        # if fixed_input_size exists and is True, model takes an img_size arg that fixes its input size
+        default_kwarg_names += ('img_size',)
+
+    for n in default_kwarg_names:
+        # for legacy reasons, model __init__args uses img_size + in_chans as separate args while
+        # pretrained_cfg has one input_size=(C, H ,W) entry
+        if n == 'img_size':
+            input_size = pretrained_cfg.get('input_size', None)
+            if input_size is not None:
+                assert len(input_size) == 3
+                kwargs.setdefault(n, input_size[-2:])  # the trailing (H, W) pair
+        elif n == 'in_chans':
+            input_size = pretrained_cfg.get('input_size', None)
+            if input_size is not None:
+                assert len(input_size) == 3
+                kwargs.setdefault(n, input_size[0])  # the leading C entry
+        elif n == 'num_classes':
+            default_val = pretrained_cfg.get(n, None)
+            # if default is < 0, don't pass through to model
+            if default_val is not None and default_val >= 0:
+                kwargs.setdefault(n, pretrained_cfg[n])
+        else:
+            default_val = pretrained_cfg.get(n, None)
+            if default_val is not None:
+                kwargs.setdefault(n, pretrained_cfg[n])
+
+    # Filter keyword args for task specific model variants (some 'features only' models, etc.)
+    _filter_kwargs(kwargs, names=kwargs_filter)
+
+
+def resolve_pretrained_cfg(
+        variant: str,
+        pretrained_cfg=None,
+        pretrained_cfg_overlay=None,
+) -> PretrainedCfg:
+    """ Resolve a dict / tag string / PretrainedCfg / None into a PretrainedCfg with the overlay applied. """
+    model_with_tag = variant
+    pretrained_tag = None
+    if pretrained_cfg:
+        if isinstance(pretrained_cfg, dict):
+            # pretrained_cfg dict passed as arg, validate by converting to PretrainedCfg
+            pretrained_cfg = PretrainedCfg(**pretrained_cfg)
+        elif isinstance(pretrained_cfg, str):
+            pretrained_tag = pretrained_cfg
+            pretrained_cfg = None
+
+    # fallback to looking up pretrained cfg in model registry by variant identifier
+    if not pretrained_cfg:
+        if pretrained_tag:
+            model_with_tag = '.'.join([variant, pretrained_tag])
+        pretrained_cfg = get_pretrained_cfg(model_with_tag)
+
+    if not pretrained_cfg:
+        _logger.warning(
+            f"No pretrained configuration specified for {model_with_tag} model. Using a default."
+            f" Please add a config to the model pretrained_cfg registry or pass explicitly.")
+        pretrained_cfg = PretrainedCfg()  # instance with defaults
+
+    # work on a copy so that setdefault() below never mutates the overlay dict owned by the caller
+    pretrained_cfg_overlay = dict(pretrained_cfg_overlay or {})
+    if not pretrained_cfg.architecture:
+        pretrained_cfg_overlay.setdefault('architecture', variant)
+    return dataclasses.replace(pretrained_cfg, **pretrained_cfg_overlay)
+
+
+def build_model_with_cfg(
+        model_cls: Callable,
+        variant: str,
+        pretrained: bool,
+        pretrained_cfg: Optional[Dict] = None,
+        pretrained_cfg_overlay: Optional[Dict] = None,
+        model_cfg: Optional[Any] = None,
+        feature_cfg: Optional[Dict] = None,
+        pretrained_strict: bool = True,
+        pretrained_filter_fn: Optional[Callable] = None,
+        kwargs_filter: Optional[Tuple[str]] = None,
+        **kwargs,
+):
+    """ Build model with specified default_cfg and optional model_cfg
+
+    This helper fn aids in the construction of a model including:
+      * handling default_cfg and associated pretrained weight loading
+      * passing through optional model_cfg for models with config based arch spec
+      * features_only model adaptation
+      * pruning config / model adaptation
+
+    Args:
+        model_cls: model class
+        variant: model variant name
+        pretrained: load pretrained weights
+        pretrained_cfg: model's pretrained weight/task config; pretrained_cfg_overlay entries replace its fields
+        model_cfg: model's architecture config
+        feature_cfg: feature extraction adapter config
+        pretrained_strict: load pretrained weights strictly
+        pretrained_filter_fn: filter callable for pretrained weights
+        kwargs_filter: kwargs to filter before passing to model
+        **kwargs: model args passed through to model __init__
+    """
+    pruned = kwargs.pop('pruned', False)
+    features = False  # switched on below when kwargs carry features_only=True
+    feature_cfg = feature_cfg or {}
+
+    # resolve and update model pretrained config and model kwargs
+    pretrained_cfg = resolve_pretrained_cfg(
+        variant,
+        pretrained_cfg=pretrained_cfg,
+        pretrained_cfg_overlay=pretrained_cfg_overlay
+    )
+
+    # FIXME converting back to dict, PretrainedCfg use should be propagated further, but not into model
+    pretrained_cfg = pretrained_cfg.to_dict()
+
+    _update_default_model_kwargs(pretrained_cfg, kwargs, kwargs_filter)
+
+    # Setup for feature extraction wrapper done at end of this fn
+    if kwargs.pop('features_only', False):
+        features = True
+        feature_cfg.setdefault('out_indices', (0, 1, 2, 3, 4))
+        if 'out_indices' in kwargs:
+            feature_cfg['out_indices'] = kwargs.pop('out_indices')
+        if 'feature_cls' in kwargs:
+            feature_cfg['feature_cls'] = kwargs.pop('feature_cls')
+
+    # Instantiate the model
+    if model_cfg is None:
+        model = model_cls(**kwargs)
+    else:
+        model = model_cls(cfg=model_cfg, **kwargs)
+    model.pretrained_cfg = pretrained_cfg
+    model.default_cfg = model.pretrained_cfg  # alias for backwards compat
+
+    if pruned:
+        model = adapt_model_from_file(model, variant)
+
+    # For classification models, check class attr, then kwargs, then default to 1k, otherwise 0 for feats
+    num_classes_pretrained = 0 if features else getattr(model, 'num_classes', kwargs.get('num_classes', 1000))
+    if pretrained:
+        load_pretrained(
+            model,
+            pretrained_cfg=pretrained_cfg,
+            num_classes=num_classes_pretrained,
+            in_chans=kwargs.get('in_chans', 3),
+            filter_fn=pretrained_filter_fn,
+            strict=pretrained_strict,
+        )
+
+    # Wrap the model in a feature extraction module if enabled
+    if features:
+        use_getter = False
+        if 'feature_cls' in feature_cfg:
+            feature_cls = feature_cfg.pop('feature_cls')
+            if isinstance(feature_cls, str):
+                feature_cls = feature_cls.lower()
+
+                # flatten_sequential only valid for some feature extractors
+                if feature_cls not in ('dict', 'list', 'hook'):
+                    feature_cfg.pop('flatten_sequential', None)
+
+                if 'hook' in feature_cls:
+                    feature_cls = FeatureHookNet
+                elif feature_cls == 'list':
+                    feature_cls = FeatureListNet
+                elif feature_cls == 'dict':
+                    feature_cls = FeatureDictNet
+                elif feature_cls == 'fx':
+                    feature_cls = FeatureGraphNet
+                elif feature_cls == 'getter':
+                    use_getter = True
+                    feature_cls = FeatureGetterNet
+                else:
+                    assert False, f'Unknown feature class {feature_cls}'
+        else:
+            feature_cls = FeatureListNet
+
+        output_fmt = getattr(model, 'output_fmt', None)
+        if output_fmt is not None and not use_getter:  # don't set default for intermediate feat getter
+            feature_cfg.setdefault('output_fmt', output_fmt)
+
+        model = feature_cls(model, **feature_cfg)
+        model.pretrained_cfg = pretrained_cfg_for_features(pretrained_cfg)  # add back pretrained cfg
+        model.default_cfg = model.pretrained_cfg  # alias for rename backwards compat (default_cfg -> pretrained_cfg)
+
+    return model
-- 
Gitee


From e644d4ee832178860a12382fdfb19ce8a87e2887 Mon Sep 17 00:00:00 2001
From: tianxi-yi <xinchi.tian@iluvatar.com>
Date: Wed, 22 May 2024 17:17:41 +0800
Subject: [PATCH 4/5] Fix the file path for correction

---
 models/cv/classification/efficientnet_v2/ixrt/README.md | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/models/cv/classification/efficientnet_v2/ixrt/README.md b/models/cv/classification/efficientnet_v2/ixrt/README.md
index 5071a1ab..aa6aa4c0 100755
--- a/models/cv/classification/efficientnet_v2/ixrt/README.md
+++ b/models/cv/classification/efficientnet_v2/ixrt/README.md
@@ -31,7 +31,8 @@ cd pytorch-image-models/timm/models
 rm _builder.py
 mv /Path/ixrt/_builder.py pytorch-image-models/timm/models
 cd pytorch-image-models/timm
-mv /Path/to/efficientnetv2_t_agc-3620981a.pth /root/.cache/torch/hub/checkpoints/
+mkdir -p /root/.cache/torch/hub/checkpoints/
+wget -P /root/.cache/torch/hub/checkpoints/ https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/efficientnetv2_t_agc-3620981a.pth
 python3 -m models.export_onnx --output_model checkpoints/efficientnet.onnx
 ```
 
@@ -56,9 +57,9 @@ bash script/infer_efficientnet_fp16_performance.sh
 ### INT8
 ```bash
 # Accuracy
-bash script/infer_efficientnet_int8_accuracy.sh
+bash scripts/infer_efficientnet_int8_accuracy.sh
 # Performance
-bash script/infer_efficientnet_int8_performance.sh
+bash scripts/infer_efficientnet_int8_performance.sh
 ```
 
 
-- 
Gitee


From 768a2f5b2e5fefa03f68acaa9a903f5710171729 Mon Sep 17 00:00:00 2001
From: tianxi-yi <xinchi.tian@iluvatar.com>
Date: Wed, 22 May 2024 17:30:20 +0800
Subject: [PATCH 5/5] Fix script to scripts

---
 models/cv/classification/efficientnet_v2/ixrt/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/models/cv/classification/efficientnet_v2/ixrt/README.md b/models/cv/classification/efficientnet_v2/ixrt/README.md
index aa6aa4c0..fda6110a 100755
--- a/models/cv/classification/efficientnet_v2/ixrt/README.md
+++ b/models/cv/classification/efficientnet_v2/ixrt/README.md
@@ -49,9 +49,9 @@ export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
 
 ```bash
 # Accuracy
-bash script/infer_efficientnet_fp16_accuracy.sh
+bash scripts/infer_efficientnet_fp16_accuracy.sh
 # Performance
-bash script/infer_efficientnet_fp16_performance.sh
+bash scripts/infer_efficientnet_fp16_performance.sh
 ```
 
 ### INT8
-- 
Gitee