diff --git a/README.md b/README.md
index d33db3066de534e8fb3aead86ab9ab5ea63b252c..90c697a015b9f333988e3a205fb5176f0b4749b8 100644
--- a/README.md
+++ b/README.md
@@ -396,15 +396,15 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型
- |
- Wide_ResNet50 |
+ Wide ResNet50 |
FP16 |
Supported |
- - |
+ Supported |
INT8 |
Supported |
- - |
+ Supported |
diff --git a/models/cv/classification/wide_resnet50/igie/README.md b/models/cv/classification/wide_resnet50/igie/README.md
index 50120e88b90dca143ed5b9ce856af7d3903f5aa8..c3bebf1738ae76036f696eb7f1f0e6a80f93553b 100644
--- a/models/cv/classification/wide_resnet50/igie/README.md
+++ b/models/cv/classification/wide_resnet50/igie/README.md
@@ -1,4 +1,4 @@
-# WideResNet50
+# Wide ResNet50
## Description
@@ -51,7 +51,7 @@ bash scripts/infer_wide_resnet50_int8_performance.sh
## Results
-Model |BatchSize |Precision |FPS |Top-1(%) |Top-5(%)
--------------|-----------|----------|----------|----------|--------
-WideResNet50 | 32 | FP16 | 2312.383 | 78.459 | 94.052
-WideResNet50 | 32 | INT8 | 5195.654 | 77.957 | 93.798
+| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) |
+| ------------- | --------- | --------- | -------- | -------- | -------- |
+| Wide ResNet50 | 32 | FP16 | 2312.383 | 78.459 | 94.052 |
+| Wide ResNet50 | 32 | INT8 | 5195.654 | 77.957 | 93.798 |
diff --git a/models/cv/classification/wide_resnet50/ixrt/README.md b/models/cv/classification/wide_resnet50/ixrt/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..72dd1308b11b2dd7f6237e8c7ec782c99107e0c2
--- /dev/null
+++ b/models/cv/classification/wide_resnet50/ixrt/README.md
@@ -0,0 +1,61 @@
+# Wide ResNet50
+
+## Description
+
+The distinguishing feature of Wide ResNet50 lies in its widened architecture compared to traditional ResNet models. By increasing the width of the residual blocks, Wide ResNet50 enhances the capacity of the network to capture richer and more diverse feature representations, leading to improved performance on various visual recognition tasks.
+
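+The widening is visible directly in the torchvision implementation: `wide_resnet50_2` doubles the channel count of the 3x3 convolution inside every bottleneck block relative to the standard ResNet50. A minimal sketch to confirm this with torchvision (not part of the inference pipeline):
+
+```python
+import torchvision
+
+resnet50 = torchvision.models.resnet50()
+wide_resnet50 = torchvision.models.wide_resnet50_2()
+
+# 3x3 conv of the first bottleneck block: 64 channels vs 128 channels
+print(resnet50.layer1[0].conv2.out_channels)       # 64
+print(wide_resnet50.layer1[0].conv2.out_channels)  # 128
+```
+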
+## Setup
+
+### Install
+
+```bash
+pip3 install onnx
+pip3 install tqdm
+```
+
+### Download
+
+Pretrained model: torchvision `wide_resnet50_2` weights (`wide_resnet50_2-95faca4d.pth`).
+
+Dataset: the ImageNet validation set, used for accuracy evaluation and for INT8 calibration.
+
+### Model Conversion
+
+```bash
+mkdir -p checkpoints/
+python3 export.py --weight wide_resnet50_2-95faca4d.pth --output checkpoints/wide_resnet50.onnx
+```
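+
+Optionally, sanity-check the exported ONNX file before building an engine. This is a minimal sketch using the `onnx` package installed above, pointed at the file produced by the command just shown:
+
+```python
+import onnx
+
+model = onnx.load("checkpoints/wide_resnet50.onnx")
+onnx.checker.check_model(model)  # raises if the graph is structurally invalid
+print([inp.name for inp in model.graph.input])  # the export script names the input "input"
+```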
+
+## Inference
+
+```bash
+export DATASETS_DIR=/Path/to/imagenet_val/
+export CHECKPOINTS_DIR=./checkpoints
+export RUN_DIR=./
+export CONFIG_DIR=config/WIDE_RESNET50_CONFIG
+```
+
+### FP16
+
+```bash
+# Accuracy
+bash scripts/infer_wide_resnet50_fp16_accuracy.sh
+# Performance
+bash scripts/infer_wide_resnet50_fp16_performance.sh
+```
+
+### INT8
+
+```bash
+# Accuracy
+bash scripts/infer_wide_resnet50_int8_accuracy.sh
+# Performance
+bash scripts/infer_wide_resnet50_int8_performance.sh
+```
+
+## Results
+
+| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) |
+| ------------- | --------- | --------- | -------- | -------- | -------- |
+| Wide ResNet50 | 32 | FP16 | 2478.551 | 78.486 | 94.084 |
+| Wide ResNet50 | 32 | INT8 | 5981.702 | 76.956 | 93.920 |
diff --git a/models/cv/classification/wide_resnet50/ixrt/build_engine.py b/models/cv/classification/wide_resnet50/ixrt/build_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..01e126bc715aa77d38c3abdd1e02191a262689e7
--- /dev/null
+++ b/models/cv/classification/wide_resnet50/ixrt/build_engine.py
@@ -0,0 +1,109 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import os
+import cv2
+import argparse
+import numpy as np
+
+import torch
+import tensorrt
+from calibration_dataset import getdataloader
+import cuda.cudart as cudart
+
+def assertSuccess(err):
+ assert(err == cudart.cudaError_t.cudaSuccess)
+
+class EngineCalibrator(tensorrt.IInt8EntropyCalibrator2):
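+    """INT8 entropy calibrator that feeds preprocessed ImageNet batches to the builder.
+
+    Each batch from the calibration dataloader is copied into a device buffer whose
+    address is handed back to the builder through get_batch().
+    """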
+
+ def __init__(self, cache_file, datasets_dir, loop_count=10, bsz=1, img_sz=224):
+ super().__init__()
+ self.cache_file = cache_file
+ self.image_batcher = getdataloader(datasets_dir, loop_count, batch_size=bsz, img_sz=img_sz)
+ self.batch_generator = iter(self.image_batcher)
+        # calibration batches are float32, so allocate 4 bytes per element
+        size = img_sz * img_sz * 3 * bsz * np.dtype(np.float32).itemsize
+        err, self.batch_allocation = cudart.cudaMalloc(size)
+ assertSuccess(err)
+
+ def __del__(self):
+ err,= cudart.cudaFree(self.batch_allocation)
+ assertSuccess(err)
+
+ def get_batch_size(self):
+ return self.image_batcher.batch_size
+
+ def get_batch(self, names):
+ try:
+ batch, _ = next(self.batch_generator)
+ batch = batch.numpy()
+ cudart.cudaMemcpy(self.batch_allocation,
+ np.ascontiguousarray(batch),
+ batch.nbytes,
+ cudart.cudaMemcpyKind.cudaMemcpyHostToDevice)
+ return [int(self.batch_allocation)]
+ except StopIteration:
+ return None
+
+ def read_calibration_cache(self):
+ if os.path.exists(self.cache_file):
+ with open(self.cache_file, "rb") as f:
+ return f.read()
+
+ def write_calibration_cache(self, cache):
+ with open(self.cache_file, "wb") as f:
+ f.write(cache)
+
+def main(config):
+ IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.VERBOSE)
+ builder = tensorrt.Builder(IXRT_LOGGER)
+ EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
+ network = builder.create_network(EXPLICIT_BATCH)
+ build_config = builder.create_builder_config()
+ parser = tensorrt.OnnxParser(network, IXRT_LOGGER)
+ parser.parse_from_file(config.model)
+
+ precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16
+ print("precision : ", precision)
+ build_config.set_flag(precision)
+ if config.precision == "int8":
+ build_config.int8_calibrator = EngineCalibrator("int8_cache", config.datasets_dir)
+
+ plan = builder.build_serialized_network(network, build_config)
+ engine_file_path = config.engine
+ with open(engine_file_path, "wb") as f:
+ f.write(plan)
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--model", type=str)
+ parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8",
+                        help="precision used when building the engine")
+ parser.add_argument("--engine", type=str, default=None)
+ parser.add_argument(
+ "--datasets_dir",
+ type=str,
+ default="",
+ help="ImageNet dir",
+ )
+ args = parser.parse_args()
+ return args
+
+if __name__ == "__main__":
+ # cali = EngineCalibrator("tmp", "/home/qiang.zhang/data/imagenet_val/")
+ # print(cali.get_batch_size())
+ # print(cali.get_batch("hello"))
+ args = parse_args()
+ main(args)
diff --git a/models/cv/classification/wide_resnet50/ixrt/build_i8_engine.py b/models/cv/classification/wide_resnet50/ixrt/build_i8_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..6038b33f50cff7a14efcefa6673ae9d2fd19870b
--- /dev/null
+++ b/models/cv/classification/wide_resnet50/ixrt/build_i8_engine.py
@@ -0,0 +1,112 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import argparse
+import json
+import os
+
+import tensorrt
+import tensorrt as trt
+
+TRT_LOGGER = trt.Logger(tensorrt.Logger.VERBOSE)
+
+EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
+
+
+def GiB(val):
+ return val * 1 << 30
+
+
+def json_load(filename):
+ with open(filename) as json_file:
+ data = json.load(json_file)
+ return data
+
+
+def setDynamicRange(network, json_file):
+ """Sets ranges for network layers."""
+ quant_param_json = json_load(json_file)
+ act_quant = quant_param_json["act_quant_info"]
+
+ for i in range(network.num_inputs):
+ input_tensor = network.get_input(i)
+        if input_tensor.name in act_quant:
+ print(input_tensor.name)
+ value = act_quant[input_tensor.name]
+ tensor_max = abs(value)
+ tensor_min = -abs(value)
+ input_tensor.dynamic_range = (tensor_min, tensor_max)
+
+ for i in range(network.num_layers):
+ layer = network.get_layer(i)
+
+ for output_index in range(layer.num_outputs):
+ tensor = layer.get_output(output_index)
+
+            if tensor.name in act_quant:
+ value = act_quant[tensor.name]
+ tensor_max = abs(value)
+ tensor_min = -abs(value)
+ tensor.dynamic_range = (tensor_min, tensor_max)
+ else:
+ print("\033[1;32m%s\033[0m" % tensor.name)
+
+
+def build_engine(onnx_file, json_file, engine_file):
+ builder = trt.Builder(TRT_LOGGER)
+ network = builder.create_network(EXPLICIT_BATCH)
+
+ config = builder.create_builder_config()
+
+ # If it is a dynamic onnx model , you need to add the following.
+ # profile = builder.create_optimization_profile()
+ # profile.set_shape("input_name", (batch, channels, min_h, min_w), (batch, channels, opt_h, opt_w), (batch, channels, max_h, max_w))
+ # config.add_optimization_profile(profile)
+
+ parser = trt.OnnxParser(network, TRT_LOGGER)
+ # config.max_workspace_size = GiB(1)
+ if not os.path.exists(onnx_file):
+ quit("ONNX file {} not found".format(onnx_file))
+
+ with open(onnx_file, "rb") as model:
+ if not parser.parse(model.read()):
+ print("ERROR: Failed to parse the ONNX file.")
+ for error in range(parser.num_errors):
+ print(parser.get_error(error))
+ return None
+
+ config.set_flag(trt.BuilderFlag.INT8)
+
+ setDynamicRange(network, json_file)
+
+ engine = builder.build_engine(network, config)
+
+ with open(engine_file, "wb") as f:
+ f.write(engine.serialize())
+
+
+if __name__ == "__main__":
+ # Add plugins if needed
+ # import ctypes
+ # ctypes.CDLL("libmmdeploy_tensorrt_ops.so")
+ parser = argparse.ArgumentParser(
+ description="Writing qparams to onnx to convert tensorrt engine."
+ )
+ parser.add_argument("--onnx", type=str, default=None)
+ parser.add_argument("--qparam_json", type=str, default=None)
+ parser.add_argument("--engine", type=str, default=None)
+ arg = parser.parse_args()
+
+ build_engine(arg.onnx, arg.qparam_json, arg.engine)
+ print("\033[1;32mgenerate %s\033[0m" % arg.engine)
\ No newline at end of file
diff --git a/models/cv/classification/wide_resnet50/ixrt/calibration_dataset.py b/models/cv/classification/wide_resnet50/ixrt/calibration_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..ec931c656abf5b2309dc9938490df46e4e8cdb19
--- /dev/null
+++ b/models/cv/classification/wide_resnet50/ixrt/calibration_dataset.py
@@ -0,0 +1,112 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import os
+
+import torch
+import torchvision.datasets
+from torch.utils.data import DataLoader
+from torchvision import models
+from torchvision import transforms as T
+
+
+class CalibrationImageNet(torchvision.datasets.ImageFolder):
+ def __init__(self, *args, **kwargs):
+ super(CalibrationImageNet, self).__init__(*args, **kwargs)
+ img2label_path = os.path.join(self.root, "val_map.txt")
+ if not os.path.exists(img2label_path):
+ raise FileNotFoundError(f"Not found label file `{img2label_path}`.")
+
+ self.img2label_map = self.make_img2label_map(img2label_path)
+
+ def make_img2label_map(self, path):
+ with open(path) as f:
+ lines = f.readlines()
+
+        img2label_map = dict()
+        for line in lines:
+            line = line.strip().split("\t")
+            if len(line) != 2:
+                continue
+            img_name, label = line
+            img_name = img_name.strip()
+            if img_name in [None, ""]:
+                continue
+            label = int(label.strip())
+            img2label_map[img_name] = label
+        return img2label_map
+
+ def __getitem__(self, index):
+ path, target = self.samples[index]
+ sample = self.loader(path)
+ if self.transform is not None:
+ sample = self.transform(sample)
+ # if self.target_transform is not None:
+ # target = self.target_transform(target)
+ img_name = os.path.basename(path)
+ target = self.img2label_map[img_name]
+
+ return sample, target
+
+
+def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0):
+ dataset = CalibrationImageNet(
+ data_path,
+ transform=T.Compose(
+ [
+ T.Resize(256),
+ T.CenterCrop(img_sz),
+ T.ToTensor(),
+ T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+ ]
+ ),
+ )
+
+ calibration_dataset = dataset
+ if num_samples is not None:
+ calibration_dataset = torch.utils.data.Subset(
+ dataset, indices=range(num_samples)
+ )
+
+ calibration_dataloader = DataLoader(
+ calibration_dataset,
+ shuffle=True,
+ batch_size=batch_size,
+ drop_last=False,
+ num_workers=workers,
+ )
+
+ verify_dataloader = DataLoader(
+ dataset,
+ shuffle=False,
+ batch_size=batch_size,
+ drop_last=False,
+ num_workers=workers,
+ )
+
+ return calibration_dataloader, verify_dataloader
+
+
+def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000):
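+    # draw step * batch_size samples for calibration (capped at total_sample); step < 0 means use the full set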
+ num_samples = min(total_sample, step * batch_size)
+ if step < 0:
+ num_samples = None
+ calibration_dataloader, _ = create_dataloaders(
+ dataset_dir,
+ img_sz=img_sz,
+ batch_size=batch_size,
+ workers=workers,
+ num_samples=num_samples,
+ )
+ return calibration_dataloader
diff --git a/models/cv/classification/wide_resnet50/ixrt/common.py b/models/cv/classification/wide_resnet50/ixrt/common.py
new file mode 100644
index 0000000000000000000000000000000000000000..0458195e5b7980ce70585d7284ca8a875afa3fd6
--- /dev/null
+++ b/models/cv/classification/wide_resnet50/ixrt/common.py
@@ -0,0 +1,78 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import os
+import cv2
+import glob
+import torch
+import tensorrt
+import numpy as np
+import pycuda.driver as cuda
+
+def eval_batch(batch_score, batch_label):
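+    # count top-1 / top-5 hits for one batch of logits against the integer labels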
+    batch_score = torch.from_numpy(batch_score).to(torch.float32)
+ values, indices = batch_score.topk(5)
+ top1, top5 = 0, 0
+ for idx, label in enumerate(batch_label):
+
+ if label == indices[idx][0]:
+ top1 += 1
+ if label in indices[idx]:
+ top5 += 1
+ return top1, top5
+
+def create_engine_context(engine_path, logger):
+ with open(engine_path, "rb") as f:
+ runtime = tensorrt.Runtime(logger)
+ assert runtime
+ engine = runtime.deserialize_cuda_engine(f.read())
+ assert engine
+ context = engine.create_execution_context()
+ assert context
+
+ return engine, context
+
+def get_io_bindings(engine):
+ # Setup I/O bindings
+ inputs = []
+ outputs = []
+ allocations = []
+
+ for i in range(engine.num_bindings):
+ is_input = False
+ if engine.binding_is_input(i):
+ is_input = True
+ name = engine.get_binding_name(i)
+ dtype = engine.get_binding_dtype(i)
+ shape = engine.get_binding_shape(i)
+ if is_input:
+ batch_size = shape[0]
+ size = np.dtype(tensorrt.nptype(dtype)).itemsize
+ for s in shape:
+ size *= s
+ allocation = cuda.mem_alloc(size)
+ binding = {
+ "index": i,
+ "name": name,
+ "dtype": np.dtype(tensorrt.nptype(dtype)),
+ "shape": list(shape),
+ "allocation": allocation,
+ }
+ print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}")
+ allocations.append(allocation)
+ if engine.binding_is_input(i):
+ inputs.append(binding)
+ else:
+ outputs.append(binding)
+ return inputs, outputs, allocations
diff --git a/models/cv/classification/wide_resnet50/ixrt/config/WIDE_RESNET50_CONFIG b/models/cv/classification/wide_resnet50/ixrt/config/WIDE_RESNET50_CONFIG
new file mode 100644
index 0000000000000000000000000000000000000000..04e6b34078b14979940a6f5b0747b8032ab6fc2a
--- /dev/null
+++ b/models/cv/classification/wide_resnet50/ixrt/config/WIDE_RESNET50_CONFIG
@@ -0,0 +1,33 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+# IMGSIZE : height/width of the model input
+# MODEL_NAME : basename for the generated onnx/engine files
+# ORIGINE_MODEL : name of the original onnx file
+IMGSIZE=224
+MODEL_NAME=Wide_Resnet50
+ORIGINE_MODEL=wide_resnet50.onnx
+
+# QUANT CONFIG (only takes effect when PRECISION is int8)
+    # QUANT_OBSERVER : quantization observer, one of [hist_percentile, percentile, minmax, entropy, ema]
+    # QUANT_BATCHSIZE : batch size of the calibration dataloader; keep it the same as the batch size in the onnx, otherwise some ops (e.g. Reshape) may infer wrong shapes
+    # QUANT_STEP : number of calibration steps
+    # QUANT_SEED : random seed, to keep the quantization result reproducible
+    # QUANT_EXIST_ONNX : path to an already-quantized model from another source, if any
+QUANT_OBSERVER=minmax
+QUANT_BATCHSIZE=1
+QUANT_STEP=32
+QUANT_SEED=42
+DISABLE_QUANT_LIST=
+QUANT_EXIST_ONNX=
diff --git a/models/cv/classification/wide_resnet50/ixrt/export.py b/models/cv/classification/wide_resnet50/ixrt/export.py
new file mode 100644
index 0000000000000000000000000000000000000000..1d3c64c825ab3aaf172f0c6ca7ef9b802ea06bf9
--- /dev/null
+++ b/models/cv/classification/wide_resnet50/ixrt/export.py
@@ -0,0 +1,61 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import torch
+import torchvision
+import argparse
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--weight",
+ type=str,
+ required=True,
+ help="pytorch model weight.")
+
+ parser.add_argument("--output",
+ type=str,
+ required=True,
+ help="export onnx model path.")
+
+ args = parser.parse_args()
+ return args
+
+def main():
+ args = parse_args()
+
+ model = torchvision.models.wide_resnet50_2()
+ model.load_state_dict(torch.load(args.weight))
+ model.eval()
+
+ input_names = ['input']
+ output_names = ['output']
+ dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}}
+ dummy_input = torch.randn(1, 3, 224, 224)
+
+ torch.onnx.export(
+ model,
+ dummy_input,
+ args.output,
+ input_names = input_names,
+ dynamic_axes = dynamic_axes,
+ output_names = output_names,
+ opset_version=13
+ )
+
+ print("Export onnx model successfully! ")
+
+if __name__ == "__main__":
+ main()
diff --git a/models/cv/classification/wide_resnet50/ixrt/inference.py b/models/cv/classification/wide_resnet50/ixrt/inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..2c9dcb3f9cc5b9a26903651a31fafa16d8f0db31
--- /dev/null
+++ b/models/cv/classification/wide_resnet50/ixrt/inference.py
@@ -0,0 +1,156 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import argparse
+import json
+import os
+import re
+import time
+from tqdm import tqdm
+
+import cv2
+import numpy as np
+import pycuda.autoinit
+import pycuda.driver as cuda
+import torch
+import tensorrt
+
+from calibration_dataset import getdataloader
+from common import eval_batch, create_engine_context, get_io_bindings
+
+def main(config):
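+    # test_mode "FPS": run the engine loop_count times and report throughput only;
+    # test_mode "ACC": run the ImageNet validation set through the engine and report top-1/top-5 accuracy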
+ dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz)
+
+ logger = tensorrt.Logger(tensorrt.Logger.ERROR)
+
+ # Load Engine && I/O bindings
+ engine, context = create_engine_context(config.engine_file, logger)
+ inputs, outputs, allocations = get_io_bindings(engine)
+
+ # Warm up
+ if config.warm_up > 0:
+ print("\nWarm Start.")
+ for i in range(config.warm_up):
+ context.execute_v2(allocations)
+ print("Warm Done.")
+
+ # Inference
+ if config.test_mode == "FPS":
+ torch.cuda.synchronize()
+ start_time = time.time()
+
+ for i in range(config.loop_count):
+ context.execute_v2(allocations)
+
+ torch.cuda.synchronize()
+ end_time = time.time()
+ forward_time = end_time - start_time
+
+ num_samples = 50000
+ if config.loop_count * config.bsz < num_samples:
+ num_samples = config.loop_count * config.bsz
+ fps = num_samples / forward_time
+
+ print("FPS : ", fps)
+ print(f"Performance Check : Test {fps} >= target {config.fps_target}")
+ if fps >= config.fps_target:
+ print("pass!")
+ exit()
+ else:
+ print("failed!")
+ exit(1)
+
+ elif config.test_mode == "ACC":
+
+ ## Prepare the output data
+ output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"])
+ print(f"output shape : {output.shape} output type : {output.dtype}")
+
+ total_sample = 0
+ acc_top1, acc_top5 = 0, 0
+
+ with tqdm(total= len(dataloader)) as _tqdm:
+ for idx, (batch_data, batch_label) in enumerate(dataloader):
+ batch_data = batch_data.numpy().astype(inputs[0]["dtype"])
+ batch_data = np.ascontiguousarray(batch_data)
+ total_sample += batch_data.shape[0]
+
+ cuda.memcpy_htod(inputs[0]["allocation"], batch_data)
+ context.execute_v2(allocations)
+ cuda.memcpy_dtoh(output, outputs[0]["allocation"])
+
+            # squeeze trailing singleton dims, e.g. [32,1000,1,1] -> [32,1000]
+ if len(output.shape) == 4:
+ output = output.squeeze(axis=(2,3))
+
+ batch_top1, batch_top5 = eval_batch(output, batch_label)
+ acc_top1 += batch_top1
+ acc_top5 += batch_top5
+
+ _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample),
+ acc_5='{:.4f}'.format(acc_top5/total_sample))
+ _tqdm.update(1)
+
+ print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}")
+ print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}")
+ acc1 = acc_top1/total_sample
+ print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}")
+ if acc1 >= config.acc_target:
+ print("pass!")
+ exit()
+ else:
+ print("failed!")
+ exit(1)
+
+def parse_config():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP")
+ parser.add_argument(
+ "--engine_file",
+ type=str,
+ help="engine file path"
+ )
+ parser.add_argument(
+ "--datasets_dir",
+ type=str,
+ default="",
+ help="ImageNet dir",
+ )
+ parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times")
+ parser.add_argument("--bsz", type=int, default=32, help="test batch size")
+ parser.add_argument(
+ "--imgsz",
+ "--img",
+ "--img-size",
+ type=int,
+ default=224,
+ help="inference size h,w",
+ )
+ parser.add_argument("--use_async", action="store_true")
+ parser.add_argument(
+ "--device", type=int, default=0, help="cuda device, i.e. 0 or 0,1,2,3,4"
+ )
+ parser.add_argument("--fps_target", type=float, default=-1.0)
+ parser.add_argument("--acc_target", type=float, default=-1.0)
+ parser.add_argument("--loop_count", type=int, default=-1)
+
+ config = parser.parse_args()
+ return config
+
+if __name__ == "__main__":
+ config = parse_config()
+ main(config)
diff --git a/models/cv/classification/wide_resnet50/ixrt/modify_batchsize.py b/models/cv/classification/wide_resnet50/ixrt/modify_batchsize.py
new file mode 100644
index 0000000000000000000000000000000000000000..689b7a972dcbfec77c185592ede16bb4f04fa4fd
--- /dev/null
+++ b/models/cv/classification/wide_resnet50/ixrt/modify_batchsize.py
@@ -0,0 +1,56 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import onnx
+import argparse
+
+def change_input_dim(model, bsz):
+ batch_size = bsz
+
+ # The following code changes the first dimension of every input to be batch_size
+ # Modify as appropriate ... note that this requires all inputs to
+ # have the same batch_size
+ inputs = model.graph.input
+ for input in inputs:
+        # Checks omitted. This assumes that all inputs are tensors and have a shape with a first dim.
+ # Add checks as needed.
+ dim1 = input.type.tensor_type.shape.dim[0]
+ # update dim to be a symbolic value
+ if isinstance(batch_size, str):
+ # set dynamic batch size
+ dim1.dim_param = batch_size
+ elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int):
+ # set given batch size
+ dim1.dim_value = int(batch_size)
+ else:
+ # set batch size of 1
+ dim1.dim_value = 1
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--batch_size", type=int)
+ parser.add_argument("--origin_model", type=str)
+ parser.add_argument("--output_model", type=str)
+ args = parser.parse_args()
+ return args
+
+args = parse_args()
+model = onnx.load(args.origin_model)
+change_input_dim(model, args.batch_size)
+onnx.save(model, args.output_model)
+
+
+
+
+
diff --git a/models/cv/classification/wide_resnet50/ixrt/quant.py b/models/cv/classification/wide_resnet50/ixrt/quant.py
new file mode 100644
index 0000000000000000000000000000000000000000..5d71c828629bb0370aa40c5bcdcf117812bbaedc
--- /dev/null
+++ b/models/cv/classification/wide_resnet50/ixrt/quant.py
@@ -0,0 +1,166 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+"""这是一个高度自动化的 PPQ 量化的入口脚本,将你的模型和数据按要求进行打包:
+
+在自动化 API 中,我们使用 QuantizationSetting 对象传递量化参数。
+
+This file will show you how to quantize your network with PPQ
+ You should prepare your model and calibration dataset as follow:
+
+ ~/working/model.onnx <-- your model
+ ~/working/data/*.npy or ~/working/data/*.bin <-- your dataset
+
+if you are using caffe model:
+    ~/working/model.caffemodel      <--  your model
+    ~/working/model.prototxt        <--  your model
+
+### MAKE SURE YOUR INPUT LAYOUT IS [N, C, H, W] or [C, H, W] ###
+
+quantized model will be generated at: ~/working/quantized.onnx
+"""
+from ppq import *
+from ppq.api import *
+import os
+from calibration_dataset import getdataloader
+import argparse
+import random
+import numpy as np
+import torch
+
+
+def setseed(seed=42):
+ random.seed(seed)
+ np.random.seed(seed)
+ torch.manual_seed(seed)
+
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--model_name", type=str)
+ parser.add_argument("--model", type=str)
+ parser.add_argument("--dataset_dir", type=str, default="imagenet_val")
+ parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"],
+ default="hist_percentile")
+ parser.add_argument("--disable_quant_names", nargs='*', type=str)
+ parser.add_argument("--save_dir", type=str, help="save path", default=None)
+ parser.add_argument("--bsz", type=int, default=32)
+ parser.add_argument("--step", type=int, default=20)
+ parser.add_argument("--seed", type=int, default=42)
+ parser.add_argument("--imgsz", type=int, default=224)
+ args = parser.parse_args()
+ print("Quant config:", args)
+ print(args.disable_quant_names)
+ return args
+
+
+config = parse_args()
+
+# modify configuration below:
+WORKING_DIRECTORY = 'checkpoints' # choose your working directory
+TARGET_PLATFORM = TargetPlatform.TRT_INT8 # choose your target platform
+MODEL_TYPE = NetworkFramework.ONNX # or NetworkFramework.CAFFE
+INPUT_LAYOUT = 'chw' # input data layout, chw or hwc
+NETWORK_INPUTSHAPE = [1, 3, 224, 224] # input shape of your network
+EXECUTING_DEVICE = 'cuda' # 'cuda' or 'cpu'.
+REQUIRE_ANALYSE = False
+TRAINING_YOUR_NETWORK = False                              # whether to fine-tune the network to recover accuracy
+# -------------------------------------------------------------------
+# Load your model file. PPQ will parse the onnx or caffe model into its own graph format.
+# If you are working in pytorch, tensorflow or another framework, export the model to onnx first
+# with torch.onnx.export; if the export of your torch model fails, feel free to contact us.
+# -------------------------------------------------------------------
+graph = None
+if MODEL_TYPE == NetworkFramework.ONNX:
+ graph = load_onnx_graph(onnx_import_file=config.model)
+if MODEL_TYPE == NetworkFramework.CAFFE:
+ graph = load_caffe_graph(
+ caffemodel_path=os.path.join(WORKING_DIRECTORY, 'model.caffemodel'),
+ prototxt_path=os.path.join(WORKING_DIRECTORY, 'model.prototxt'))
+assert graph is not None, 'Graph Loading Error, Check your input again.'
+
+# -------------------------------------------------------------------
+# The SETTING object controls PPQ's quantization logic: graph fusion, scheduling, and the detailed quantization strategy.
+# If the quantization error of your network is too high, adjust the attributes of this SETTING object.
+# -------------------------------------------------------------------
+QS = QuantizationSettingFactory.default_setting()
+
+# -------------------------------------------------------------------
+# The block below shows how to use a finetuning pass to improve quantization accuracy.
+# PPQ ships more than ten algorithms for recovering accuracy;
+# each of them is enabled with QS.xxxx = True.
+# Enable them as needed; turning everything on at once is likely to blow up.
+# -------------------------------------------------------------------
+if TRAINING_YOUR_NETWORK:
+    QS.lsq_optimization = True                                      # enable network retraining to reduce quantization error
+    QS.lsq_optimization_setting.steps = 500                         # number of retraining steps; affects training time, 500 steps take a few minutes
+    QS.lsq_optimization_setting.collecting_device = 'cuda'          # where cached data is kept; 'cuda' keeps it on the gpu, switch to 'cpu' if device memory runs out
+
+
+dataloader = getdataloader(config.dataset_dir, config.step, batch_size=config.bsz, img_sz=config.imgsz)
+# ENABLE_CUDA_KERNEL speeds quantization up by roughly 3x ~ 10x, but it cannot be compiled without the proper build environment.
+# Either install the build environment, or quantize without the CUDA kernel by removing the `with ENABLE_CUDA_KERNEL():` block.
+with ENABLE_CUDA_KERNEL():
+    print('Quantizing the network; depending on your configuration this will take a while:')
+ quantized = quantize_native_model(
+        setting=QS,                                 # the setting object controls the standard quantization logic
+ model=graph,
+ calib_dataloader=dataloader,
+ calib_steps=config.step,
+        input_shape=NETWORK_INPUTSHAPE,             # use this argument if your network has a single input
+        inputs=None,
+        # if your network has multiple inputs, pass them here instead, i.e. input_shape=None, inputs=[torch.zeros(1,3,224,224), torch.zeros(1,3,224,224)]
+        collate_fn=lambda x: x[0].to(EXECUTING_DEVICE),  # same role as a torch dataloader collate_fn: preprocesses each calibration batch;
+        # you can also do this in the dataloader itself and set this argument to None
+ platform=TARGET_PLATFORM,
+ device=EXECUTING_DEVICE,
+ do_quantize=True)
+
+ # -------------------------------------------------------------------
+    # To run the quantized network and get its outputs, create an executor.
+    # The executor behaves much like a torch.Module and can be used to fetch execution results.
+    # Note that this must be done before exporting the graph.
+ # -------------------------------------------------------------------
+ executor = TorchExecutor(graph=quantized, device=EXECUTING_DEVICE)
+ # output = executor.forward(input)
+
+ # -------------------------------------------------------------------
+    # PPQ measures quantization error as the inverse of the signal-to-noise ratio, i.e. noise energy / signal energy.
+    # An error of 0.1 means quantization noise accounts for roughly 10% of the overall signal energy.
+    # Note that graphwise_error_analyse reports the accumulated error:
+    # the last layers of the network usually show the largest values, caused jointly by all layers before them.
+    # Use layerwise_error_analyse to trace the error back to individual layers.
+ # -------------------------------------------------------------------
+    print('Computing graphwise quantization error (SNR); the error of the last layer should stay below 0.1 to preserve accuracy:')
+ reports = graphwise_error_analyse(
+ graph=quantized, running_device=EXECUTING_DEVICE, steps=32,
+ dataloader=dataloader, collate_fn=lambda x: x[0].to(EXECUTING_DEVICE))
+ for op, snr in reports.items():
+        if snr > 0.1: ppq_warning(f'Layer {op} has a significant accumulated quantization error; consider optimizing it.')
+
+ if REQUIRE_ANALYSE:
+        print('Computing layerwise quantization error (SNR); the standalone error of each layer should stay below 0.1 to preserve accuracy:')
+ layerwise_error_analyse(graph=quantized, running_device=EXECUTING_DEVICE,
+ interested_outputs=None,
+ dataloader=dataloader, collate_fn=lambda x: x.to(EXECUTING_DEVICE))
+
+ # -------------------------------------------------------------------
+    # Use export_ppq_graph to export the quantized model.
+    # PPQ adapts the model format to the export platform you selected.
+ # -------------------------------------------------------------------
+    print('Quantization finished; generating the target files:')
+ export_ppq_graph(
+ graph=quantized, platform=TARGET_PLATFORM,
+ graph_save_to=os.path.join(config.save_dir, f"quantized_{config.model_name}.onnx"),
+ config_save_to=os.path.join(config.save_dir, 'quant_cfg.json'))
diff --git a/models/cv/classification/wide_resnet50/ixrt/refine_model.py b/models/cv/classification/wide_resnet50/ixrt/refine_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..6f1e6c2f6325651556267ceed7e4403a565a2f69
--- /dev/null
+++ b/models/cv/classification/wide_resnet50/ixrt/refine_model.py
@@ -0,0 +1,290 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import os
+import argparse
+import dataclasses
+
+import torch
+import torch.nn.functional as F
+import onnx
+
+from refine_utils.matmul_to_gemm_pass import FusedGemmPass
+from refine_utils.linear_pass import FusedLinearPass
+
+from refine_utils.common import *
+
+def get_constant_input_name_of_operator(graph: Graph, operator: Operator):
+ const = None
+ for input in operator.inputs:
+ if not graph.containe_var(input):
+ continue
+
+ if not graph.is_leaf_variable(input):
+ continue
+
+ input_var = graph.get_variable(input)
+ if input_var.value is not None:
+ const = input
+ return const
+
+class FuseLayerNormPass(BasePass):
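+    # Fuses the ReduceMean/Sub/Pow/ReduceMean/Add/Sqrt/Div/Mul/Add subgraph produced by
+    # ONNX export into a single LayerNormalization operator.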
+
+ def process(self, graph: Graph) -> Graph:
+ self.transform = GraphTransform(graph)
+ find_sequence_subgraph(
+ graph,
+ [OP.REDUCE_MEAN, OP.SUB, OP.POW, OP.REDUCE_MEAN, OP.ADD, OP.SQRT, OP.DIV, OP.MUL, OP.ADD],
+ self.fuse_layer_norm,
+ strict=False
+ )
+ return graph
+
+ def fuse_layer_norm(self, graph: Graph, pattern: PatternGraph):
+        # check that the input of REDUCE_MEAN is the same as the input of SUB
+ if pattern.nodes[0].operator.inputs[0] != pattern.nodes[1].operator.inputs[0]:
+ return
+
+        # check that the input of POW is the same as the input of DIV
+ if pattern.nodes[2].operator.inputs[0] != pattern.nodes[6].operator.inputs[0]:
+ return
+
+        # check that none of the intermediate outputs is consumed by more than one operator
+ nodes = pattern.nodes
+ for node in [nodes[0]] + nodes[2:-1]:
+ next_ops = graph.get_next_operators(node.operator)
+ if len(next_ops) > 1:
+ return
+
+ eps = None
+ for input in nodes[4].operator.inputs:
+ input_var = graph.get_variable(input)
+ if input_var.value is not None and graph.is_leaf_variable(input):
+ eps = to_py_type(input_var.value)
+
+ scale = get_constant_input_name_of_operator(graph, nodes[-2].operator)
+ bias = get_constant_input_name_of_operator(graph, nodes[-1].operator)
+
+ self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator)
+
+ bias_var = graph.get_variable(bias)
+ print(bias_var)
+
+ attributes = {
+ "axis": nodes[0].operator.attributes.axes,
+ "epsilon": eps,
+ }
+
+
+ layer_norm_op = self.transform.make_operator(
+ op_type="LayerNormalization",
+ inputs=[nodes[0].operator.inputs[0], scale, bias],
+ outputs=[nodes[-1].operator.outputs[0]],
+ **attributes
+ )
+
+ self.transform.add_operator(layer_norm_op)
+
+class FusedGeluPass(BasePass):
+
+ def process(self, graph: Graph) -> Graph:
+ self.transform = GraphTransform(graph)
+
+ find_sequence_subgraph(
+ graph, pattern=[OP.DIV, OP.ERF, OP.ADD, OP.MUL, OP.MUL], callback=self.fuse_gelu, strict=True
+ )
+ return graph
+
+ def fuse_gelu(self, graph: Graph, pattern: PatternGraph):
+ nodes = pattern.nodes
+ prev_op = self.transform.get_previous_operators(nodes[0].operator)[0]
+ next_ops = self.transform.get_next_operators(prev_op)
+ if len(next_ops) != 2:
+ return
+
+ if nodes[0].operator not in next_ops or nodes[3].operator not in next_ops:
+ return
+
+ gelu_op_input = None
+ for input in nodes[3].operator.inputs:
+ if input in nodes[0].operator.inputs:
+ gelu_op_input = input
+ break
+
+ self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator)
+
+ gelu_op = self.transform.make_operator(
+ op_type=OP.GELU,
+ inputs=[gelu_op_input],
+ outputs=[nodes[-1].operator.outputs[0]]
+ )
+ self.transform.add_operator(gelu_op)
+
+@dataclasses.dataclass
+class NormalizeAttr(BaseOperatorAttr):
+ p: float = 2.0
+ epsilon: float = 1e-12
+ axis: int = 1
+
+
+@registe_operator(OP.GELU)
+class GeluOperator(BaseOperator):
+
+ def call(
+ self,
+ executor,
+ operator: Operator,
+ inputs: List,
+ attr: NormalizeAttr,
+ ):
+ return F.gelu(inputs[0])
+
+ def convert_onnx_operator(
+ self, ir_graph: Graph, onnx_graph: onnx.GraphProto, node: onnx.NodeProto
+ ) -> Operator:
+ return default_converter(ir_graph, onnx_graph, node, attr_cls=attr.EmptyAttr)
+
+ def quantize(
+ self,
+ graph: Graph,
+ op: Operator,
+ operator_observer_config: QuantOperatorObserverConfig,
+ quant_outputs: bool = False,
+ ):
+ return quant_single_input_operator(graph, op, operator_observer_config, quant_outputs=quant_outputs)
+
+
+
+class ClearUnsedVariables(BasePass):
+
+ def process(self, graph: Graph) -> Graph:
+ vars = list(graph.variables)
+
+ for var in vars:
+ if len(graph.get_dst_operators(var)) == 0 and graph.is_leaf_variable(var):
+ graph.delete_variable(var)
+
+ quant_params = list(graph.quant_parameters.keys())
+ for var in quant_params:
+ if not graph.containe_var(var):
+ graph.quant_parameters.pop(var)
+
+ return graph
+
+class FormatLayerNorm(BasePass):
+
+ def process(self, graph: Graph) -> Graph:
+ for op in graph.operators.values():
+ if "LayerNorm" in op.op_type:
+ self.format_layer_norm(graph, op)
+ return graph
+
+ def format_layer_norm(self, graph, operator):
+ if not hasattr(operator.attributes, "axis"):
+ return
+ if isinstance(operator.attributes.axis, (tuple, list)):
+ operator.attributes.axis = operator.attributes.axis[0]
+
+class FormatReshape(BasePass):
+
+ def process(self, graph: Graph) -> Graph:
+ for op in graph.operators.values():
+ if op.op_type == "Reshape":
+ self.format_reshape(graph, op)
+
+ return graph
+
+ def format_reshape(self, graph, operator):
+ shape = graph.get_variable(operator.inputs[1])
+ shape.value = torch.tensor(shape.value, dtype=torch.int64)
+
+class FormatScalar(BasePass):
+
+ def process(self, graph: Graph):
+ for var in graph.variables.values():
+ var: Variable
+ use_ops = graph.get_dst_operators(var)
+
+ if len(use_ops) == 0:
+ continue
+
+ if use_ops[0].op_type not in [OP.MUL, OP.ADD, OP.GATHER]:
+ continue
+
+ if var.value is not None and var.value.ndim == 0:
+ var.value = var.value.reshape(1)
+ print(f"Reshape scalar to tensor for {var.name}.")
+
+ return graph
+
+class RenamePass(BasePass):
+
+ def process(self, graph:Graph):
+
+ names = [name for name in graph.operators.keys()]
+ for old_name in names:
+ new_name = old_name.replace("/", "#")
+
+ graph.rename_operator(old_name, new_name)
+
+ names = [name for name in graph.variables.keys()]
+ for name in names:
+ new_name = name.replace("/", ".").replace("Output", "out").replace("output", "out")
+
+ graph.rename_vaiable(name, new_name,
+ with_variables=True,
+ with_operator_outputs=True)
+
+ return graph
+
+def create_pipeline(example_inputs):
+ return PassSequence(
+ # FuseLayerNormPass(),
+ FusedGeluPass(),
+
+ # ClearUnsedVariables(),
+ # FormatLayerNorm(),
+ # FormatReshape(),
+ # FormatScalar(),
+ # RenamePass()
+ )
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--onnx_path", type=str)
+ parser.add_argument("--dst_onnx_path", type=str)
+
+ parser.add_argument("--bsz", type=int, default=8,
+ help="Batch size")
+ parser.add_argument("--imgsz", type=int, default=224,
+ help="Image size")
+
+ return parser.parse_args()
+
+
+if __name__ == "__main__":
+ args = parse_args()
+
+ example_inputs = torch.randn(args.bsz, 3, args.imgsz, args.imgsz)
+
+ refine_pipline = Pipeline(
+ create_source(f"{args.onnx_path}", example_inputs=example_inputs),
+ create_pipeline(example_inputs),
+ create_target(
+ f"{args.dst_onnx_path}",
+ example_inputs=example_inputs,
+ )
+ )
+ refine_pipline.run()
+
+ print(f"refine the model, input shape={example_inputs.shape}")
diff --git a/models/cv/classification/wide_resnet50/ixrt/refine_utils/__init__.py b/models/cv/classification/wide_resnet50/ixrt/refine_utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/models/cv/classification/wide_resnet50/ixrt/refine_utils/common.py b/models/cv/classification/wide_resnet50/ixrt/refine_utils/common.py
new file mode 100644
index 0000000000000000000000000000000000000000..2af19a14df73cea6ba27ad6a8ad020fe0bec7aaa
--- /dev/null
+++ b/models/cv/classification/wide_resnet50/ixrt/refine_utils/common.py
@@ -0,0 +1,36 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+from typing import Union, Callable, List
+
+from tensorrt.deploy.api import *
+from tensorrt.deploy.backend.onnx.converter import default_converter
+from tensorrt.deploy.backend.torch.executor.operators._operators import to_py_type
+from tensorrt.deploy.ir.operator_attr import BaseOperatorAttr, EmptyAttr
+from tensorrt.deploy.ir.operator_type import OperatorType as OP
+from tensorrt.deploy.ir import operator_attr as attr, Operator, generate_operator_name
+from tensorrt.deploy.fusion import BasePass, PatternGraph, build_sequence_graph, GraphMatcher, PassSequence
+from tensorrt.deploy.ir import Graph
+from tensorrt.deploy.quantizer.quant_operator.base import quant_single_input_operator
+from tensorrt.deploy.backend.onnx.converter import convert_onnx_operator
+
+def find_sequence_subgraph(graph,
+ pattern: Union[List[str], PatternGraph],
+ callback: Callable[[Graph, PatternGraph], None],
+ strict=True):
+ if isinstance(pattern, List):
+ pattern = build_sequence_graph(pattern)
+
+ matcher = GraphMatcher(pattern, strict=strict)
+ return matcher.findall(graph, callback)
\ No newline at end of file
diff --git a/models/cv/classification/wide_resnet50/ixrt/refine_utils/linear_pass.py b/models/cv/classification/wide_resnet50/ixrt/refine_utils/linear_pass.py
new file mode 100644
index 0000000000000000000000000000000000000000..29b5e4a96e6edc448168bd78ede3111f6b59c032
--- /dev/null
+++ b/models/cv/classification/wide_resnet50/ixrt/refine_utils/linear_pass.py
@@ -0,0 +1,113 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import dataclasses
+
+from refine_utils.common import *
+
+# AXB=C, Only for B is initializer
+
+class FusedLinearPass(BasePass):
+
+ def process(self, graph: Graph) -> Graph:
+ self.transform = GraphTransform(graph)
+
+ find_sequence_subgraph(
+ graph, pattern=[OP.MATMUL, OP.ADD], callback=self.to_linear_with_bias, strict=True
+ )
+ find_sequence_subgraph(
+ graph, pattern=[OP.MATMUL], callback=self.to_linear, strict=True
+ )
+ return graph
+
+ def to_linear_with_bias(self, graph, pattern: PatternGraph):
+ matmul = pattern.nodes[0]
+ add = pattern.nodes[1]
+ if len(add.operator.inputs) != 2:
+ return
+
+ b_var = graph.get_variable(matmul.operator.inputs[1])
+ if not graph.is_leaf_variable(b_var) or b_var.value is None:
+ return
+
+ if b_var.value.ndim != 2:
+ return
+
+ bias_var = None
+ for input in add.operator.inputs:
+ if input not in matmul.operator.outputs:
+ bias_var = input
+
+ inputs = matmul.operator.inputs
+ inputs.append(bias_var)
+ outputs = add.operator.outputs
+
+ b_var.value = b_var.value.transpose(1, 0)
+ b_var.shape[0],b_var.shape[1] = b_var.shape[1],b_var.shape[0]
+
+ hidden_size = b_var.shape[1]
+ linear_dim = b_var.shape[0]
+
+ attributes = {
+ "hidden_size": hidden_size,
+ "linear_dim": linear_dim,
+ "has_bias": 1,
+ "act_type":"none"
+ }
+
+ self.transform.make_operator(
+ "LinearFP16",
+ inputs=inputs,
+ outputs=outputs,
+ **attributes
+ )
+
+ self.transform.delete_operator(add.operator)
+ self.transform.delete_operator(matmul.operator)
+
+ def to_linear(self, graph, pattern: PatternGraph):
+ matmul = pattern.nodes[0]
+ if len(matmul.operator.inputs) != 2:
+ return
+
+ b_var = graph.get_variable(matmul.operator.inputs[1])
+ if not graph.is_leaf_variable(b_var) or b_var.value is None:
+ return
+
+ if b_var.value.ndim != 2:
+ return
+
+        # transpose the weight first so hidden_size/linear_dim are defined before the attributes dict uses them
+        b_var.value = b_var.value.transpose(1, 0)
+        b_var.shape[0], b_var.shape[1] = b_var.shape[1], b_var.shape[0]
+
+        hidden_size = b_var.shape[1]
+        linear_dim = b_var.shape[0]
+
+        attributes = {
+            "hidden_size": hidden_size,
+            "linear_dim": linear_dim,
+            "has_bias": 0,
+            "act_type": "none"
+        }
+
+ op = self.transform.make_operator(
+ op_type = "LinearFP16",
+ inputs = pattern.nodes[0].operator.inputs,
+ outputs=[pattern.nodes[-1].operator.outputs[0]],
+ **attributes
+ )
+
+ self.transform.add_operator(op)
+
+ self.transform.delete_operator(matmul.operator)
\ No newline at end of file
diff --git a/models/cv/classification/wide_resnet50/ixrt/refine_utils/matmul_to_gemm_pass.py b/models/cv/classification/wide_resnet50/ixrt/refine_utils/matmul_to_gemm_pass.py
new file mode 100644
index 0000000000000000000000000000000000000000..4ebfac4d917d6b05e46187f025c3c17184096e80
--- /dev/null
+++ b/models/cv/classification/wide_resnet50/ixrt/refine_utils/matmul_to_gemm_pass.py
@@ -0,0 +1,54 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+from refine_utils.common import *
+
+#
+# Common pattern Matmul to Gemm
+#
+class FusedGemmPass(BasePass):
+
+ def process(self, graph: Graph) -> Graph:
+ self.transform = GraphTransform(graph)
+
+ find_sequence_subgraph(
+ graph, pattern=[OP.MATMUL], callback=self.to_gemm, strict=True
+ )
+ return graph
+
+ def to_gemm(self, graph, pattern: PatternGraph):
+ matmul_op = pattern.nodes[0]
+ inputs = matmul_op.operator.inputs
+ outputs = matmul_op.operator.outputs
+
+        if len(inputs) != 2 or len(outputs) != 1:
+ return
+
+ for input in inputs:
+ if self.transform.is_leaf_variable(input):
+ return
+
+ print(f"{self.transform.get_variable(inputs[0]).shape} {self.transform.get_variable(inputs[1]).shape}")
+ self.transform.delete_operator(matmul_op.operator)
+
+ op = self.transform.make_operator(
+ op_type = "Gemm",
+ inputs = inputs,
+ outputs = outputs,
+ alpha = 1,
+ beta = 1,
+ transB = 1
+ )
+
+ self.transform.add_operator(op)
\ No newline at end of file
diff --git a/models/cv/classification/wide_resnet50/ixrt/scripts/infer_wide_resnet50_fp16_accuracy.sh b/models/cv/classification/wide_resnet50/ixrt/scripts/infer_wide_resnet50_fp16_accuracy.sh
new file mode 100644
index 0000000000000000000000000000000000000000..b743d7084ae058118c29daaf494769fc293ceb41
--- /dev/null
+++ b/models/cv/classification/wide_resnet50/ixrt/scripts/infer_wide_resnet50_fp16_accuracy.sh
@@ -0,0 +1,114 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+EXIT_STATUS=0
+check_status()
+{
+ if ((${PIPESTATUS[0]} != 0));then
+ EXIT_STATUS=1
+ fi
+}
+
+# Run parameters
+BSZ=32
+TGT=-1
+WARM_UP=0
+LOOP_COUNT=-1
+RUN_MODE=ACC
+PRECISION=float16
+
+# Update arguments
+index=0
+options=$@
+arguments=($options)
+for argument in $options
+do
+ index=`expr $index + 1`
+ case $argument in
+ --bs) BSZ=${arguments[index]};;
+ --tgt) TGT=${arguments[index]};;
+ esac
+done
+
+source ${CONFIG_DIR}
+ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo CONFIG_DIR : ${CONFIG_DIR}
+echo ====================== Model Info ======================
+echo Model Name : ${MODEL_NAME}
+echo Onnx Path : ${ORIGINE_MODEL}
+
+step=0
+SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
+
+# Simplify Model
+let step++
+echo;
+echo [STEP ${step}] : Simplify Model
+if [ -f ${SIM_MODEL} ];then
+ echo " "Simplify Model, ${SIM_MODEL} has been existed
+else
+ python3 ${RUN_DIR}/simplify_model.py \
+ --origin_model $ORIGINE_MODEL \
+ --output_model ${SIM_MODEL}
+ echo " "Generate ${SIM_MODEL}
+fi
+
+# Change Batchsize
+let step++
+echo;
+echo [STEP ${step}] : Change Batchsize
+FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx
+if [ -f $FINAL_MODEL ];then
+ echo " "Change Batchsize Skip, $FINAL_MODEL has been existed
+else
+ python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
+ --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL}
+ echo " "Generate ${FINAL_MODEL}
+fi
+
+# Build Engine
+let step++
+echo;
+echo [STEP ${step}] : Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
+if [ -f $ENGINE_FILE ];then
+ echo " "Build Engine Skip, $ENGINE_FILE has been existed
+else
+ python3 ${RUN_DIR}/build_engine.py \
+ --precision ${PRECISION} \
+ --model ${FINAL_MODEL} \
+ --engine ${ENGINE_FILE}
+ echo " "Generate Engine ${ENGINE_FILE}
+fi
+
+# Inference
+let step++
+echo;
+echo [STEP ${step}] : Inference
+python3 ${RUN_DIR}/inference.py \
+ --engine_file=${ENGINE_FILE} \
+ --datasets_dir=${DATASETS_DIR} \
+ --imgsz=${IMGSIZE} \
+ --warm_up=${WARM_UP} \
+ --loop_count ${LOOP_COUNT} \
+ --test_mode ${RUN_MODE} \
+ --acc_target ${TGT} \
+ --bsz ${BSZ}; check_status
+
+exit ${EXIT_STATUS}
diff --git a/models/cv/classification/wide_resnet50/ixrt/scripts/infer_wide_resnet50_fp16_performance.sh b/models/cv/classification/wide_resnet50/ixrt/scripts/infer_wide_resnet50_fp16_performance.sh
new file mode 100644
index 0000000000000000000000000000000000000000..e7a4f1a7276406a0ed7400af4368b5bec2a06e06
--- /dev/null
+++ b/models/cv/classification/wide_resnet50/ixrt/scripts/infer_wide_resnet50_fp16_performance.sh
@@ -0,0 +1,114 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+EXIT_STATUS=0
+check_status()
+{
+ if ((${PIPESTATUS[0]} != 0));then
+ EXIT_STATUS=1
+ fi
+}
+
+# Run parameters
+BSZ=32
+TGT=-1
+WARM_UP=3
+LOOP_COUNT=20
+RUN_MODE=FPS
+PRECISION=float16
+
+# Update arguments
+index=0
+options=$@
+arguments=($options)
+for argument in $options
+do
+ index=`expr $index + 1`
+ case $argument in
+ --bs) BSZ=${arguments[index]};;
+ --tgt) TGT=${arguments[index]};;
+ esac
+done
+
+source ${CONFIG_DIR}
+ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo CONFIG_DIR : ${CONFIG_DIR}
+echo ====================== Model Info ======================
+echo Model Name : ${MODEL_NAME}
+echo Onnx Path : ${ORIGINE_MODEL}
+
+step=0
+SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
+
+# Simplify Model
+let step++
+echo;
+echo [STEP ${step}] : Simplify Model
+if [ -f ${SIM_MODEL} ];then
+ echo " "Simplify Model, ${SIM_MODEL} has been existed
+else
+ python3 ${RUN_DIR}/simplify_model.py \
+ --origin_model $ORIGINE_MODEL \
+ --output_model ${SIM_MODEL}
+ echo " "Generate ${SIM_MODEL}
+fi
+
+# Change Batchsize
+let step++
+echo;
+echo [STEP ${step}] : Change Batchsize
+FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx
+if [ -f $FINAL_MODEL ];then
+ echo " "Change Batchsize Skip, $FINAL_MODEL has been existed
+else
+ python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
+ --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL}
+ echo " "Generate ${FINAL_MODEL}
+fi
+
+# Build Engine
+let step++
+echo;
+echo [STEP ${step}] : Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
+if [ -f $ENGINE_FILE ];then
+    echo " "Build Engine Skip, $ENGINE_FILE already exists
+else
+ python3 ${RUN_DIR}/build_engine.py \
+ --precision ${PRECISION} \
+ --model ${FINAL_MODEL} \
+ --engine ${ENGINE_FILE}
+ echo " "Generate Engine ${ENGINE_FILE}
+fi
+
+# Inference
+let step++
+echo;
+echo [STEP ${step}] : Inference
+python3 ${RUN_DIR}/inference.py \
+ --engine_file=${ENGINE_FILE} \
+ --datasets_dir=${DATASETS_DIR} \
+ --imgsz=${IMGSIZE} \
+ --warm_up=${WARM_UP} \
+ --loop_count ${LOOP_COUNT} \
+ --test_mode ${RUN_MODE} \
+ --fps_target ${TGT} \
+ --bsz ${BSZ}; check_status
+
+exit ${EXIT_STATUS}
diff --git a/models/cv/classification/wide_resnet50/ixrt/scripts/infer_wide_resnet50_int8_accuracy.sh b/models/cv/classification/wide_resnet50/ixrt/scripts/infer_wide_resnet50_int8_accuracy.sh
new file mode 100644
index 0000000000000000000000000000000000000000..367bdd4bd22be28f96cd3c6719888d0ca889c612
--- /dev/null
+++ b/models/cv/classification/wide_resnet50/ixrt/scripts/infer_wide_resnet50_int8_accuracy.sh
@@ -0,0 +1,144 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+set -x
+EXIT_STATUS=0
+check_status()
+{
+ if ((${PIPESTATUS[0]} != 0));then
+ EXIT_STATUS=1
+ fi
+}
+
+# Run parameters
+BSZ=32
+TGT=-1
+WARM_UP=0
+LOOP_COUNT=-1
+RUN_MODE=ACC
+PRECISION=int8
+export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
+# Update arguments
+index=0
+options=$@
+arguments=($options)
+for argument in $options
+do
+ index=`expr $index + 1`
+ case $argument in
+ --bs) BSZ=${arguments[index]};;
+ --tgt) TGT=${arguments[index]};;
+ esac
+done
+
+source ${CONFIG_DIR}
+ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo CONFIG_DIR : ${CONFIG_DIR}
+echo ====================== Model Info ======================
+echo Model Name : ${MODEL_NAME}
+echo Model Input Name : ${MODEL_INPUT_NAME}
+echo Model Output Name : ${MODEL_OUTPUT_NAME}
+echo Onnx Path : ${ORIGINE_MODEL}
+
+step=0
+SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
+
+# Simplify Model
+let step++
+echo [STEP ${step}] : Simplify Model
+if [ -f ${SIM_MODEL} ];then
+    echo " "Simplify Model Skip, ${SIM_MODEL} already exists
+else
+    python3 ${RUN_DIR}/simplify_model.py \
+        --origin_model $ORIGINE_MODEL \
+        --output_model ${SIM_MODEL}
+    echo " "Generate ${SIM_MODEL}
+fi
+
+# Quant Model
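+# quant.py calibrates the simplified ONNX with the observer/step/batch settings sourced from ${CONFIG_DIR};
+# it is expected to produce quantized_${MODEL_NAME}.onnx and quant_cfg.json in ${CHECKPOINTS_DIR}, which
+# build_i8_engine.py consumes below.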
+if [ $PRECISION == "int8" ];then
+ let step++
+ echo;
+ echo [STEP ${step}] : Quant Model
+ if [[ -z ${QUANT_EXIST_ONNX} ]];then
+ QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx
+ fi
+ if [[ -f ${QUANT_EXIST_ONNX} ]];then
+ SIM_MODEL=${QUANT_EXIST_ONNX}
+        echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} already exists
+ else
+ python3 ${RUN_DIR}/quant.py \
+ --model ${SIM_MODEL} \
+ --model_name ${MODEL_NAME} \
+ --dataset_dir ${DATASETS_DIR} \
+ --observer ${QUANT_OBSERVER} \
+ --disable_quant_names ${DISABLE_QUANT_LIST[@]} \
+ --save_dir $CHECKPOINTS_DIR \
+ --bsz ${QUANT_BATCHSIZE} \
+ --step ${QUANT_STEP} \
+ --seed ${QUANT_SEED} \
+ --imgsz ${IMGSIZE}
+ SIM_MODEL=${QUANT_EXIST_ONNX}
+ echo " "Generate ${SIM_MODEL}
+ fi
+fi
+
+# Change Batchsize
+let step++
+echo;
+echo [STEP ${step}] : Change Batchsize
+FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx
+if [ -f $FINAL_MODEL ];then
+    echo " "Change Batchsize Skip, $FINAL_MODEL already exists
+else
+    python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
+        --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL}
+    echo " "Generate ${FINAL_MODEL}
+fi
+
+# Build Engine
+let step++
+echo;
+echo [STEP ${step}] : Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
+if [ -f $ENGINE_FILE ];then
+    echo " "Build Engine Skip, $ENGINE_FILE already exists
+else
+    python3 ${RUN_DIR}/build_i8_engine.py \
+        --onnx ${FINAL_MODEL} \
+        --qparam_json ${CHECKPOINTS_DIR}/quant_cfg.json \
+        --engine ${ENGINE_FILE}
+    echo " "Generate Engine ${ENGINE_FILE}
+fi
+
+# Inference
+let step++
+echo;
+echo [STEP ${step}] : Inference
+python3 ${RUN_DIR}/inference.py \
+ --engine_file=${ENGINE_FILE} \
+ --datasets_dir=${DATASETS_DIR} \
+ --imgsz=${IMGSIZE} \
+ --warm_up=${WARM_UP} \
+ --loop_count ${LOOP_COUNT} \
+ --test_mode ${RUN_MODE} \
+ --acc_target ${TGT} \
+ --bsz ${BSZ}; check_status
+
+exit ${EXIT_STATUS}
diff --git a/models/cv/classification/wide_resnet50/ixrt/scripts/infer_wide_resnet50_int8_performance.sh b/models/cv/classification/wide_resnet50/ixrt/scripts/infer_wide_resnet50_int8_performance.sh
new file mode 100644
index 0000000000000000000000000000000000000000..468c557de451ddab0024ef2c69e9fa42751a50ce
--- /dev/null
+++ b/models/cv/classification/wide_resnet50/ixrt/scripts/infer_wide_resnet50_int8_performance.sh
@@ -0,0 +1,143 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+EXIT_STATUS=0
+check_status()
+{
+ if ((${PIPESTATUS[0]} != 0));then
+ EXIT_STATUS=1
+ fi
+}
+
+# Run parameters
+BSZ=32
+TGT=-1
+WARM_UP=3
+LOOP_COUNT=20
+RUN_MODE=FPS
+PRECISION=int8
+export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
+# Update arguments
+index=0
+options=$@
+arguments=($options)
+for argument in $options
+do
+ index=`expr $index + 1`
+ case $argument in
+ --bs) BSZ=${arguments[index]};;
+ --tgt) TGT=${arguments[index]};;
+ esac
+done
+
+source ${CONFIG_DIR}
+ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo CONFIG_DIR : ${CONFIG_DIR}
+echo ====================== Model Info ======================
+echo Model Name : ${MODEL_NAME}
+echo Model Input Name : ${MODEL_INPUT_NAME}
+echo Model Output Name : ${MODEL_OUTPUT_NAME}
+echo Onnx Path : ${ORIGINE_MODEL}
+
+step=0
+SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
+
+# Simplify Model
+let step++
+echo [STEP ${step}] : Simplify Model
+if [ -f ${SIM_MODEL} ];then
+    echo " "Simplify Model Skip, ${SIM_MODEL} already exists
+else
+    python3 ${RUN_DIR}/simplify_model.py \
+        --origin_model $ORIGINE_MODEL \
+        --output_model ${SIM_MODEL}
+    echo " "Generate ${SIM_MODEL}
+fi
+
+# Quant Model
+if [ $PRECISION == "int8" ];then
+ let step++
+ echo;
+ echo [STEP ${step}] : Quant Model
+ if [[ -z ${QUANT_EXIST_ONNX} ]];then
+ QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx
+ fi
+ if [[ -f ${QUANT_EXIST_ONNX} ]];then
+ SIM_MODEL=${QUANT_EXIST_ONNX}
+        echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} already exists
+ else
+ python3 ${RUN_DIR}/quant.py \
+ --model ${SIM_MODEL} \
+ --model_name ${MODEL_NAME} \
+ --dataset_dir ${DATASETS_DIR} \
+ --observer ${QUANT_OBSERVER} \
+ --disable_quant_names ${DISABLE_QUANT_LIST[@]} \
+ --save_dir $CHECKPOINTS_DIR \
+ --bsz ${QUANT_BATCHSIZE} \
+ --step ${QUANT_STEP} \
+ --seed ${QUANT_SEED} \
+ --imgsz ${IMGSIZE}
+ SIM_MODEL=${QUANT_EXIST_ONNX}
+ echo " "Generate ${SIM_MODEL}
+ fi
+fi
+
+# Change Batchsize
+let step++
+echo;
+echo [STEP ${step}] : Change Batchsize
+FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx
+if [ -f $FINAL_MODEL ];then
+    echo " "Change Batchsize Skip, $FINAL_MODEL already exists
+else
+    python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
+        --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL}
+    echo " "Generate ${FINAL_MODEL}
+fi
+
+# Build Engine
+let step++
+echo;
+echo [STEP ${step}] : Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
+if [ -f $ENGINE_FILE ];then
+    echo " "Build Engine Skip, $ENGINE_FILE already exists
+else
+    python3 ${RUN_DIR}/build_i8_engine.py \
+        --onnx ${FINAL_MODEL} \
+        --qparam_json ${CHECKPOINTS_DIR}/quant_cfg.json \
+        --engine ${ENGINE_FILE}
+    echo " "Generate Engine ${ENGINE_FILE}
+fi
+
+# Inference
+let step++
+echo;
+echo [STEP ${step}] : Inference
+python3 ${RUN_DIR}/inference.py \
+ --engine_file=${ENGINE_FILE} \
+ --datasets_dir=${DATASETS_DIR} \
+ --imgsz=${IMGSIZE} \
+ --warm_up=${WARM_UP} \
+ --loop_count ${LOOP_COUNT} \
+ --test_mode ${RUN_MODE} \
+    --fps_target ${TGT} \
+ --bsz ${BSZ}; check_status
+
+exit ${EXIT_STATUS}
\ No newline at end of file
diff --git a/models/cv/classification/wide_resnet50/ixrt/simplify_model.py b/models/cv/classification/wide_resnet50/ixrt/simplify_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..9948a9fa083ff99ff88e556e96614b02cccaa965
--- /dev/null
+++ b/models/cv/classification/wide_resnet50/ixrt/simplify_model.py
@@ -0,0 +1,40 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import onnx
+import argparse
+from onnxsim import simplify
+
+# Simplify
+def simplify_model(args):
+ onnx_model = onnx.load(args.origin_model)
+ model_simp, check = simplify(onnx_model)
+ model_simp = onnx.shape_inference.infer_shapes(model_simp)
+ onnx.save(model_simp, args.output_model)
+ print(" Simplify onnx Done.")
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--origin_model", type=str)
+ parser.add_argument("--output_model", type=str)
+ parser.add_argument("--reshape", action="store_true")
+ args = parser.parse_args()
+ return args
+
+args = parse_args()
+simplify_model(args)
+
+
+
+
diff --git a/models/cv/detection/yolov4/ixrt/README.md b/models/cv/detection/yolov4/ixrt/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..886a87aec3bc59e730e1b9fb3436fe07c8179600
--- /dev/null
+++ b/models/cv/detection/yolov4/ixrt/README.md
@@ -0,0 +1,82 @@
+# YOLOv4
+
+## Description
+
+YOLOv4 employs a two-step process involving regression for bounding box positioning and classification for object categorization. It combines past YOLO-family research contributions with novel features such as WRC, CSP, CmBN, SAT, the Mish activation, Mosaic data augmentation, DropBlock regularization, and the CIoU loss.
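+
+For reference, the Mish activation mentioned above is defined as `x * tanh(softplus(x))`. The short NumPy sketch below is not part of this repository; it is only meant to illustrate the formula:
+
+```python
+import numpy as np
+
+def mish(x: np.ndarray) -> np.ndarray:
+    # softplus(x) = log(1 + exp(x)); Mish(x) = x * tanh(softplus(x))
+    return x * np.tanh(np.log1p(np.exp(x)))
+
+print(mish(np.array([-1.0, 0.0, 1.0])))  # approximately [-0.3034, 0.0, 0.8651]
+```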
+
+## Setup
+
+### Install
+
+```bash
+# Install libGL
+## CentOS
+yum install -y mesa-libGL
+## Ubuntu
+apt install -y libgl1-mesa-dev
+
+pip3 install tqdm
+pip3 install onnx
+pip3 install onnxsim
+pip3 install pycocotools
+pip3 install pycuda
+```
+
+### Download
+
+Pretrained cfg:
+Pretrained model:
+
+Dataset: to download the validation dataset.
+
+### Model Conversion
+
+```bash
+# clone yolov4
+git clone https://github.com/Tianxiaomo/pytorch-YOLOv4.git yolov4
+
+# download weight
+mkdir data
+wget https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights -P data
+
+# export onnx model
+python3 export.py --cfg yolov4/cfg/yolov4.cfg --weight data/yolov4.weights --batchsize 16 --output data/yolov4.onnx
+mv yolov4_16_3_608_608_static.onnx data/yolov4.onnx
+
+# Use onnxsim to optimize the onnx model
+onnxsim data/yolov4.onnx data/yolov4_sim.onnx
+
+# Make sure the dataset path is "data/coco"
+```
+
+## Inference
+
+### FP16
+
+```bash
+# Accuracy
+bash scripts/infer_yolov4darknet_fp16_accuary.sh
+# Performance
+bash scripts/infer_yolov4darknet_fp16_performance.sh
+```
+
+### INT8
+
+```bash
+# Accuracy
+bash scripts/infer_yolov4darknet_int8_accuary.sh
+# Performance
+bash scripts/infer_yolov4darknet_int8_performance.sh
+```
+
+## Results
+
+| Model | BatchSize | Precision | FPS | MAP@0.5 |
+| ------ | --------- | --------- | ------ | ------- |
+| YOLOv4 | 32 | FP16 | 303.27 | 0.730 |
+| YOLOv4 | 32 | INT8 | 682.14 | 0.608 |
+
+## Reference
+
+DarkNet:
+Pytorch-YOLOv4:
diff --git a/models/cv/detection/yolov4/ixrt/build_engine.py b/models/cv/detection/yolov4/ixrt/build_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..ec4080edd3c275a4595cbfb407a21cebdada7fa7
--- /dev/null
+++ b/models/cv/detection/yolov4/ixrt/build_engine.py
@@ -0,0 +1,97 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import os
+import cv2
+import argparse
+import numpy as np
+
+import torch
+import tensorrt
+from tensorrt import Dims
+
+from load_ixrt_plugin import load_ixrt_plugin
+load_ixrt_plugin()
+
+
+def build_engine_trtapi_staticshape(config):
+ IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING)
+ builder = tensorrt.Builder(IXRT_LOGGER)
+ EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
+ network = builder.create_network(EXPLICIT_BATCH)
+ build_config = builder.create_builder_config()
+ parser = tensorrt.OnnxParser(network, IXRT_LOGGER)
+ parser.parse_from_file(config.model)
+
+ precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16
+ # print("precision : ", precision)
+ build_config.set_flag(precision)
+
+ plan = builder.build_serialized_network(network, build_config)
+ engine_file_path = config.engine
+ with open(engine_file_path, "wb") as f:
+ f.write(plan)
+ print("Build static shape engine done!")
+
+
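+# Variant that builds an engine with a dynamic batch dimension: an optimization profile covers
+# 1/32/64 x 3 x 608 x 608 and every network input is reshaped to [-1, 3, 608, 608]. The scripts in
+# this directory use the static-shape builder above; this function is kept for reference.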
+def build_engine_trtapi_dynamicshape(config):
+ IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING)
+ builder = tensorrt.Builder(IXRT_LOGGER)
+ EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
+ network = builder.create_network(EXPLICIT_BATCH)
+ build_config = builder.create_builder_config()
+
+ profile = builder.create_optimization_profile()
+ profile.set_shape("input",
+ Dims([1, 3, 608, 608]),
+ Dims([32, 3, 608, 608]),
+ Dims([64, 3, 608, 608]),
+ )
+ build_config.add_optimization_profile(profile)
+
+ parser = tensorrt.OnnxParser(network, IXRT_LOGGER)
+ parser.parse_from_file(config.model)
+ precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16
+ # print("precision : ", precision)
+ build_config.set_flag(precision)
+
+ # set dynamic
+ num_inputs = network.num_inputs
+ for i in range(num_inputs):
+ input_tensor = network.get_input(i)
+ input_tensor.shape = Dims([-1, 3, 608, 608])
+
+ plan = builder.build_serialized_network(network, build_config)
+ engine_file_path = config.engine
+ with open(engine_file_path, "wb") as f:
+ f.write(plan)
+ print("Build dynamic shape engine done!")
+
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--model", type=str)
+ parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8",
+ help="The precision of datatype")
+ # engine args
+ parser.add_argument("--engine", type=str, default=None)
+
+ args = parser.parse_args()
+ return args
+
+
+if __name__ == "__main__":
+ args = parse_args()
+ build_engine_trtapi_staticshape(args)
+ # build_engine_trtapi_dynamicshape(args)
diff --git a/models/cv/detection/yolov4/ixrt/coco_labels.py b/models/cv/detection/yolov4/ixrt/coco_labels.py
new file mode 100644
index 0000000000000000000000000000000000000000..5fc21282c7fa393e9d15e8bdc16c741dc7e78448
--- /dev/null
+++ b/models/cv/detection/yolov4/ixrt/coco_labels.py
@@ -0,0 +1,103 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+labels = [
+ "person",
+ "bicycle",
+ "car",
+ "motorcycle",
+ "airplane",
+ "bus",
+ "train",
+ "truck",
+ "boat",
+ "traffic light",
+ "fire hydrant",
+ "stop sign",
+ "parking meter",
+ "bench",
+ "bird",
+ "cat",
+ "dog",
+ "horse",
+ "sheep",
+ "cow",
+ "elephant",
+ "bear",
+ "zebra",
+ "giraffe",
+ "backpack",
+ "umbrella",
+ "handbag",
+ "tie",
+ "suitcase",
+ "frisbee",
+ "skis",
+ "snowboard",
+ "sports ball",
+ "kite",
+ "baseball bat",
+ "baseball glove",
+ "skateboard",
+ "surfboard",
+ "tennis racket",
+ "bottle",
+ "wine glass",
+ "cup",
+ "fork",
+ "knife",
+ "spoon",
+ "bowl",
+ "banana",
+ "apple",
+ "sandwich",
+ "orange",
+ "broccoli",
+ "carrot",
+ "hot dog",
+ "pizza",
+ "donut",
+ "cake",
+ "chair",
+ "couch",
+ "potted plant",
+ "bed",
+ "dining table",
+ "toilet",
+ "tv",
+ "laptop",
+ "mouse",
+ "remote",
+ "keyboard",
+ "cell phone",
+ "microwave",
+ "oven",
+ "toaster",
+ "sink",
+ "refrigerator",
+ "book",
+ "clock",
+ "vase",
+ "scissors",
+ "teddy bear",
+ "hair drier",
+ "toothbrush",
+]
+def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper)
+ return [
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34,
+ 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+ 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]
+
+__all__ = ["labels"]
diff --git a/models/cv/detection/yolov4/ixrt/common.py b/models/cv/detection/yolov4/ixrt/common.py
new file mode 100644
index 0000000000000000000000000000000000000000..dc3c2766533fa5a334a61231adb168ecf09622c3
--- /dev/null
+++ b/models/cv/detection/yolov4/ixrt/common.py
@@ -0,0 +1,335 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import os
+import cv2
+import glob
+import time
+import numpy as np
+from tqdm import tqdm
+
+import tensorrt
+import pycuda.driver as cuda
+
+
+def load_class_names(namesfile):
+ class_names = []
+ with open(namesfile, 'r') as fp:
+ lines = fp.readlines()
+ for line in lines:
+ line = line.rstrip()
+ class_names.append(line)
+ return class_names
+
+# input : [bsz, box_num, 5(cx, cy, w, h, conf) + class_num(prob[0], prob[1], ...)]
+# output : [bsz, box_num, 6(left_top_x, left_top_y, right_bottom_x, right_bottom_y, class_id, max_prob*conf)]
+def box_class85to6(input):
+ center_x_y = input[:, :2]
+ side = input[:, 2:4]
+ conf = input[:, 4:5]
+ class_id = np.argmax(input[:, 5:], axis = -1)
+ class_id = class_id.astype(np.float32).reshape(-1, 1) + 1
+ max_prob = np.max(input[:, 5:], axis = -1).reshape(-1, 1)
+ x1_y1 = center_x_y - 0.5 * side
+ x2_y2 = center_x_y + 0.5 * side
+ nms_input = np.concatenate([x1_y1, x2_y2, class_id, max_prob*conf], axis = -1)
+ return nms_input
+
+def save2json(batch_img_id, pred_boxes, json_result, class_trans):
+ for i, boxes in enumerate(pred_boxes):
+ if boxes is not None:
+ image_id = int(batch_img_id[i])
+ # have no target
+ if image_id == -1:
+ continue
+
+ for x1, y1, x2, y2, _, p, c in boxes:
+ x1, y1, x2, y2, p = float(x1), float(y1), float(x2), float(y2), float(p)
+ c = int(c)
+ x = x1
+ y = y1
+ w = x2 - x1
+ h = y2 - y1
+
+ json_result.append(
+ {
+ "image_id": image_id,
+ "category_id": class_trans[c - 1],
+ "bbox": [x, y, w, h],
+ "score": p,
+ }
+ )
+
+################## About TensorRT #################
+def create_engine_context(engine_path, logger):
+ with open(engine_path, "rb") as f:
+ runtime = tensorrt.Runtime(logger)
+ assert runtime
+ engine = runtime.deserialize_cuda_engine(f.read())
+ assert engine
+ context = engine.create_execution_context()
+ assert context
+
+ return engine, context
+
+def setup_io_bindings(engine, context):
+ # Setup I/O bindings
+ inputs = []
+ outputs = []
+ allocations = []
+
+ for i in range(engine.num_bindings):
+ is_input = False
+ if engine.binding_is_input(i):
+ is_input = True
+ name = engine.get_binding_name(i)
+ dtype = engine.get_binding_dtype(i)
+ shape = context.get_binding_shape(i)
+ if is_input:
+ batch_size = shape[0]
+ size = np.dtype(tensorrt.nptype(dtype)).itemsize
+ for s in shape:
+ size *= s
+ allocation = cuda.mem_alloc(size)
+ binding = {
+ "index": i,
+ "name": name,
+ "dtype": np.dtype(tensorrt.nptype(dtype)),
+ "shape": list(shape),
+ "allocation": allocation,
+ }
+ # print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}")
+ allocations.append(allocation)
+ if engine.binding_is_input(i):
+ inputs.append(binding)
+ else:
+ outputs.append(binding)
+ return inputs, outputs, allocations
+##########################################################
+
+
+################## About Loading Dataset #################
+def load_images(images_path):
+ """
+ If image path is given, return it directly
+ For txt file, read it and return each line as image path
+ In other case, it's a folder, return a list with names of each
+ jpg, jpeg and png file
+ """
+ input_path_extension = images_path.split('.')[-1]
+ if input_path_extension in ['jpg', 'jpeg', 'png']:
+ return [images_path]
+ elif input_path_extension == "txt":
+ with open(images_path, "r") as f:
+ return f.read().splitlines()
+ else:
+ return glob.glob(
+ os.path.join(images_path, "*.jpg")) + \
+ glob.glob(os.path.join(images_path, "*.png")) + \
+ glob.glob(os.path.join(images_path, "*.jpeg"))
+
+def prepare_batch(images_path, bs=16, input_size=(608, 608)):
+
+ width, height = input_size
+
+ batch_names = []
+ batch_images = []
+ batch_shapes = []
+
+ temp_names = []
+ temp_images = []
+ temp_shapes = []
+
+ for i, image_path in tqdm(enumerate(images_path), desc="Loading coco data"):
+ name = os.path.basename(image_path)
+ image = cv2.imread(image_path)
+ h, w, _ = image.shape
+ image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+ image_resized = cv2.resize(image_rgb, (width, height),
+ interpolation=cv2.INTER_LINEAR)
+ custom_image = image_resized.transpose(2, 0, 1).astype(np.float32) / 255.
+ custom_image = np.expand_dims(custom_image, axis=0)
+
+ if i != 0 and i % bs == 0:
+ batch_names.append(temp_names)
+ batch_images.append(np.concatenate(temp_images, axis=0))
+ batch_shapes.append(temp_shapes)
+
+ temp_names = [name]
+ temp_images = [custom_image]
+ temp_shapes = [(h, w)]
+ else:
+ temp_names.append(name)
+ temp_images.append(custom_image)
+ temp_shapes.append((h, w))
+
+    # Flush the remaining images so the last (possibly partial) batch is not dropped.
+    if temp_images:
+        batch_names.append(temp_names)
+        batch_images.append(np.concatenate(temp_images, axis=0))
+        batch_shapes.append(temp_shapes)
+
+    return batch_names, batch_images, batch_shapes
+##########################################################
+
+
+################## About Operating box #################
+def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
+ # Resize and pad image while meeting stride-multiple constraints
+ shape = im.shape[:2] # current shape [height, width]
+ if isinstance(new_shape, int):
+ new_shape = (new_shape, new_shape)
+
+ # Scale ratio (new / old)
+ r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
+ if not scaleup: # only scale down, do not scale up (for better val mAP)
+ r = min(r, 1.0)
+
+ # Compute padding
+ ratio = r, r # width, height ratios
+ new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
+ dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
+ if auto: # minimum rectangle
+ dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding
+ elif scaleFill: # stretch
+ dw, dh = 0.0, 0.0
+ new_unpad = (new_shape[1], new_shape[0])
+ ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios
+
+ dw /= 2 # divide padding into 2 sides
+ dh /= 2
+
+ if shape[::-1] != new_unpad: # resize
+ im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
+ top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
+ left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
+ im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
+ return im, ratio, (dw, dh)
+
+def scale_boxes(net_shape, boxes, ori_shape, use_letterbox=False):
+ # Rescale boxes (xyxy) from net_shape to ori_shape
+
+ if use_letterbox:
+
+ gain = min(
+ net_shape[0] / ori_shape[0], net_shape[1] / ori_shape[1]
+ ) # gain = new / old
+ pad = (net_shape[1] - ori_shape[1] * gain) / 2, (
+ net_shape[0] - ori_shape[0] * gain
+ ) / 2.0
+
+ boxes[:, [0, 2]] -= pad[0] # x padding
+ boxes[:, [1, 3]] -= pad[1] # y padding
+ boxes[:, :4] /= gain
+ else:
+ x_scale, y_scale = net_shape[1] / ori_shape[1], net_shape[0] / ori_shape[0]
+
+ boxes[:, 0] /= x_scale
+ boxes[:, 1] /= y_scale
+ boxes[:, 2] /= x_scale
+ boxes[:, 3] /= y_scale
+
+ clip_boxes(boxes, ori_shape)
+ return boxes
+
+def clip_boxes(boxes, shape):
+
+ boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2
+ boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2
+##########################################################
+
+
+################## About pre and post processing #########
+def pre_processing(src_img, imgsz=608):
+ resized = cv2.resize(src_img, (imgsz, imgsz), interpolation=cv2.INTER_LINEAR)
+ in_img = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
+ in_img = np.transpose(in_img, (2, 0, 1)).astype(np.float32)
+ in_img = np.expand_dims(in_img, axis=0)
+ in_img /= 255.0
+ return in_img
+
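+# Greedy NMS on CPU: boxes are (x1, y1, x2, y2) and confs their scores; boxes whose IoU (or
+# intersection-over-min-area when min_mode is True) with an already-kept, higher-scoring box
+# exceeds nms_thresh are suppressed.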
+def nms_cpu(boxes, confs, nms_thresh=0.5, min_mode=False):
+ # print(boxes.shape)
+ x1 = boxes[:, 0]
+ y1 = boxes[:, 1]
+ x2 = boxes[:, 2]
+ y2 = boxes[:, 3]
+
+ areas = (x2 - x1) * (y2 - y1)
+ order = confs.argsort()[::-1]
+
+ keep = []
+ while order.size > 0:
+ idx_self = order[0]
+ idx_other = order[1:]
+
+ keep.append(idx_self)
+
+ xx1 = np.maximum(x1[idx_self], x1[idx_other])
+ yy1 = np.maximum(y1[idx_self], y1[idx_other])
+ xx2 = np.minimum(x2[idx_self], x2[idx_other])
+ yy2 = np.minimum(y2[idx_self], y2[idx_other])
+
+ w = np.maximum(0.0, xx2 - xx1)
+ h = np.maximum(0.0, yy2 - yy1)
+ inter = w * h
+
+ if min_mode:
+ over = inter / np.minimum(areas[order[0]], areas[order[1:]])
+ else:
+ over = inter / (areas[order[0]] + areas[order[1:]] - inter)
+
+ inds = np.where(over <= nms_thresh)[0]
+ order = order[inds + 1]
+
+ return np.array(keep)
+
+
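+# Decodes the detector output, which this function assumes holds boxes in output[..., :4], the class
+# id in output[..., 4] and the confidence in output[..., 5]. Low-confidence boxes are dropped,
+# per-class NMS is applied, and one list of boxes is returned per image in the batch.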
+def post_processing(img, conf_thresh, nms_thresh, output, num_classes=80):
+
+    # boxes: [batch, num, 4]
+    box_array = output[:, :, :4]
+    # (class id, confidence): [batch, num, 2]
+    class_confs = output[:, :, 4:]
+
+ max_conf = class_confs[:, :, 1]
+ max_id = class_confs[:, :, 0]
+
+ bboxes_batch = []
+ for i in range(box_array.shape[0]):
+
+ argwhere = max_conf[i] > conf_thresh
+ l_box_array = box_array[i, argwhere, :]
+ l_max_conf = max_conf[i, argwhere]
+ l_max_id = max_id[i, argwhere]
+
+ bboxes = []
+ # nms for each class
+ for j in range(num_classes):
+
+ cls_argwhere = l_max_id == j
+ ll_box_array = l_box_array[cls_argwhere, :]
+ ll_max_conf = l_max_conf[cls_argwhere]
+ ll_max_id = l_max_id[cls_argwhere]
+
+ keep = nms_cpu(ll_box_array, ll_max_conf, nms_thresh)
+
+ if (keep.size > 0):
+ ll_box_array = ll_box_array[keep, :]
+ ll_max_conf = ll_max_conf[keep]
+ ll_max_id = ll_max_id[keep]
+
+ for k in range(ll_box_array.shape[0]):
+ bboxes.append([ll_box_array[k, 0], ll_box_array[k, 1], ll_box_array[k, 2],
+ ll_box_array[k, 3], ll_max_conf[k], ll_max_conf[k], ll_max_id[k]])
+
+ bboxes_batch.append(bboxes)
+
+ return bboxes_batch
+##########################################################
+
diff --git a/models/cv/detection/yolov4/ixrt/cut_model.py b/models/cv/detection/yolov4/ixrt/cut_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..cf4f88dae926b8d15356c7f6b48d89fe80dc9f2a
--- /dev/null
+++ b/models/cv/detection/yolov4/ixrt/cut_model.py
@@ -0,0 +1,30 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import onnx
+import argparse
+from onnxsim import simplify
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--input_model", type=str)
+ parser.add_argument("--output_model", type=str)
+ parser.add_argument("--input_names", nargs='+', type=str)
+ parser.add_argument("--output_names", nargs='+', type=str)
+ args = parser.parse_args()
+ return args
+
+args = parse_args()
+onnx.utils.extract_model(args.input_model, args.output_model, args.input_names, args.output_names)
+print(" Cut Model Done.")
diff --git a/models/cv/detection/yolov4/ixrt/deploy.py b/models/cv/detection/yolov4/ixrt/deploy.py
new file mode 100644
index 0000000000000000000000000000000000000000..084356ec8cb14a0604bf994faca4ce15834e4b15
--- /dev/null
+++ b/models/cv/detection/yolov4/ixrt/deploy.py
@@ -0,0 +1,210 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import argparse
+import copy
+
+from typing import Union, Callable, List
+
+from tensorrt.deploy.api import *
+from tensorrt.deploy.backend.onnx.converter import default_converter
+from tensorrt.deploy.backend.torch.executor.operators._operators import to_py_type
+from tensorrt.deploy.ir.operator_attr import BaseOperatorAttr, EmptyAttr
+from tensorrt.deploy.ir.operator_type import OperatorType as OP
+from tensorrt.deploy.ir import operator_attr as attr, Operator, generate_operator_name
+from tensorrt.deploy.fusion import BasePass, PatternGraph, build_sequence_graph, GraphMatcher, PassSequence
+from tensorrt.deploy.ir import Graph
+from tensorrt.deploy.quantizer.quant_operator.base import quant_single_input_operator
+from tensorrt.deploy.backend.onnx.converter import convert_onnx_operator
+from tensorrt.deploy.api import GraphTransform, create_source, create_target
+
+class FuseMishPass(BasePass):
+ def process(self, graph: Graph) -> Graph:
+ pattern = build_sequence_graph([OP.SOFTPLUS, OP.TANH, OP.MUL])
+
+ matcher = GraphMatcher(pattern, strict=False)
+ self.transform = GraphTransform(graph)
+ matcher.findall(graph, self.fuse_mish)
+ return graph
+
+ def fuse_mish(self, graph: Graph, pattern_graph: PatternGraph):
+ softplus = pattern_graph.nodes[0].operator
+ mul = pattern_graph.nodes[-1].operator
+
+ if not self.can_fused(graph, pattern_graph):
+ return
+
+ self.transform.delete_operators_between_op_op(softplus, mul)
+
+ mish_op = Operator(
+ name=generate_operator_name(graph, pattern="Mish_{idx}"),
+ op_type=OP.MISH,
+ inputs=copy.copy(softplus.inputs),
+ outputs=copy.copy(mul.outputs),
+ )
+ mish_op.is_quant_operator = softplus.is_quant_operator and mul.is_quant_operator
+ graph.add_operator(mish_op)
+
+ def can_fused(self, graph: Graph, pattern_graph: PatternGraph):
+ softplus = pattern_graph.nodes[0].operator
+ mul = pattern_graph.nodes[-1].operator
+
+        # Check that the outputs of Softplus and Tanh are each consumed by exactly one op;
+        # if any of them has multiple consumers, the chain cannot be fused.
+ for node in pattern_graph.nodes[:2]:
+ next_ops = graph.get_next_operators(node.operator)
+ if len(next_ops) != 1:
+ return False
+
+        # Check that the Mul's other input comes from the same tensor that feeds the Softplus
+ softplus_prev_op = graph.get_previous_operators(softplus)
+ if len(softplus_prev_op) != 1:
+ return False
+
+ mul_prev_op = graph.get_previous_operators(mul)
+ if len(mul_prev_op) != 2:
+ return False
+
+ for op in mul_prev_op:
+ if op is softplus_prev_op[0]:
+ return True
+
+ return False
+
+
+class Transform:
+ def __init__(self, graph):
+ self.t = GraphTransform(graph)
+ self.graph = graph
+
+ def ReplaceFocus(self, input_edge, outputs, to_op):
+ input_var = self.graph.get_variable(input_edge)
+ op = self.graph.get_operator(to_op)
+ self.t.delete_operators_between_var_op(
+ from_var=input_var, to_op=op
+ )
+ self.t.make_operator(
+ "Focus", inputs=input_edge, outputs=outputs
+ )
+ return self.graph
+
+ def AddYoloDecoderOp(self, inputs: list, outputs: list, op_type, **attributes):
+ if attributes["anchor"] is None:
+ del attributes["anchor"]
+ self.t.make_operator(
+ op_type, inputs=inputs, outputs=outputs, **attributes
+ )
+ return self.graph
+
+ def AddConcatOp(self, inputs: list, outputs, **attributes):
+ self.t.make_operator(
+ "Concat", inputs=inputs, outputs=outputs, **attributes
+ )
+ return self.graph
+
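+# Rewrites the exported graph for ixrt deployment: optionally collapses the operators between
+# --focus_input and --focus_last_node into a single Focus op, appends a YOLO decoder op per detection
+# head (strides 8/16/32, plus 64 when --decoder64_anchor is given), then concatenates the decoded
+# outputs into a single "output" tensor and registers it as the graph output.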
+def customize_ops(graph, args):
+ t = Transform(graph)
+ fuse_focus = args.focus_input is not None and args.focus_output is not None and args.focus_last_node is not None
+ if fuse_focus:
+ graph = t.ReplaceFocus(
+ input_edge=args.focus_input,
+ outputs=args.focus_output,
+ to_op=args.focus_last_node
+ )
+ decoder_input = args.decoder_input_names
+ num = len(decoder_input) // 3
+ graph = t.AddYoloDecoderOp(
+ inputs=decoder_input[:num],
+ outputs=["decoder_8"],
+ op_type=args.decoder_type,
+ anchor=args.decoder8_anchor,
+ num_class=args.num_class,
+ stride=8,
+ faster_impl=args.faster
+ )
+ graph = t.AddYoloDecoderOp(
+ inputs=decoder_input[num:num*2],
+ outputs=["decoder_16"],
+ op_type=args.decoder_type,
+ anchor=args.decoder16_anchor,
+ num_class=args.num_class,
+ stride=16,
+ faster_impl=args.faster
+ )
+ graph = t.AddYoloDecoderOp(
+ inputs=decoder_input[num*2:num*2+1],
+ outputs=["decoder_32"],
+ op_type=args.decoder_type,
+ anchor=args.decoder32_anchor,
+ num_class=args.num_class,
+ stride=32,
+ faster_impl=args.faster
+ )
+ if args.decoder64_anchor is not None:
+ graph = t.AddYoloDecoderOp(
+ inputs=decoder_input[num*2+1:],
+ outputs=["decoder_64"],
+ op_type=args.decoder_type,
+ anchor=args.decoder64_anchor,
+ num_class=args.num_class,
+ stride=64,
+ faster_impl=args.faster
+ )
+ graph = t.AddConcatOp(
+ inputs=["decoder_8", "decoder_16", "decoder_32", "decoder_64"],
+ outputs=["output"],
+ axis=1
+ )
+ else:
+ graph = t.AddConcatOp(
+ inputs=["decoder_32", "decoder_16", "decoder_8"],
+ outputs=["output"],
+ axis=1
+ )
+
+ graph.outputs.clear()
+ graph.add_output("output")
+ graph.outputs["output"].dtype = "FLOAT"
+ return graph
+
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--src", type=str)
+ parser.add_argument("--dst", type=str)
+ parser.add_argument("--decoder_type", type=str, choices=["YoloV3Decoder", "YoloV5Decoder", "YoloV7Decoder", "YoloxDecoder"])
+ parser.add_argument("--decoder_input_names", nargs='+', type=str)
+ parser.add_argument("--decoder8_anchor", nargs='*', type=int)
+ parser.add_argument("--decoder16_anchor", nargs='*', type=int)
+ parser.add_argument("--decoder32_anchor", nargs='*', type=int)
+ parser.add_argument("--decoder64_anchor", nargs='*', type=int, default=None)
+ parser.add_argument("--num_class", type=int, default=80)
+ parser.add_argument("--faster", type=int, default=1)
+ parser.add_argument("--focus_input", type=str, default=None)
+ parser.add_argument("--focus_output", type=str, default=None)
+ parser.add_argument("--focus_last_node", type=str, default=None)
+ args = parser.parse_args()
+ return args
+
+
+if __name__ == "__main__":
+
+ args = parse_args()
+ graph = create_source(args.src)()
+ graph = customize_ops(graph, args)
+ graph = FuseMishPass().process(graph)
+ create_target(saved_path=args.dst).export(graph)
+    print("Saved the modified onnx to", args.dst)
diff --git a/models/cv/detection/yolov4/ixrt/export.py b/models/cv/detection/yolov4/ixrt/export.py
new file mode 100644
index 0000000000000000000000000000000000000000..7c8bbfa5aa79f1a982c340690658325d23fa4b54
--- /dev/null
+++ b/models/cv/detection/yolov4/ixrt/export.py
@@ -0,0 +1,56 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import sys
+sys.path.insert(0, "yolov4")
+import argparse
+
+from yolov4.tool.darknet2onnx import *
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--cfg",
+ type=str,
+ required=True,
+ help="darknet cfg path.")
+
+ parser.add_argument("--weight",
+ type=str,
+ required=True,
+ help="darknet weights path.")
+
+ parser.add_argument("--batchsize",
+ type=int,
+ required=True,
+ help="Onnx model batchsize.")
+
+ parser.add_argument("--output",
+ type=str,
+ required=True,
+ help="export onnx model path.")
+
+ args = parser.parse_args()
+
+ return args
+
+def main():
+ args = parse_args()
+
+ transform_to_onnx(args.cfg, args.weight, args.batchsize, args.output)
+
+if __name__ == "__main__":
+ main()
+
diff --git a/models/cv/detection/yolov4/ixrt/inference.py b/models/cv/detection/yolov4/ixrt/inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..5d740507b3a54bf2248000b2ac60d09f12a9886a
--- /dev/null
+++ b/models/cv/detection/yolov4/ixrt/inference.py
@@ -0,0 +1,211 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import argparse
+import glob
+import json
+import os
+import time
+import sys
+from tqdm import tqdm
+
+import torch
+import numpy as np
+import tensorrt
+from tensorrt import Dims
+import pycuda.autoinit
+import pycuda.driver as cuda
+
+from coco_labels import coco80_to_coco91_class
+from common import save2json, box_class85to6
+from common import load_images, prepare_batch
+from common import create_engine_context, setup_io_bindings
+from common import scale_boxes, post_processing
+
+from pycocotools.coco import COCO
+from pycocotools.cocoeval import COCOeval
+from load_ixrt_plugin import load_ixrt_plugin
+load_ixrt_plugin()
+
+
+
+def main(config):
+
+ # Step1: Load dataloader
+ images_path = load_images(config.eval_dir)
+ dataloader = prepare_batch(images_path, config.bsz)
+
+ # Step2: Load Engine
+ input_name = "input"
+ host_mem = tensorrt.IHostMemory
+ logger = tensorrt.Logger(tensorrt.Logger.ERROR)
+ engine, context = create_engine_context(config.model_engine, logger)
+ input_idx = engine.get_binding_index(input_name)
+ context.set_binding_shape(input_idx, Dims((config.bsz,3,config.imgsz,config.imgsz)))
+ inputs, outputs, allocations = setup_io_bindings(engine, context)
+
+ # Warm up
+ if config.warm_up > 0:
+ print("\nWarm Start.")
+ for i in range(config.warm_up):
+ context.execute_v2(allocations)
+ print("Warm Done.")
+
+ json_result = []
+ forward_time = 0.0
+ class_map = coco80_to_coco91_class()
+ num_samples = 0
+ # Step3: Run on coco dataset
+ for batch_names, batch_images, batch_shapes in tqdm(zip(*dataloader)):
+ batch_data = np.ascontiguousarray(batch_images)
+ data_shape = batch_data.shape
+ h, w = zip(*batch_shapes)
+ batch_img_shape = [h, w]
+ batch_img_id = [int(x.split('.')[0]) for x in batch_names]
+
+ cur_bsz_sample = batch_images.shape[0]
+ num_samples += cur_bsz_sample
+ # Set input
+ input_idx = engine.get_binding_index(input_name)
+ context.set_binding_shape(input_idx, Dims(data_shape))
+ inputs, outputs, allocations = setup_io_bindings(engine, context)
+
+ cuda.memcpy_htod(inputs[0]["allocation"], batch_data)
+ # Prepare the output data
+ output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"])
+ # print(f"output shape : {output.shape} output type : {output.dtype}")
+
+ # Forward
+ start_time = time.time()
+ context.execute_v2(allocations)
+ end_time = time.time()
+ forward_time += end_time - start_time
+
+ if config.test_mode == "MAP":
+ # Fetch output
+ cuda.memcpy_dtoh(output, outputs[0]["allocation"])
+ pred_boxes = post_processing(None, 0.001, 0.6, output)
+
+ pred_results = []
+ # Calculate pred box on raw shape
+ for (pred_box, raw_shape) in zip(pred_boxes, batch_shapes):
+ h, w = raw_shape
+ if len(pred_box) == 0:continue # no detection results
+ pred_box = np.array(pred_box, dtype=np.float32)
+ pred_box = scale_boxes((config.imgsz, config.imgsz), pred_box, raw_shape, use_letterbox=False)
+
+ pred_results.append(pred_box.tolist())
+
+ save2json(batch_img_id, pred_results, json_result, class_map)
+
+ fps = num_samples / forward_time
+
+ if config.test_mode == "FPS":
+ print("FPS : ", fps)
+ print(f"Performance Check : Test {fps} >= target {config.fps_target}")
+ if fps >= config.fps_target:
+ print("pass!")
+ exit()
+ else:
+ print("failed!")
+ exit(1)
+
+ if config.test_mode == "MAP":
+ if len(json_result) == 0:
+ print("Predict zero box!")
+ exit(1)
+
+ if not os.path.exists(config.pred_dir):
+ os.makedirs(config.pred_dir)
+
+ pred_json = os.path.join(
+ config.pred_dir, f"{config.model_name}_{config.precision}_preds.json"
+ )
+ with open(pred_json, "w") as f:
+ json.dump(json_result, f)
+
+ anno_json = config.coco_gt
+ anno = COCO(anno_json) # init annotations api
+ pred = anno.loadRes(pred_json) # init predictions api
+ eval = COCOeval(anno, pred, "bbox")
+
+ eval.evaluate()
+ eval.accumulate()
+ print(
+ f"==============================eval {config.model_name} {config.precision} coco map =============================="
+ )
+ eval.summarize()
+
+ map, map50 = eval.stats[:2]
+ print("MAP@0.5 : ", map50)
+ print(f"Accuracy Check : Test {map50} >= target {config.map_target}")
+ if map50 >= config.map_target:
+ print("pass!")
+ exit()
+ else:
+ print("failed!")
+ exit(1)
+
+
+def parse_config():
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--model_name", type=str, default="YOLOV4", help="YOLOV3 YOLOV4 YOLOV5 YOLOV7 YOLOX"
+ )
+ parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8",
+ help="The precision of datatype")
+ parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP")
+ parser.add_argument(
+ "--model_engine",
+ type=str,
+ default="",
+ help="model engine path",
+ )
+ parser.add_argument(
+ "--coco_gt",
+ type=str,
+ default="data/datasets/cv/coco2017/annotations/instances_val2017.json",
+ help="coco instances_val2017.json",
+ )
+ parser.add_argument("--warm_up", type=int, default=3, help="warm_up count")
+ parser.add_argument("--loop_count", type=int, default=-1, help="loop count")
+ parser.add_argument(
+ "--eval_dir",
+ type=str,
+ default="data/datasets/cv/coco2017/val2017",
+ help="coco image dir",
+ )
+ parser.add_argument("--bsz", type=int, default=32, help="test batch size")
+ parser.add_argument(
+ "--imgsz",
+ "--img",
+ "--img-size",
+ type=int,
+ default=608,
+ help="inference size h,w",
+ )
+ parser.add_argument("--pred_dir", type=str, default=".", help="pred save json dirs")
+ parser.add_argument("--map_target", type=float, default=0.56, help="target mAP")
+ parser.add_argument("--fps_target", type=float, default=-1.0, help="target fps")
+
+ config = parser.parse_args()
+ print("config:", config)
+ return config
+
+
+if __name__ == "__main__":
+ config = parse_config()
+ main(config)
diff --git a/models/cv/detection/yolov4/ixrt/load_ixrt_plugin.py b/models/cv/detection/yolov4/ixrt/load_ixrt_plugin.py
new file mode 100644
index 0000000000000000000000000000000000000000..2bb0abc21bd5806c51d6b908e3e3407cfdb62cc8
--- /dev/null
+++ b/models/cv/detection/yolov4/ixrt/load_ixrt_plugin.py
@@ -0,0 +1,26 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import ctypes
+import tensorrt
+from os.path import join, dirname, exists
+def load_ixrt_plugin(logger=tensorrt.Logger(tensorrt.Logger.INFO), namespace="", dynamic_path=""):
+ if not dynamic_path:
+ dynamic_path = join(dirname(tensorrt.__file__), "lib", "libixrt_plugin.so")
+ if not exists(dynamic_path):
+ raise FileNotFoundError(
+            f"The ixrt_plugin lib {dynamic_path} does not exist, please provide a valid plugin path!")
+ ctypes.CDLL(dynamic_path)
+ tensorrt.init_libnvinfer_plugins(logger, namespace)
+ print(f"Loaded plugin from {dynamic_path}")
diff --git a/models/cv/detection/yolov4/ixrt/quant.py b/models/cv/detection/yolov4/ixrt/quant.py
new file mode 100644
index 0000000000000000000000000000000000000000..70265cbc25d24d4ed41640c76f78a1839555f749
--- /dev/null
+++ b/models/cv/detection/yolov4/ixrt/quant.py
@@ -0,0 +1,105 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import os
+import cv2
+import random
+import argparse
+import numpy as np
+from tensorrt.deploy import static_quantize
+
+import torch
+import torchvision.datasets
+from torch.utils.data import DataLoader
+from common import letterbox
+
+
+def setseed(seed=42):
+ random.seed(seed)
+ np.random.seed(seed)
+ torch.manual_seed(seed)
+
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--model_name", type=str)
+ parser.add_argument("--model", type=str, default="yolov4_bs16_without_decoder.onnx")
+ parser.add_argument("--dataset_dir", type=str, default="./coco2017/val2017")
+ parser.add_argument("--ann_file", type=str, default="./coco2017/annotations/instances_val2017.json")
+ parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], default="hist_percentile")
+ parser.add_argument("--disable_quant_names", nargs='*', type=str)
+ parser.add_argument("--save_quant_model", type=str, help="save the quantization model path", default=None)
+ parser.add_argument("--bsz", type=int, default=16)
+ parser.add_argument("--step", type=int, default=32)
+ parser.add_argument("--seed", type=int, default=42)
+ parser.add_argument("--imgsz", type=int, default=608)
+ parser.add_argument("--use_letterbox", action="store_true")
+ args = parser.parse_args()
+ return args
+
+args = parse_args()
+setseed(args.seed)
+model_name = args.model_name
+
+
+def get_dataloader(data_dir, step=32, batch_size=16, new_shape=[608, 608], use_letterbox=False):
+ num = step * batch_size
+ val_list = [os.path.join(data_dir, x) for x in os.listdir(data_dir)]
+ random.shuffle(val_list)
+ pic_list = val_list[:num]
+
+ calibration_dataset = []
+ for file_path in pic_list:
+ pic_data = cv2.imread(file_path)
+ org_img = pic_data
+ assert org_img is not None, 'Image not Found ' + file_path
+ h0, w0 = org_img.shape[:2]
+
+ if use_letterbox:
+ img, ratio, dwdh = letterbox(org_img, new_shape=(new_shape[1], new_shape[0]), auto=False, scaleup=True)
+ else:
+ img = cv2.resize(org_img, new_shape)
+ img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB
+ img = np.ascontiguousarray(img) / 255.0 # 0~1 np array
+ img = torch.from_numpy(img).float()
+
+ calibration_dataset.append(img)
+
+ calibration_dataloader = DataLoader(
+ calibration_dataset,
+ shuffle=True,
+ batch_size=batch_size,
+ drop_last=True
+ )
+ return calibration_dataloader
+
+dataloader = get_dataloader(
+ data_dir=args.dataset_dir,
+ step=args.step,
+ batch_size=args.bsz,
+ new_shape=(args.imgsz, args.imgsz),
+ use_letterbox=args.use_letterbox
+)
+
+dirname = os.path.dirname(args.save_quant_model)
+quant_json_path = os.path.join(dirname, f"quantized_{model_name}.json")
+
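+# Post-training static quantization: calibrate on the COCO batches sampled above with the chosen
+# observer and write the QDQ-quantized ONNX to --save_quant_model, with its parameter json alongside.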
+static_quantize(args.model,
+ calibration_dataloader=dataloader,
+ save_quant_onnx_path=args.save_quant_model,
+ save_quant_params_path=quant_json_path,
+ observer=args.observer,
+ data_preprocess=lambda x: x.to("cuda"),
+ quant_format="qdq",
+ disable_quant_names=args.disable_quant_names)
diff --git a/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4darknet_fp16_accuary.sh b/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4darknet_fp16_accuary.sh
new file mode 100644
index 0000000000000000000000000000000000000000..b732d4eb297b6319ad5bef4660a6f7dde0ef0abc
--- /dev/null
+++ b/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4darknet_fp16_accuary.sh
@@ -0,0 +1,103 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+EXIT_STATUS=0
+check_status()
+{
+ if ((${PIPESTATUS[0]} != 0));then
+ EXIT_STATUS=1
+ fi
+}
+
+PROJ_DIR=$(cd $(dirname $0);cd ../; pwd)
+DATASETS_DIR="${PROJ_DIR}/data/coco"
+COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json
+EVAL_DIR=${DATASETS_DIR}/images/val2017
+CHECKPOINTS_DIR="${PROJ_DIR}/data"
+RUN_DIR="${PROJ_DIR}"
+ORIGINE_MODEL=${CHECKPOINTS_DIR}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo ====================== Model Info ======================
+echo Model Name : yolov4_darknet
+echo Onnx Path : ${ORIGINE_MODEL}
+
+BATCH_SIZE=16
+CURRENT_MODEL=${CHECKPOINTS_DIR}/yolov4_sim.onnx
+
+# Cut decoder part
+echo "Cut decoder part"
+FINAL_MODEL=${CHECKPOINTS_DIR}/yolov4_bs${BATCH_SIZE}_without_decoder.onnx
+if [ -f $FINAL_MODEL ];then
+    echo " "CUT Model Skip, $FINAL_MODEL already exists
+else
+ python3 ${RUN_DIR}/cut_model.py \
+ --input_model ${CURRENT_MODEL} \
+ --output_model ${FINAL_MODEL} \
+ --input_names input \
+ --output_names /models.138/conv94/Conv_output_0 /models.149/conv102/Conv_output_0 /models.160/conv110/Conv_output_0
+ echo " "Generate ${FINAL_MODEL}
+fi
+CURRENT_MODEL=${FINAL_MODEL}
+
+# add decoder op
+FINAL_MODEL=${CHECKPOINTS_DIR}/yolov4_bs${BATCH_SIZE}_with_decoder.onnx
+if [ -f $FINAL_MODEL ];then
+    echo " "Add Decoder Skip, $FINAL_MODEL already exists
+else
+ python3 ${RUN_DIR}/deploy.py \
+ --src ${CURRENT_MODEL} \
+ --dst ${FINAL_MODEL} \
+ --decoder_type YoloV3Decoder \
+ --decoder_input_names /models.138/conv94/Conv_output_0 /models.149/conv102/Conv_output_0 /models.160/conv110/Conv_output_0 \
+ --decoder8_anchor 12 16 19 36 40 28 \
+ --decoder16_anchor 36 75 76 55 72 146 \
+ --decoder32_anchor 142 110 192 243 459 401
+ echo " "Generate ${FINAL_MODEL}
+fi
+CURRENT_MODEL=${FINAL_MODEL}
+
+# Build Engine
+echo Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/yolov4_fp16.engine
+if [ -f $ENGINE_FILE ];then
+    echo " "Build Engine Skip, $ENGINE_FILE already exists
+else
+ python3 ${RUN_DIR}/build_engine.py \
+ --precision float16 \
+ --model ${CURRENT_MODEL} \
+ --engine ${ENGINE_FILE}
+ echo " "Generate Engine ${ENGINE_FILE}
+fi
+
+# Inference
+echo Inference
+RUN_BATCH_SIZE=16
+python3 ${RUN_DIR}/inference.py \
+ --test_mode MAP \
+ --model_engine ${ENGINE_FILE} \
+ --warm_up 2 \
+ --bsz ${RUN_BATCH_SIZE} \
+ --imgsz 608 \
+ --loop_count 10 \
+ --eval_dir ${EVAL_DIR} \
+ --coco_gt ${COCO_GT} \
+ --pred_dir ${CHECKPOINTS_DIR} \
+ --precision float16 \
+ --map_target 0.30; check_status
+exit ${EXIT_STATUS}
diff --git a/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4darknet_fp16_performance.sh b/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4darknet_fp16_performance.sh
new file mode 100644
index 0000000000000000000000000000000000000000..796dad720e13250b6ee81c66defca990c416e220
--- /dev/null
+++ b/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4darknet_fp16_performance.sh
@@ -0,0 +1,103 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+EXIT_STATUS=0
+check_status()
+{
+ if ((${PIPESTATUS[0]} != 0));then
+ EXIT_STATUS=1
+ fi
+}
+
+PROJ_DIR=$(cd $(dirname $0);cd ../; pwd)
+DATASETS_DIR="${PROJ_DIR}/data/coco"
+COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json
+EVAL_DIR=${DATASETS_DIR}/images/val2017
+CHECKPOINTS_DIR="${PROJ_DIR}/data"
+RUN_DIR="${PROJ_DIR}"
+ORIGINE_MODEL=${CHECKPOINTS_DIR}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo ====================== Model Info ======================
+echo Model Name : yolov4_darknet
+echo Onnx Path : ${ORIGINE_MODEL}
+
+BATCH_SIZE=16
+CURRENT_MODEL=${CHECKPOINTS_DIR}/yolov4_sim.onnx
+
+# Cut decoder part
+echo "Cut decoder part"
+FINAL_MODEL=${CHECKPOINTS_DIR}/yolov4_bs${BATCH_SIZE}_without_decoder.onnx
+if [ -f $FINAL_MODEL ];then
+ echo " "CUT Model Skip, $FINAL_MODEL has been existed
+else
+ python3 ${RUN_DIR}/cut_model.py \
+ --input_model ${CURRENT_MODEL} \
+ --output_model ${FINAL_MODEL} \
+ --input_names input \
+ --output_names /models.138/conv94/Conv_output_0 /models.149/conv102/Conv_output_0 /models.160/conv110/Conv_output_0
+ echo " "Generate ${FINAL_MODEL}
+fi
+CURRENT_MODEL=${FINAL_MODEL}
+
+# add decoder op
+FINAL_MODEL=${CHECKPOINTS_DIR}/yolov4_bs${BATCH_SIZE}_with_decoder.onnx
+if [ -f $FINAL_MODEL ];then
+ echo " "Add Decoder Skip, $FINAL_MODEL has been existed
+else
+ python3 ${RUN_DIR}/deploy.py \
+ --src ${CURRENT_MODEL} \
+ --dst ${FINAL_MODEL} \
+ --decoder_type YoloV3Decoder \
+ --decoder_input_names /models.138/conv94/Conv_output_0 /models.149/conv102/Conv_output_0 /models.160/conv110/Conv_output_0 \
+ --decoder8_anchor 12 16 19 36 40 28 \
+ --decoder16_anchor 36 75 76 55 72 146 \
+ --decoder32_anchor 142 110 192 243 459 401
+ echo " "Generate ${FINAL_MODEL}
+fi
+CURRENT_MODEL=${FINAL_MODEL}
+
+# Build Engine
+echo Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/yolov4_fp16.engine
+if [ -f $ENGINE_FILE ];then
+ echo " "Build Engine Skip, $ENGINE_FILE has been existed
+else
+ python3 ${RUN_DIR}/build_engine.py \
+ --precision float16 \
+ --model ${CURRENT_MODEL} \
+ --engine ${ENGINE_FILE}
+ echo " "Generate Engine ${ENGINE_FILE}
+fi
+
+# Inference
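+# FPS mode (performance variant of the accuracy script): measures throughput over
+# --loop_count batches of --bsz images rather than reporting COCO accuracy.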
+echo Inference
+RUN_BATCH_SIZE=16
+python3 ${RUN_DIR}/inference.py \
+ --test_mode FPS \
+ --model_engine ${ENGINE_FILE} \
+ --warm_up 2 \
+ --bsz ${RUN_BATCH_SIZE} \
+ --imgsz 608 \
+ --loop_count 10 \
+ --eval_dir ${EVAL_DIR} \
+ --coco_gt ${COCO_GT} \
+ --pred_dir ${CHECKPOINTS_DIR} \
+ --precision float16 \
+ --map_target 0.30; check_status
+exit ${EXIT_STATUS}
diff --git a/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4darknet_int8_accuary.sh b/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4darknet_int8_accuary.sh
new file mode 100644
index 0000000000000000000000000000000000000000..c62d174c09e6f4b005a9b1e7ce028cc47643a930
--- /dev/null
+++ b/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4darknet_int8_accuary.sh
@@ -0,0 +1,121 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+EXIT_STATUS=0
+check_status()
+{
+ if ((${PIPESTATUS[0]} != 0));then
+ EXIT_STATUS=1
+ fi
+}
+
+PROJ_DIR=$(cd $(dirname $0);cd ../; pwd)
+DATASETS_DIR="${PROJ_DIR}/data/coco"
+COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json
+EVAL_DIR=${DATASETS_DIR}/images/val2017
+CHECKPOINTS_DIR="${PROJ_DIR}/data"
+RUN_DIR="${PROJ_DIR}"
+ORIGINE_MODEL=${CHECKPOINTS_DIR}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo ====================== Model Info ======================
+echo Model Name : yolov4_darknet
+echo Onnx Path : ${ORIGINE_MODEL}
+
+BATCH_SIZE=16
+CURRENT_MODEL=${CHECKPOINTS_DIR}/yolov4_sim.onnx
+
+# Cut decoder part
+echo "Cut decoder part"
+FINAL_MODEL=${CHECKPOINTS_DIR}/yolov4_bs${BATCH_SIZE}_without_decoder.onnx
+if [ -f $FINAL_MODEL ];then
+ echo " "CUT Model Skip, $FINAL_MODEL has been existed
+else
+ python3 ${RUN_DIR}/cut_model.py \
+ --input_model ${CURRENT_MODEL} \
+ --output_model ${FINAL_MODEL} \
+ --input_names input \
+ --output_names /models.138/conv94/Conv_output_0 /models.149/conv102/Conv_output_0 /models.160/conv110/Conv_output_0
+ echo " "Generate ${FINAL_MODEL}
+fi
+CURRENT_MODEL=${FINAL_MODEL}
+
+# quant
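+# Post-training quantization: quant.py calibrates the cut (decoder-less) model on val2017
+# images with a hist_percentile observer and writes a quantized ONNX for INT8 engine building.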
+FINAL_MODEL=${CHECKPOINTS_DIR}/quantized_yolov4_bs${BATCH_SIZE}_without_decoder.onnx
+if [ -f $FINAL_MODEL ];then
+ echo " "Change Batchsize Skip, $FINAL_MODEL has been existed
+else
+ python3 ${RUN_DIR}/quant.py \
+ --model_name "YOLOV4_DARKNET" \
+ --model ${CURRENT_MODEL} \
+ --bsz ${BATCH_SIZE} \
+ --dataset_dir ${EVAL_DIR} \
+ --ann_file ${COCO_GT} \
+ --observer "hist_percentile" \
+ --save_quant_model ${FINAL_MODEL} \
+ --imgsz 608
+ echo " "Generate ${FINAL_MODEL}
+fi
+CURRENT_MODEL=${FINAL_MODEL}
+
+# add decoder op
+FINAL_MODEL=${CHECKPOINTS_DIR}/quantized_yolov4_bs${BATCH_SIZE}_with_decoder.onnx
+if [ -f $FINAL_MODEL ];then
+ echo " "Add Decoder Skip, $FINAL_MODEL has been existed
+else
+ python3 ${RUN_DIR}/deploy.py \
+ --src ${CURRENT_MODEL} \
+ --dst ${FINAL_MODEL} \
+ --decoder_type YoloV3Decoder \
+ --decoder_input_names /models.138/conv94/Conv_output_0 /models.149/conv102/Conv_output_0 /models.160/conv110/Conv_output_0 \
+ --decoder8_anchor 12 16 19 36 40 28 \
+ --decoder16_anchor 36 75 76 55 72 146 \
+ --decoder32_anchor 142 110 192 243 459 401
+ echo " "Generate ${FINAL_MODEL}
+fi
+CURRENT_MODEL=${FINAL_MODEL}
+
+# Build Engine
+echo Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/yolov4_int8.engine
+if [ -f $ENGINE_FILE ];then
+ echo " "Build Engine Skip, $ENGINE_FILE has been existed
+else
+ python3 ${RUN_DIR}/build_engine.py \
+ --precision int8 \
+ --model ${CURRENT_MODEL} \
+ --engine ${ENGINE_FILE}
+ echo " "Generate Engine ${ENGINE_FILE}
+fi
+
+# Inference
+echo Inference
+RUN_BATCH_SIZE=16
+python3 ${RUN_DIR}/inference.py \
+ --test_mode MAP \
+ --model_engine ${ENGINE_FILE} \
+ --warm_up 2 \
+ --bsz ${RUN_BATCH_SIZE} \
+ --imgsz 608 \
+ --loop_count 10 \
+ --eval_dir ${EVAL_DIR} \
+ --coco_gt ${COCO_GT} \
+ --pred_dir ${CHECKPOINTS_DIR} \
+ --precision int8 \
+ --map_target 0.30; check_status
+exit ${EXIT_STATUS}
diff --git a/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4darknet_int8_performance.sh b/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4darknet_int8_performance.sh
new file mode 100644
index 0000000000000000000000000000000000000000..2e335fa1d013961c136cda4f79fd2be712311494
--- /dev/null
+++ b/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4darknet_int8_performance.sh
@@ -0,0 +1,121 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+EXIT_STATUS=0
+check_status()
+{
+ if ((${PIPESTATUS[0]} != 0));then
+ EXIT_STATUS=1
+ fi
+}
+
+PROJ_DIR=$(cd $(dirname $0);cd ../; pwd)
+DATASETS_DIR="${PROJ_DIR}/data/coco"
+COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json
+EVAL_DIR=${DATASETS_DIR}/images/val2017
+CHECKPOINTS_DIR="${PROJ_DIR}/data"
+RUN_DIR="${PROJ_DIR}"
+ORIGINE_MODEL=${CHECKPOINTS_DIR}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo ====================== Model Info ======================
+echo Model Name : yolov4_darknet
+echo Onnx Path : ${ORIGINE_MODEL}
+
+BATCH_SIZE=16
+CURRENT_MODEL=${CHECKPOINTS_DIR}/yolov4_sim.onnx
+
+# Cut decoder part
+echo "Cut decoder part"
+FINAL_MODEL=${CHECKPOINTS_DIR}/yolov4_bs${BATCH_SIZE}_without_decoder.onnx
+if [ -f $FINAL_MODEL ];then
+ echo " "CUT Model Skip, $FINAL_MODEL has been existed
+else
+ python3 ${RUN_DIR}/cut_model.py \
+ --input_model ${CURRENT_MODEL} \
+ --output_model ${FINAL_MODEL} \
+ --input_names input \
+ --output_names /models.138/conv94/Conv_output_0 /models.149/conv102/Conv_output_0 /models.160/conv110/Conv_output_0
+ echo " "Generate ${FINAL_MODEL}
+fi
+CURRENT_MODEL=${FINAL_MODEL}
+
+# quant
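+# Post-training quantization (same calibration step as in the INT8 accuracy script).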
+FINAL_MODEL=${CHECKPOINTS_DIR}/quantized_yolov4_bs${BATCH_SIZE}_without_decoder.onnx
+if [ -f $FINAL_MODEL ];then
+ echo " "Change Batchsize Skip, $FINAL_MODEL has been existed
+else
+ python3 ${RUN_DIR}/quant.py \
+ --model_name "YOLOV4_DARKNET" \
+ --model ${CURRENT_MODEL} \
+ --bsz ${BATCH_SIZE} \
+ --dataset_dir ${EVAL_DIR} \
+ --ann_file ${COCO_GT} \
+ --observer "hist_percentile" \
+ --save_quant_model ${FINAL_MODEL} \
+ --imgsz 608
+ echo " "Generate ${FINAL_MODEL}
+fi
+CURRENT_MODEL=${FINAL_MODEL}
+
+# add decoder op
+FINAL_MODEL=${CHECKPOINTS_DIR}/quantized_yolov4_bs${BATCH_SIZE}_with_decoder.onnx
+if [ -f $FINAL_MODEL ];then
+ echo " "Add Decoder Skip, $FINAL_MODEL has been existed
+else
+ python3 ${RUN_DIR}/deploy.py \
+ --src ${CURRENT_MODEL} \
+ --dst ${FINAL_MODEL} \
+ --decoder_type YoloV3Decoder \
+ --decoder_input_names /models.138/conv94/Conv_output_0 /models.149/conv102/Conv_output_0 /models.160/conv110/Conv_output_0 \
+ --decoder8_anchor 12 16 19 36 40 28 \
+ --decoder16_anchor 36 75 76 55 72 146 \
+ --decoder32_anchor 142 110 192 243 459 401
+ echo " "Generate ${FINAL_MODEL}
+fi
+CURRENT_MODEL=${FINAL_MODEL}
+
+# Build Engine
+echo Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/yolov4_int8.engine
+if [ -f $ENGINE_FILE ];then
+ echo " "Build Engine Skip, $ENGINE_FILE has been existed
+else
+ python3 ${RUN_DIR}/build_engine.py \
+ --precision int8 \
+ --model ${CURRENT_MODEL} \
+ --engine ${ENGINE_FILE}
+ echo " "Generate Engine ${ENGINE_FILE}
+fi
+
+# Inference
+echo Inference
+RUN_BATCH_SIZE=16
+python3 ${RUN_DIR}/inference.py \
+ --test_mode FPS \
+ --model_engine ${ENGINE_FILE} \
+ --warm_up 2 \
+ --bsz ${RUN_BATCH_SIZE} \
+ --imgsz 608 \
+ --loop_count 10 \
+ --eval_dir ${EVAL_DIR} \
+ --coco_gt ${COCO_GT} \
+ --pred_dir ${CHECKPOINTS_DIR} \
+ --precision int8 \
+ --map_target 0.30; check_status
+exit ${EXIT_STATUS}