diff --git a/models/cv/classification/resnext101_32x8d/ixrt/README.md b/models/cv/classification/resnext101_32x8d/ixrt/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..520a0b7f78ccae5cb0046049a6c5f38d1b1e2ed1
--- /dev/null
+++ b/models/cv/classification/resnext101_32x8d/ixrt/README.md
@@ -0,0 +1,52 @@
+# ResNext101_32x8d (IXRT)
+
+## Model Description
+
+ResNeXt101_32x8d is a deep convolutional neural network introduced in the paper "Aggregated Residual Transformations for Deep Neural Networks." It enhances the traditional ResNet architecture with grouped convolutions, offering a new dimension for scaling network capacity through "cardinality" (the number of groups) rather than merely increasing depth or width. The model is 101 layers deep and uses 32 groups, each with a width of 8 channels. This design improves feature extraction while maintaining computational efficiency.
+
+## Supported Environments
+
+| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release |
+|--------|-----------|---------|
+| MR-V100 | 4.2.0 | 25.06 |
+
+## Model Preparation
+
+### Prepare Resources
+
+Pretrained model: <https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth>
+
+Dataset: <https://www.image-net.org/download.php> to download the validation dataset.
+
+### Install Dependencies
+
+```bash
+pip3 install -r requirements.txt
+```
+
+### Model Conversion
+
+```bash
+python3 export.py --weight resnext101_32x8d-8ba56ff5.pth --output resnext101_32x8d.onnx
+```
+
+## Model Inference
+
+```bash
+export DATASETS_DIR=/Path/to/imagenet_val/
+```
+
+### FP16
+
+```bash
+# Accuracy
+bash scripts/infer_resnext101_32x8d_fp16_accuracy.sh
+# Performance
+bash scripts/infer_resnext101_32x8d_fp16_performance.sh
+```
+
+## Model Results
+
+| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) |
+| ---------------- | --------- | --------- | ------ | -------- | -------- |
+| ResNext101_32x8d | 32 | FP16 | 825.78 | 79.277 | 94.498 |
diff --git a/models/cv/classification/resnext101_32x8d/ixrt/build_engine.py b/models/cv/classification/resnext101_32x8d/ixrt/build_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..109ad08c63c990da45c55103f96a4bedfe3fb0a8
--- /dev/null
+++ b/models/cv/classification/resnext101_32x8d/ixrt/build_engine.py
@@ -0,0 +1,52 @@
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
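+
+# Typical usage, as invoked by scripts/infer_resnext101_32x8d_fp16_*.sh (the
+# file names below are illustrative, not fixed):
+#   python3 build_engine.py --precision float16 \
+#       --model resnext101_32x8d_32.onnx \
+#       --engine resnext101_32x8d_float16_bs32.engine
+# Note that any --precision other than "int8" is built as an FP16 engine.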
+
+import argparse
+
+import tensorrt
+
+def main(config):
+    IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING)
+    builder = tensorrt.Builder(IXRT_LOGGER)
+    EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
+    network = builder.create_network(EXPLICIT_BATCH)
+    build_config = builder.create_builder_config()
+    parser = tensorrt.OnnxParser(network, IXRT_LOGGER)
+    parsed = parser.parse_from_file(config.model)
+    assert parsed, f"Failed to parse ONNX model: {config.model}"
+
+    # Any precision other than int8 is built as float16.
+    precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16
+    build_config.set_flag(precision)
+
+    plan = builder.build_serialized_network(network, build_config)
+    assert plan, "Failed to build serialized network."
+    engine_file_path = config.engine
+    with open(engine_file_path, "wb") as f:
+        f.write(plan)
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--model", type=str)
+    parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8",
+                        help="The precision of datatype")
+    parser.add_argument("--engine", type=str, default=None)
+    args = parser.parse_args()
+    return args
+
+if __name__ == "__main__":
+    args = parse_args()
+    main(args)
\ No newline at end of file
diff --git a/models/cv/classification/resnext101_32x8d/ixrt/calibration_dataset.py b/models/cv/classification/resnext101_32x8d/ixrt/calibration_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..46449ba99d40b2c9c30a4c795c194ce5c11a04f3
--- /dev/null
+++ b/models/cv/classification/resnext101_32x8d/ixrt/calibration_dataset.py
@@ -0,0 +1,113 @@
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
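+
+# Assumed dataset layout, inferred from the loader below: an ImageFolder-style
+# directory that additionally contains a `val_map.txt`, with one tab-separated
+# `<image file name>\t<integer label>` pair per line, for example:
+#   some_val_image.JPEG	207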
+
+import os
+
+import torch
+import torchvision.datasets
+from torch.utils.data import DataLoader
+from torchvision import transforms as T
+
+
+class CalibrationImageNet(torchvision.datasets.ImageFolder):
+    def __init__(self, *args, **kwargs):
+        super(CalibrationImageNet, self).__init__(*args, **kwargs)
+        img2label_path = os.path.join(self.root, "val_map.txt")
+        if not os.path.exists(img2label_path):
+            raise FileNotFoundError(f"Not found label file `{img2label_path}`.")
+
+        self.img2label_map = self.make_img2label_map(img2label_path)
+
+    def make_img2label_map(self, path):
+        with open(path) as f:
+            lines = f.readlines()
+
+        img2label_map = dict()
+        for line in lines:
+            line = line.strip().split("\t")
+            if len(line) != 2:
+                continue
+            img_name, label = line
+            img_name = img_name.strip()
+            if img_name in [None, ""]:
+                continue
+            label = int(label.strip())
+            img2label_map[img_name] = label
+        return img2label_map
+
+    def __getitem__(self, index):
+        path, target = self.samples[index]
+        sample = self.loader(path)
+        if self.transform is not None:
+            sample = self.transform(sample)
+        # if self.target_transform is not None:
+        #     target = self.target_transform(target)
+        img_name = os.path.basename(path)
+        target = self.img2label_map[img_name]
+
+        return sample, target
+
+
+def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0):
+    dataset = CalibrationImageNet(
+        data_path,
+        transform=T.Compose(
+            [
+                T.Resize(256),
+                T.CenterCrop(img_sz),
+                T.ToTensor(),
+                T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+            ]
+        ),
+    )
+
+    calibration_dataset = dataset
+    if num_samples is not None:
+        calibration_dataset = torch.utils.data.Subset(
+            dataset, indices=range(num_samples)
+        )
+
+    calibration_dataloader = DataLoader(
+        calibration_dataset,
+        shuffle=False,
+        batch_size=batch_size,
+        drop_last=False,
+        num_workers=workers,
+    )
+
+    verify_dataloader = DataLoader(
+        dataset,
+        shuffle=False,
+        batch_size=batch_size,
+        drop_last=False,
+        num_workers=workers,
+    )
+
+    return calibration_dataloader, verify_dataloader
+
+
+def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000):
+    num_samples = min(total_sample, step * batch_size)
+    if step < 0:
+        num_samples = None
+    calibration_dataloader, _ = create_dataloaders(
+        dataset_dir,
+        img_sz=img_sz,
+        batch_size=batch_size,
+        workers=workers,
+        num_samples=num_samples,
+    )
+    return calibration_dataloader
\ No newline at end of file
diff --git a/models/cv/classification/resnext101_32x8d/ixrt/ci/prepare.sh b/models/cv/classification/resnext101_32x8d/ixrt/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..5fd42f1639fa059ec7f411f522e7c34474f768a8
--- /dev/null
+++ b/models/cv/classification/resnext101_32x8d/ixrt/ci/prepare.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
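+
+# CI preparation: installs the OpenGL runtime needed by OpenCV, then exports
+# the ONNX model. Assumes resnext101_32x8d-8ba56ff5.pth was downloaded into
+# the working directory beforehand.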
+
+set -x
+
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+    apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+    yum install -y mesa-libGL
+else
+    echo "Unsupported OS: ${ID}"
+fi
+
+pip3 install -r requirements.txt
+python3 export.py --weight resnext101_32x8d-8ba56ff5.pth --output resnext101_32x8d.onnx
\ No newline at end of file
diff --git a/models/cv/classification/resnext101_32x8d/ixrt/common.py b/models/cv/classification/resnext101_32x8d/ixrt/common.py
new file mode 100644
index 0000000000000000000000000000000000000000..5abaf5124e279aecf9acee0d3b02003ac39aa15f
--- /dev/null
+++ b/models/cv/classification/resnext101_32x8d/ixrt/common.py
@@ -0,0 +1,81 @@
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import torch
+import tensorrt
+import numpy as np
+from cuda import cudart
+
+def eval_batch(batch_score, batch_label):
+    batch_score = torch.from_numpy(batch_score).to(torch.float32)
+    values, indices = batch_score.topk(5)
+    top1, top5 = 0, 0
+    for idx, label in enumerate(batch_label):
+        if label == indices[idx][0]:
+            top1 += 1
+        if label in indices[idx]:
+            top5 += 1
+    return top1, top5
+
+def create_engine_context(engine_path, logger):
+    with open(engine_path, "rb") as f:
+        runtime = tensorrt.Runtime(logger)
+        assert runtime
+        engine = runtime.deserialize_cuda_engine(f.read())
+        assert engine
+        context = engine.create_execution_context()
+        assert context
+
+    return engine, context
+
+def get_io_bindings(engine):
+    # Setup I/O bindings
+    inputs = []
+    outputs = []
+    allocations = []
+
+    for i in range(engine.num_bindings):
+        name = engine.get_binding_name(i)
+        dtype = engine.get_binding_dtype(i)
+        shape = engine.get_binding_shape(i)
+        size = np.dtype(tensorrt.nptype(dtype)).itemsize
+        for s in shape:
+            size *= s
+        err, allocation = cudart.cudaMalloc(size)
+        assert err == cudart.cudaError_t.cudaSuccess
+        binding = {
+            "index": i,
+            "name": name,
+            "dtype": np.dtype(tensorrt.nptype(dtype)),
+            "shape": list(shape),
+            "allocation": allocation,
+            "nbytes": size,
+        }
+        print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}")
+        allocations.append(allocation)
+        if engine.binding_is_input(i):
+            inputs.append(binding)
+        else:
+            outputs.append(binding)
+    return inputs, outputs, allocations
diff --git a/models/cv/classification/resnext101_32x8d/ixrt/config/RESNEXT101_32X8D_CONFIG b/models/cv/classification/resnext101_32x8d/ixrt/config/RESNEXT101_32X8D_CONFIG
new file mode 100644
index 0000000000000000000000000000000000000000..4dc5c375c7238059b47d2acb799c81af3c318f20
--- /dev/null
+++ b/models/cv/classification/resnext101_32x8d/ixrt/config/RESNEXT101_32X8D_CONFIG
@@ -0,0 +1,34 @@
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+# IMGSIZE : model input height/width
+# MODEL_NAME : basename used for the generated onnx/engine files
+# ORIGINE_MODEL : file name of the original onnx model
+IMGSIZE=224
+MODEL_NAME=resnext101_32x8d
+ORIGINE_MODEL=resnext101_32x8d.onnx

+# QUANT CONFIG (only takes effect when PRECISION is int8)
+    # QUANT_OBSERVER : quantization observer, one of [hist_percentile, percentile, minmax, entropy, ema]
+    # QUANT_BATCHSIZE : batch size of the calibration dataloader; best kept equal to the batch size in the onnx, since some ops (e.g. Reshape) may otherwise infer wrong shapes
+    # QUANT_STEP : number of calibration steps
+    # QUANT_SEED : random seed, to keep quantization results reproducible
+    # QUANT_EXIST_ONNX : set this if a quantized model from another source is used
+QUANT_OBSERVER=hist_percentile
+QUANT_BATCHSIZE=1
+QUANT_STEP=32
+QUANT_SEED=42
+DISABLE_QUANT_LIST=
+QUANT_EXIST_ONNX=
diff --git a/models/cv/classification/resnext101_32x8d/ixrt/export.py b/models/cv/classification/resnext101_32x8d/ixrt/export.py
new file mode 100644
index 0000000000000000000000000000000000000000..e692c84ca6d3cfa4cb68ee2dd7d67ca52538df51
--- /dev/null
+++ b/models/cv/classification/resnext101_32x8d/ixrt/export.py
@@ -0,0 +1,61 @@
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import torch
+import torchvision
+import argparse
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument("--weight",
+                        type=str,
+                        required=True,
+                        help="pytorch model weight.")
+
+    parser.add_argument("--output",
+                        type=str,
+                        required=True,
+                        help="export onnx model path.")
+
+    args = parser.parse_args()
+    return args
+
+def main():
+    args = parse_args()
+
+    model = torchvision.models.resnext101_32x8d()
+    model.load_state_dict(torch.load(args.weight))
+    model.eval()
+
+    input_names = ['input']
+    output_names = ['output']
+    dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}}
+    dummy_input = torch.randn(1, 3, 224, 224)
+
+    torch.onnx.export(
+        model,
+        dummy_input,
+        args.output,
+        input_names=input_names,
+        dynamic_axes=dynamic_axes,
+        output_names=output_names,
+        opset_version=13
+    )
+
+    print("Export onnx model successfully!")
+
+if __name__ == "__main__":
+    main()
diff --git a/models/cv/classification/resnext101_32x8d/ixrt/inference.py b/models/cv/classification/resnext101_32x8d/ixrt/inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..ef56158a28e259125b840f44ec58473c6b48d724
--- /dev/null
+++ b/models/cv/classification/resnext101_32x8d/ixrt/inference.py
@@ -0,0 +1,176 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import argparse +import json +import os +import re +import time +from tqdm import tqdm + +import cv2 +import numpy as np +from cuda import cuda, cudart +import torch +import tensorrt + +from calibration_dataset import getdataloader +from common import eval_batch, create_engine_context, get_io_bindings + +def main(config): + dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) + + host_mem = tensorrt.IHostMemory + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + + # Load Engine && I/O bindings + engine, context = create_engine_context(config.engine_file, logger) + inputs, outputs, allocations = get_io_bindings(engine) + + # Warm up + if config.warm_up > 0: + print("\nWarm Start.") + for i in range(config.warm_up): + context.execute_v2(allocations) + print("Warm Done.") + + # Inference + if config.test_mode == "FPS": + torch.cuda.synchronize() + start_time = time.time() + + for i in range(config.loop_count): + context.execute_v2(allocations) + + torch.cuda.synchronize() + end_time = time.time() + forward_time = end_time - start_time + + num_samples = 50000 + if config.loop_count * config.bsz < num_samples: + num_samples = config.loop_count * config.bsz + fps = num_samples / forward_time + + print("FPS : ", fps) + print(f"Performance Check : Test {fps} >= target {config.fps_target}") + if fps >= config.fps_target: + print("pass!") + exit() + else: + print("failed!") + exit(1) + + elif config.test_mode == "ACC": + + ## Prepare the output data + output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) + print(f"output shape : {output.shape} output type : {output.dtype}") + + total_sample = 0 + acc_top1, acc_top5 = 0, 0 + + start_time = time.time() + with tqdm(total= len(dataloader)) as _tqdm: + for idx, (batch_data, batch_label) in enumerate(dataloader): + batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) + batch_data = np.ascontiguousarray(batch_data) + total_sample += batch_data.shape[0] + (err,) = cudart.cudaMemcpy( + inputs[0]["allocation"], + batch_data, + batch_data.nbytes, + cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, + ) + assert err == cudart.cudaError_t.cudaSuccess + # cuda.memcpy_htod(inputs[0]["allocation"], batch_data) + context.execute_v2(allocations) + (err,) = cudart.cudaMemcpy( + output, + outputs[0]["allocation"], + outputs[0]["nbytes"], + cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost, + ) + assert err == cudart.cudaError_t.cudaSuccess + # cuda.memcpy_dtoh(output, outputs[0]["allocation"]) + + # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model + if len(output.shape) == 4: + output = output.squeeze(axis=(2,3)) + + batch_top1, batch_top5 = eval_batch(output, batch_label) + acc_top1 += batch_top1 + acc_top5 += batch_top5 + + _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), + acc_5='{:.4f}'.format(acc_top5/total_sample)) + _tqdm.update(1) + end_time = time.time() + e2e_time = end_time - start_time + print(F"E2E 
time : {e2e_time:.3f} seconds")
+        err, = cudart.cudaFree(inputs[0]["allocation"])
+        assert err == cudart.cudaError_t.cudaSuccess
+        err, = cudart.cudaFree(outputs[0]["allocation"])
+        assert err == cudart.cudaError_t.cudaSuccess
+        print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}")
+        print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}")
+        acc1 = acc_top1/total_sample
+        print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}")
+        if acc1 >= config.acc_target:
+            print("pass!")
+            exit()
+        else:
+            print("failed!")
+            exit(1)
+
+def parse_config():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--test_mode", type=str, default="FPS", help="run mode, FPS or ACC")
+    parser.add_argument(
+        "--engine_file",
+        type=str,
+        help="engine file path"
+    )
+    parser.add_argument(
+        "--datasets_dir",
+        type=str,
+        default="",
+        help="ImageNet dir",
+    )
+    parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times")
+    parser.add_argument("--bsz", type=int, default=32, help="test batch size")
+    parser.add_argument(
+        "--imgsz",
+        "--img",
+        "--img-size",
+        type=int,
+        default=224,
+        help="inference size h,w",
+    )
+    parser.add_argument("--use_async", action="store_true")
+    parser.add_argument(
+        "--device", type=int, default=0, help="cuda device, i.e. 0 or 0,1,2,3,4"
+    )
+    parser.add_argument("--fps_target", type=float, default=-1.0)
+    parser.add_argument("--acc_target", type=float, default=-1.0)
+    parser.add_argument("--loop_count", type=int, default=-1)
+
+    config = parser.parse_args()
+    return config
+
+if __name__ == "__main__":
+    config = parse_config()
+    main(config)
diff --git a/models/cv/classification/resnext101_32x8d/ixrt/modify_batchsize.py b/models/cv/classification/resnext101_32x8d/ixrt/modify_batchsize.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e8d086b460b14db81cc8e2d2d1487324534f551
--- /dev/null
+++ b/models/cv/classification/resnext101_32x8d/ixrt/modify_batchsize.py
@@ -0,0 +1,57 @@
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import onnx
+import argparse
+
+def change_input_dim(model, bsz):
+    batch_size = bsz
+
+    # The following code changes the first dimension of every input to be batch_size
+    # Modify as appropriate ... note that this requires all inputs to
+    # have the same batch_size
+    inputs = model.graph.input
+    for input in inputs:
+        # Checks omitted. This assumes that all inputs are tensors and have a shape with first dim.
+        # Add checks as needed.
+        dim1 = input.type.tensor_type.shape.dim[0]
+        # update dim to be a symbolic value
+        if isinstance(batch_size, str):
+            # set dynamic batch size
+            dim1.dim_param = batch_size
+        elif isinstance(batch_size, int):
+            # set the given fixed batch size
+            dim1.dim_value = batch_size
+        else:
+            # fall back to a batch size of 1
+            dim1.dim_value = 1
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--batch_size", type=int)
+    parser.add_argument("--origin_model", type=str)
+    parser.add_argument("--output_model", type=str)
+    args = parser.parse_args()
+    return args
+
+args = parse_args()
+model = onnx.load(args.origin_model)
+change_input_dim(model, args.batch_size)
+onnx.save(model, args.output_model)
diff --git a/models/cv/classification/resnext101_32x8d/ixrt/requirements.txt b/models/cv/classification/resnext101_32x8d/ixrt/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..54599ec2e0e86396e2c3c8cdb87ac005b943ca39
--- /dev/null
+++ b/models/cv/classification/resnext101_32x8d/ixrt/requirements.txt
@@ -0,0 +1,8 @@
+onnx
+tqdm
+onnxsim
+tabulate
+ppq
+cuda-python
\ No newline at end of file
diff --git a/models/cv/classification/resnext101_32x8d/ixrt/scripts/infer_resnext101_32x8d_fp16_accuracy.sh b/models/cv/classification/resnext101_32x8d/ixrt/scripts/infer_resnext101_32x8d_fp16_accuracy.sh
new file mode 100644
index 0000000000000000000000000000000000000000..ce017c77680ca8ae7ae7ef491b565ff17ff53b88
--- /dev/null
+++ b/models/cv/classification/resnext101_32x8d/ixrt/scripts/infer_resnext101_32x8d_fp16_accuracy.sh
@@ -0,0 +1,119 @@
+#!/bin/bash
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
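+
+# Usage sketch (DATASETS_DIR must point at the ImageNet validation set; --bs
+# and --tgt optionally override the batch size and the Top-1 target, given as
+# a fraction):
+#   export DATASETS_DIR=/path/to/imagenet_val/
+#   bash scripts/infer_resnext101_32x8d_fp16_accuracy.sh --bs 32 --tgt 0.79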
+EXIT_STATUS=0
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then
+        EXIT_STATUS=1
+    fi
+}
+
+# Run parameters
+BSZ=32
+TGT=-1
+WARM_UP=0
+LOOP_COUNT=-1
+RUN_MODE=ACC
+PRECISION=float16
+
+# Update arguments
+index=0
+options=$@
+arguments=($options)
+for argument in $options
+do
+    index=`expr $index + 1`
+    case $argument in
+      --bs) BSZ=${arguments[index]};;
+      --tgt) TGT=${arguments[index]};;
+    esac
+done
+
+PROJ_DIR=${PROJ_DIR:-"."}
+DATASETS_DIR="${DATASETS_DIR}"
+CHECKPOINTS_DIR="${PROJ_DIR}"
+RUN_DIR="${PROJ_DIR}"
+CONFIG_DIR="${PROJ_DIR}/config/RESNEXT101_32X8D_CONFIG"
+source ${CONFIG_DIR}
+ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo CONFIG_DIR : ${CONFIG_DIR}
+echo ====================== Model Info ======================
+echo Model Name : ${MODEL_NAME}
+echo Onnx Path : ${ORIGINE_MODEL}
+
+step=0
+SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
+
+# Simplify Model
+let step++
+echo;
+echo [STEP ${step}] : Simplify Model
+if [ -f ${SIM_MODEL} ];then
+    echo " "Simplify Model Skip, ${SIM_MODEL} already exists
+else
+    python3 ${RUN_DIR}/simplify_model.py \
+        --origin_model $ORIGINE_MODEL \
+        --output_model ${SIM_MODEL}
+    echo " "Generate ${SIM_MODEL}
+fi
+
+# Change Batchsize
+let step++
+echo;
+echo [STEP ${step}] : Change Batchsize
+FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx
+if [ -f $FINAL_MODEL ];then
+    echo " "Change Batchsize Skip, $FINAL_MODEL already exists
+else
+    python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
+        --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL}
+    echo " "Generate ${FINAL_MODEL}
+fi
+
+# Build Engine
+let step++
+echo;
+echo [STEP ${step}] : Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
+if [ -f $ENGINE_FILE ];then
+    echo " "Build Engine Skip, $ENGINE_FILE already exists
+else
+    python3 ${RUN_DIR}/build_engine.py \
+        --precision ${PRECISION} \
+        --model ${FINAL_MODEL} \
+        --engine ${ENGINE_FILE}
+    echo " "Generate Engine ${ENGINE_FILE}
+fi
+
+# Inference
+let step++
+echo;
+echo [STEP ${step}] : Inference
+python3 ${RUN_DIR}/inference.py \
+    --engine_file=${ENGINE_FILE} \
+    --datasets_dir=${DATASETS_DIR} \
+    --imgsz=${IMGSIZE} \
+    --warm_up=${WARM_UP} \
+    --loop_count ${LOOP_COUNT} \
+    --test_mode ${RUN_MODE} \
+    --acc_target ${TGT} \
+    --bsz ${BSZ}; check_status
+
+exit ${EXIT_STATUS}
\ No newline at end of file
diff --git a/models/cv/classification/resnext101_32x8d/ixrt/scripts/infer_resnext101_32x8d_fp16_performance.sh b/models/cv/classification/resnext101_32x8d/ixrt/scripts/infer_resnext101_32x8d_fp16_performance.sh
new file mode 100644
index 0000000000000000000000000000000000000000..48cd1de549b6f125bcb5b1a768b6b2c86c656766
--- /dev/null
+++ b/models/cv/classification/resnext101_32x8d/ixrt/scripts/infer_resnext101_32x8d_fp16_performance.sh
@@ -0,0 +1,120 @@
+#!/bin/bash
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+EXIT_STATUS=0
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then
+        EXIT_STATUS=1
+    fi
+}
+
+# Run parameters
+BSZ=32
+TGT=-1
+WARM_UP=3
+LOOP_COUNT=20
+RUN_MODE=FPS
+PRECISION=float16
+
+# Update arguments
+index=0
+options=$@
+arguments=($options)
+for argument in $options
+do
+    index=`expr $index + 1`
+    case $argument in
+      --bs) BSZ=${arguments[index]};;
+      --tgt) TGT=${arguments[index]};;
+    esac
+done
+
+PROJ_DIR=${PROJ_DIR:-"."}
+DATASETS_DIR="${DATASETS_DIR}"
+CHECKPOINTS_DIR="${PROJ_DIR}"
+RUN_DIR="${PROJ_DIR}"
+CONFIG_DIR="${PROJ_DIR}/config/RESNEXT101_32X8D_CONFIG"
+source ${CONFIG_DIR}
+ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo CONFIG_DIR : ${CONFIG_DIR}
+echo ====================== Model Info ======================
+echo Model Name : ${MODEL_NAME}
+echo Onnx Path : ${ORIGINE_MODEL}
+
+step=0
+SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
+
+# Simplify Model
+let step++
+echo;
+echo [STEP ${step}] : Simplify Model
+if [ -f ${SIM_MODEL} ];then
+    echo " "Simplify Model Skip, ${SIM_MODEL} already exists
+else
+    python3 ${RUN_DIR}/simplify_model.py \
+        --origin_model $ORIGINE_MODEL \
+        --output_model ${SIM_MODEL}
+    echo " "Generate ${SIM_MODEL}
+fi
+
+
+# Change Batchsize
+let step++
+echo;
+echo [STEP ${step}] : Change Batchsize
+FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx
+if [ -f $FINAL_MODEL ];then
+    echo " "Change Batchsize Skip, $FINAL_MODEL already exists
+else
+    python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
+        --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL}
+    echo " "Generate ${FINAL_MODEL}
+fi
+
+# Build Engine
+let step++
+echo;
+echo [STEP ${step}] : Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
+if [ -f $ENGINE_FILE ];then
+    echo " "Build Engine Skip, $ENGINE_FILE already exists
+else
+    python3 ${RUN_DIR}/build_engine.py \
+        --precision ${PRECISION} \
+        --model ${FINAL_MODEL} \
+        --engine ${ENGINE_FILE}
+    echo " "Generate Engine ${ENGINE_FILE}
+fi
+
+# Inference
+let step++
+echo;
+echo [STEP ${step}] : Inference
+python3 ${RUN_DIR}/inference.py \
+    --engine_file=${ENGINE_FILE} \
+    --datasets_dir=${DATASETS_DIR} \
+    --imgsz=${IMGSIZE} \
+    --warm_up=${WARM_UP} \
+    --loop_count ${LOOP_COUNT} \
+    --test_mode ${RUN_MODE} \
+    --fps_target ${TGT} \
+    --bsz ${BSZ}; check_status
+
+exit ${EXIT_STATUS}
\ No newline at end of file
diff --git a/models/cv/classification/resnext101_32x8d/ixrt/simplify_model.py b/models/cv/classification/resnext101_32x8d/ixrt/simplify_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..bef33576a17235d24305fa32ae3dab6d7accc10e
--- /dev/null
+++ b/models/cv/classification/resnext101_32x8d/ixrt/simplify_model.py
@@ -0,0 +1,41 @@
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import onnx
+import argparse
+from onnxsim import simplify
+
+# Simplify
+def simplify_model(args):
+    onnx_model = onnx.load(args.origin_model)
+    model_simp, check = simplify(onnx_model)
+    assert check, "Simplified ONNX model could not be validated."
+    model_simp = onnx.shape_inference.infer_shapes(model_simp)
+    onnx.save(model_simp, args.output_model)
+    print(" Simplify onnx Done.")
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--origin_model", type=str)
+    parser.add_argument("--output_model", type=str)
+    parser.add_argument("--reshape", action="store_true")
+    args = parser.parse_args()
+    return args
+
+args = parse_args()
+simplify_model(args)
diff --git a/models/cv/classification/resnext101_64x4d/ixrt/README.md b/models/cv/classification/resnext101_64x4d/ixrt/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..6551c222049d5d40b13b659b350c9e06bf1c9584
--- /dev/null
+++ b/models/cv/classification/resnext101_64x4d/ixrt/README.md
@@ -0,0 +1,52 @@
+# ResNext101_64x4d (IXRT)
+
+## Model Description
+
+ResNeXt101_64x4d is a deep learning model based on the deep residual network architecture which enhances performance and efficiency through the use of grouped convolutions. With a depth of 101 layers and 64 groups of width 4, it is particularly suited for complex image recognition tasks; it adapts to various input sizes while maintaining excellent accuracy.
+
+## Supported Environments
+
+| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release |
+|--------|-----------|---------|
+| MR-V100 | 4.2.0 | 25.03 |
+
+## Model Preparation
+
+### Prepare Resources
+
+Pretrained model: <https://download.pytorch.org/models/resnext101_64x4d-173b62eb.pth>
+
+Dataset: <https://www.image-net.org/download.php> to download the validation dataset.
+
+### Install Dependencies
+
+```bash
+pip3 install -r requirements.txt
+```
+
+### Model Conversion
+
+```bash
+python3 export.py --weight resnext101_64x4d-173b62eb.pth --output resnext101_64x4d.onnx
+```
+
+## Model Inference
+
+```bash
+export DATASETS_DIR=/Path/to/imagenet_val/
+```
+
+### FP16
+
+```bash
+# Accuracy
+bash scripts/infer_resnext101_64x4d_fp16_accuracy.sh
+# Performance
+bash scripts/infer_resnext101_64x4d_fp16_performance.sh
+```
+
+## Model Results
+
+| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) |
+| ---------------- | --------- | --------- | ------ | -------- | -------- |
+| ResNext101_64x4d | 32 | FP16 | 663.13 | 82.953 | 96.221 |
diff --git a/models/cv/classification/resnext101_64x4d/ixrt/build_engine.py b/models/cv/classification/resnext101_64x4d/ixrt/build_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..109ad08c63c990da45c55103f96a4bedfe3fb0a8
--- /dev/null
+++ b/models/cv/classification/resnext101_64x4d/ixrt/build_engine.py
@@ -0,0 +1,52 @@
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
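+
+# Same engine-build helper as the resnext101_32x8d version; a typical
+# invocation (file names illustrative):
+#   python3 build_engine.py --precision float16 \
+#       --model Resnext101_64x4d_32.onnx \
+#       --engine Resnext101_64x4d_float16_bs32.engine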
+
+import argparse
+
+import tensorrt
+
+def main(config):
+    IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING)
+    builder = tensorrt.Builder(IXRT_LOGGER)
+    EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
+    network = builder.create_network(EXPLICIT_BATCH)
+    build_config = builder.create_builder_config()
+    parser = tensorrt.OnnxParser(network, IXRT_LOGGER)
+    parsed = parser.parse_from_file(config.model)
+    assert parsed, f"Failed to parse ONNX model: {config.model}"
+
+    # Any precision other than int8 is built as float16.
+    precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16
+    build_config.set_flag(precision)
+
+    plan = builder.build_serialized_network(network, build_config)
+    assert plan, "Failed to build serialized network."
+    engine_file_path = config.engine
+    with open(engine_file_path, "wb") as f:
+        f.write(plan)
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--model", type=str)
+    parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8",
+                        help="The precision of datatype")
+    parser.add_argument("--engine", type=str, default=None)
+    args = parser.parse_args()
+    return args
+
+if __name__ == "__main__":
+    args = parse_args()
+    main(args)
\ No newline at end of file
diff --git a/models/cv/classification/resnext101_64x4d/ixrt/calibration_dataset.py b/models/cv/classification/resnext101_64x4d/ixrt/calibration_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..46449ba99d40b2c9c30a4c795c194ce5c11a04f3
--- /dev/null
+++ b/models/cv/classification/resnext101_64x4d/ixrt/calibration_dataset.py
@@ -0,0 +1,113 @@
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
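+
+# As in the resnext101_32x8d copy of this file: the dataset root is assumed
+# to be an ImageFolder layout plus a tab-separated `val_map.txt`
+# (`<image file name>\t<integer label>` per line).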
+
+import os
+
+import torch
+import torchvision.datasets
+from torch.utils.data import DataLoader
+from torchvision import transforms as T
+
+
+class CalibrationImageNet(torchvision.datasets.ImageFolder):
+    def __init__(self, *args, **kwargs):
+        super(CalibrationImageNet, self).__init__(*args, **kwargs)
+        img2label_path = os.path.join(self.root, "val_map.txt")
+        if not os.path.exists(img2label_path):
+            raise FileNotFoundError(f"Not found label file `{img2label_path}`.")
+
+        self.img2label_map = self.make_img2label_map(img2label_path)
+
+    def make_img2label_map(self, path):
+        with open(path) as f:
+            lines = f.readlines()
+
+        img2label_map = dict()
+        for line in lines:
+            line = line.strip().split("\t")
+            if len(line) != 2:
+                continue
+            img_name, label = line
+            img_name = img_name.strip()
+            if img_name in [None, ""]:
+                continue
+            label = int(label.strip())
+            img2label_map[img_name] = label
+        return img2label_map
+
+    def __getitem__(self, index):
+        path, target = self.samples[index]
+        sample = self.loader(path)
+        if self.transform is not None:
+            sample = self.transform(sample)
+        # if self.target_transform is not None:
+        #     target = self.target_transform(target)
+        img_name = os.path.basename(path)
+        target = self.img2label_map[img_name]
+
+        return sample, target
+
+
+def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0):
+    dataset = CalibrationImageNet(
+        data_path,
+        transform=T.Compose(
+            [
+                T.Resize(256),
+                T.CenterCrop(img_sz),
+                T.ToTensor(),
+                T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+            ]
+        ),
+    )
+
+    calibration_dataset = dataset
+    if num_samples is not None:
+        calibration_dataset = torch.utils.data.Subset(
+            dataset, indices=range(num_samples)
+        )
+
+    calibration_dataloader = DataLoader(
+        calibration_dataset,
+        shuffle=False,
+        batch_size=batch_size,
+        drop_last=False,
+        num_workers=workers,
+    )
+
+    verify_dataloader = DataLoader(
+        dataset,
+        shuffle=False,
+        batch_size=batch_size,
+        drop_last=False,
+        num_workers=workers,
+    )
+
+    return calibration_dataloader, verify_dataloader
+
+
+def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000):
+    num_samples = min(total_sample, step * batch_size)
+    if step < 0:
+        num_samples = None
+    calibration_dataloader, _ = create_dataloaders(
+        dataset_dir,
+        img_sz=img_sz,
+        batch_size=batch_size,
+        workers=workers,
+        num_samples=num_samples,
+    )
+    return calibration_dataloader
\ No newline at end of file
diff --git a/models/cv/classification/resnext101_64x4d/ixrt/ci/prepare.sh b/models/cv/classification/resnext101_64x4d/ixrt/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..74cb6057f6382cdd793477792628aaaaba4ad74b
--- /dev/null
+++ b/models/cv/classification/resnext101_64x4d/ixrt/ci/prepare.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
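+
+# CI preparation, mirroring the resnext101_32x8d script; assumes
+# resnext101_64x4d-173b62eb.pth was downloaded here beforehand.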
+
+set -x
+
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+    apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+    yum install -y mesa-libGL
+else
+    echo "Unsupported OS: ${ID}"
+fi
+
+pip3 install -r requirements.txt
+python3 export.py --weight resnext101_64x4d-173b62eb.pth --output resnext101_64x4d.onnx
\ No newline at end of file
diff --git a/models/cv/classification/resnext101_64x4d/ixrt/common.py b/models/cv/classification/resnext101_64x4d/ixrt/common.py
new file mode 100644
index 0000000000000000000000000000000000000000..5abaf5124e279aecf9acee0d3b02003ac39aa15f
--- /dev/null
+++ b/models/cv/classification/resnext101_64x4d/ixrt/common.py
@@ -0,0 +1,81 @@
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import torch
+import tensorrt
+import numpy as np
+from cuda import cudart
+
+def eval_batch(batch_score, batch_label):
+    batch_score = torch.from_numpy(batch_score).to(torch.float32)
+    values, indices = batch_score.topk(5)
+    top1, top5 = 0, 0
+    for idx, label in enumerate(batch_label):
+        if label == indices[idx][0]:
+            top1 += 1
+        if label in indices[idx]:
+            top5 += 1
+    return top1, top5
+
+def create_engine_context(engine_path, logger):
+    with open(engine_path, "rb") as f:
+        runtime = tensorrt.Runtime(logger)
+        assert runtime
+        engine = runtime.deserialize_cuda_engine(f.read())
+        assert engine
+        context = engine.create_execution_context()
+        assert context
+
+    return engine, context
+
+def get_io_bindings(engine):
+    # Setup I/O bindings
+    inputs = []
+    outputs = []
+    allocations = []
+
+    for i in range(engine.num_bindings):
+        name = engine.get_binding_name(i)
+        dtype = engine.get_binding_dtype(i)
+        shape = engine.get_binding_shape(i)
+        size = np.dtype(tensorrt.nptype(dtype)).itemsize
+        for s in shape:
+            size *= s
+        err, allocation = cudart.cudaMalloc(size)
+        assert err == cudart.cudaError_t.cudaSuccess
+        binding = {
+            "index": i,
+            "name": name,
+            "dtype": np.dtype(tensorrt.nptype(dtype)),
+            "shape": list(shape),
+            "allocation": allocation,
+            "nbytes": size,
+        }
+        print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}")
+        allocations.append(allocation)
+        if engine.binding_is_input(i):
+            inputs.append(binding)
+        else:
+            outputs.append(binding)
+    return inputs, outputs, allocations
diff --git a/models/cv/classification/resnext101_64x4d/ixrt/config/RESNEXT101_64X4D_CONFIG b/models/cv/classification/resnext101_64x4d/ixrt/config/RESNEXT101_64X4D_CONFIG
new file mode 100644
index 0000000000000000000000000000000000000000..923cc6c0a3bdccf7cac6f0a682dfe6f20fa0d73b
--- /dev/null
+++ b/models/cv/classification/resnext101_64x4d/ixrt/config/RESNEXT101_64X4D_CONFIG
@@ -0,0 +1,34 @@
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+# IMGSIZE : model input height/width
+# MODEL_NAME : basename used for the generated onnx/engine files
+# ORIGINE_MODEL : file name of the original onnx model
+IMGSIZE=224
+MODEL_NAME=Resnext101_64x4d
+ORIGINE_MODEL=resnext101_64x4d.onnx

+# QUANT CONFIG (only takes effect when PRECISION is int8)
+    # QUANT_OBSERVER : quantization observer, one of [hist_percentile, percentile, minmax, entropy, ema]
+    # QUANT_BATCHSIZE : batch size of the calibration dataloader; best kept equal to the batch size in the onnx, since some ops (e.g. Reshape) may otherwise infer wrong shapes
+    # QUANT_STEP : number of calibration steps
+    # QUANT_SEED : random seed, to keep quantization results reproducible
+    # QUANT_EXIST_ONNX : set this if a quantized model from another source is used
+QUANT_OBSERVER=hist_percentile
+QUANT_BATCHSIZE=1
+QUANT_STEP=32
+QUANT_SEED=42
+DISABLE_QUANT_LIST=
+QUANT_EXIST_ONNX=
diff --git a/models/cv/classification/resnext101_64x4d/ixrt/export.py b/models/cv/classification/resnext101_64x4d/ixrt/export.py
new file mode 100644
index 0000000000000000000000000000000000000000..3f7b1c19082c69c907074f60da2681a2fecfbea8
--- /dev/null
+++ b/models/cv/classification/resnext101_64x4d/ixrt/export.py
@@ -0,0 +1,61 @@
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import torch
+import torchvision
+import argparse
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument("--weight",
+                        type=str,
+                        required=True,
+                        help="pytorch model weight.")
+
+    parser.add_argument("--output",
+                        type=str,
+                        required=True,
+                        help="export onnx model path.")
+
+    args = parser.parse_args()
+    return args
+
+def main():
+    args = parse_args()
+
+    model = torchvision.models.resnext101_64x4d()
+    model.load_state_dict(torch.load(args.weight))
+    model.eval()
+
+    input_names = ['input']
+    output_names = ['output']
+    dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}}
+    dummy_input = torch.randn(1, 3, 224, 224)
+
+    torch.onnx.export(
+        model,
+        dummy_input,
+        args.output,
+        input_names=input_names,
+        dynamic_axes=dynamic_axes,
+        output_names=output_names,
+        opset_version=13
+    )
+
+    print("Export onnx model successfully!")
+
+if __name__ == "__main__":
+    main()
diff --git a/models/cv/classification/resnext101_64x4d/ixrt/inference.py b/models/cv/classification/resnext101_64x4d/ixrt/inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..ef56158a28e259125b840f44ec58473c6b48d724
--- /dev/null
+++ b/models/cv/classification/resnext101_64x4d/ixrt/inference.py
@@ -0,0 +1,176 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import argparse +import json +import os +import re +import time +from tqdm import tqdm + +import cv2 +import numpy as np +from cuda import cuda, cudart +import torch +import tensorrt + +from calibration_dataset import getdataloader +from common import eval_batch, create_engine_context, get_io_bindings + +def main(config): + dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) + + host_mem = tensorrt.IHostMemory + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + + # Load Engine && I/O bindings + engine, context = create_engine_context(config.engine_file, logger) + inputs, outputs, allocations = get_io_bindings(engine) + + # Warm up + if config.warm_up > 0: + print("\nWarm Start.") + for i in range(config.warm_up): + context.execute_v2(allocations) + print("Warm Done.") + + # Inference + if config.test_mode == "FPS": + torch.cuda.synchronize() + start_time = time.time() + + for i in range(config.loop_count): + context.execute_v2(allocations) + + torch.cuda.synchronize() + end_time = time.time() + forward_time = end_time - start_time + + num_samples = 50000 + if config.loop_count * config.bsz < num_samples: + num_samples = config.loop_count * config.bsz + fps = num_samples / forward_time + + print("FPS : ", fps) + print(f"Performance Check : Test {fps} >= target {config.fps_target}") + if fps >= config.fps_target: + print("pass!") + exit() + else: + print("failed!") + exit(1) + + elif config.test_mode == "ACC": + + ## Prepare the output data + output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) + print(f"output shape : {output.shape} output type : {output.dtype}") + + total_sample = 0 + acc_top1, acc_top5 = 0, 0 + + start_time = time.time() + with tqdm(total= len(dataloader)) as _tqdm: + for idx, (batch_data, batch_label) in enumerate(dataloader): + batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) + batch_data = np.ascontiguousarray(batch_data) + total_sample += batch_data.shape[0] + (err,) = cudart.cudaMemcpy( + inputs[0]["allocation"], + batch_data, + batch_data.nbytes, + cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, + ) + assert err == cudart.cudaError_t.cudaSuccess + # cuda.memcpy_htod(inputs[0]["allocation"], batch_data) + context.execute_v2(allocations) + (err,) = cudart.cudaMemcpy( + output, + outputs[0]["allocation"], + outputs[0]["nbytes"], + cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost, + ) + assert err == cudart.cudaError_t.cudaSuccess + # cuda.memcpy_dtoh(output, outputs[0]["allocation"]) + + # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model + if len(output.shape) == 4: + output = output.squeeze(axis=(2,3)) + + batch_top1, batch_top5 = eval_batch(output, batch_label) + acc_top1 += batch_top1 + acc_top5 += batch_top5 + + _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), + acc_5='{:.4f}'.format(acc_top5/total_sample)) + _tqdm.update(1) + end_time = time.time() + e2e_time = end_time - start_time + print(F"E2E 
time : {e2e_time:.3f} seconds")
+        err, = cudart.cudaFree(inputs[0]["allocation"])
+        assert err == cudart.cudaError_t.cudaSuccess
+        err, = cudart.cudaFree(outputs[0]["allocation"])
+        assert err == cudart.cudaError_t.cudaSuccess
+        print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}")
+        print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}")
+        acc1 = acc_top1/total_sample
+        print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}")
+        if acc1 >= config.acc_target:
+            print("pass!")
+            exit()
+        else:
+            print("failed!")
+            exit(1)
+
+def parse_config():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--test_mode", type=str, default="FPS", help="run mode, FPS or ACC")
+    parser.add_argument(
+        "--engine_file",
+        type=str,
+        help="engine file path"
+    )
+    parser.add_argument(
+        "--datasets_dir",
+        type=str,
+        default="",
+        help="ImageNet dir",
+    )
+    parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times")
+    parser.add_argument("--bsz", type=int, default=32, help="test batch size")
+    parser.add_argument(
+        "--imgsz",
+        "--img",
+        "--img-size",
+        type=int,
+        default=224,
+        help="inference size h,w",
+    )
+    parser.add_argument("--use_async", action="store_true")
+    parser.add_argument(
+        "--device", type=int, default=0, help="cuda device, i.e. 0 or 0,1,2,3,4"
+    )
+    parser.add_argument("--fps_target", type=float, default=-1.0)
+    parser.add_argument("--acc_target", type=float, default=-1.0)
+    parser.add_argument("--loop_count", type=int, default=-1)
+
+    config = parser.parse_args()
+    return config
+
+if __name__ == "__main__":
+    config = parse_config()
+    main(config)
diff --git a/models/cv/classification/resnext101_64x4d/ixrt/modify_batchsize.py b/models/cv/classification/resnext101_64x4d/ixrt/modify_batchsize.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e8d086b460b14db81cc8e2d2d1487324534f551
--- /dev/null
+++ b/models/cv/classification/resnext101_64x4d/ixrt/modify_batchsize.py
@@ -0,0 +1,57 @@
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import onnx
+import argparse
+
+def change_input_dim(model, bsz):
+    batch_size = bsz
+
+    # The following code changes the first dimension of every input to be batch_size
+    # Modify as appropriate ... note that this requires all inputs to
+    # have the same batch_size
+    inputs = model.graph.input
+    for input in inputs:
+        # Checks omitted. This assumes that all inputs are tensors and have a shape with first dim.
+        # Add checks as needed.
+        dim1 = input.type.tensor_type.shape.dim[0]
+        # update dim to be a symbolic value
+        if isinstance(batch_size, str):
+            # set dynamic batch size
+            dim1.dim_param = batch_size
+        elif isinstance(batch_size, int):
+            # set the given fixed batch size
+            dim1.dim_value = batch_size
+        else:
+            # fall back to a batch size of 1
+            dim1.dim_value = 1
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--batch_size", type=int)
+    parser.add_argument("--origin_model", type=str)
+    parser.add_argument("--output_model", type=str)
+    args = parser.parse_args()
+    return args
+
+args = parse_args()
+model = onnx.load(args.origin_model)
+change_input_dim(model, args.batch_size)
+onnx.save(model, args.output_model)
diff --git a/models/cv/classification/resnext101_64x4d/ixrt/requirements.txt b/models/cv/classification/resnext101_64x4d/ixrt/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..54599ec2e0e86396e2c3c8cdb87ac005b943ca39
--- /dev/null
+++ b/models/cv/classification/resnext101_64x4d/ixrt/requirements.txt
@@ -0,0 +1,8 @@
+onnx
+tqdm
+onnxsim
+tabulate
+ppq
+cuda-python
\ No newline at end of file
diff --git a/models/cv/classification/resnext101_64x4d/ixrt/scripts/infer_resnext101_64x4d_fp16_accuracy.sh b/models/cv/classification/resnext101_64x4d/ixrt/scripts/infer_resnext101_64x4d_fp16_accuracy.sh
new file mode 100644
index 0000000000000000000000000000000000000000..6be730739f2e237f653ee2b6343e97155b5f16de
--- /dev/null
+++ b/models/cv/classification/resnext101_64x4d/ixrt/scripts/infer_resnext101_64x4d_fp16_accuracy.sh
@@ -0,0 +1,119 @@
+#!/bin/bash
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
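+
+# Usage mirrors the resnext101_32x8d accuracy script, e.g.:
+#   export DATASETS_DIR=/path/to/imagenet_val/
+#   bash scripts/infer_resnext101_64x4d_fp16_accuracy.sh --bs 32 --tgt 0.82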
+EXIT_STATUS=0
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then
+        EXIT_STATUS=1
+    fi
+}
+
+# Run parameters
+BSZ=32
+TGT=-1
+WARM_UP=0
+LOOP_COUNT=-1
+RUN_MODE=ACC
+PRECISION=float16
+
+# Update arguments
+index=0
+options=$@
+arguments=($options)
+for argument in $options
+do
+    index=`expr $index + 1`
+    case $argument in
+      --bs) BSZ=${arguments[index]};;
+      --tgt) TGT=${arguments[index]};;
+    esac
+done
+
+PROJ_DIR=${PROJ_DIR:-"."}
+DATASETS_DIR="${DATASETS_DIR}"
+CHECKPOINTS_DIR="${PROJ_DIR}"
+RUN_DIR="${PROJ_DIR}"
+CONFIG_DIR="${PROJ_DIR}/config/RESNEXT101_64X4D_CONFIG"
+source ${CONFIG_DIR}
+ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo CONFIG_DIR : ${CONFIG_DIR}
+echo ====================== Model Info ======================
+echo Model Name : ${MODEL_NAME}
+echo Onnx Path : ${ORIGINE_MODEL}
+
+step=0
+SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
+
+# Simplify Model
+let step++
+echo;
+echo [STEP ${step}] : Simplify Model
+if [ -f ${SIM_MODEL} ];then
+    echo "    "Simplify Model Skip, ${SIM_MODEL} already exists
+else
+    python3 ${RUN_DIR}/simplify_model.py \
+    --origin_model $ORIGINE_MODEL \
+    --output_model ${SIM_MODEL}
+    echo "    "Generate ${SIM_MODEL}
+fi
+
+# Change Batchsize
+let step++
+echo;
+echo [STEP ${step}] : Change Batchsize
+FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx
+if [ -f $FINAL_MODEL ];then
+    echo "    "Change Batchsize Skip, $FINAL_MODEL already exists
+else
+    python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
+        --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL}
+    echo "    "Generate ${FINAL_MODEL}
+fi
+
+# Build Engine
+let step++
+echo;
+echo [STEP ${step}] : Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
+if [ -f $ENGINE_FILE ];then
+    echo "    "Build Engine Skip, $ENGINE_FILE already exists
+else
+    python3 ${RUN_DIR}/build_engine.py \
+        --precision ${PRECISION} \
+        --model ${FINAL_MODEL} \
+        --engine ${ENGINE_FILE}
+    echo "    "Generate Engine ${ENGINE_FILE}
+fi
+
+# Inference
+let step++
+echo;
+echo [STEP ${step}] : Inference
+python3 ${RUN_DIR}/inference.py \
+    --engine_file=${ENGINE_FILE} \
+    --datasets_dir=${DATASETS_DIR} \
+    --imgsz=${IMGSIZE} \
+    --warm_up=${WARM_UP} \
+    --loop_count ${LOOP_COUNT} \
+    --test_mode ${RUN_MODE} \
+    --acc_target ${TGT} \
+    --bsz ${BSZ}; check_status
+
+exit ${EXIT_STATUS}
\ No newline at end of file
diff --git a/models/cv/classification/resnext101_64x4d/ixrt/scripts/infer_resnext101_64x4d_fp16_performance.sh b/models/cv/classification/resnext101_64x4d/ixrt/scripts/infer_resnext101_64x4d_fp16_performance.sh
new file mode 100644
index 0000000000000000000000000000000000000000..d5a0958a2ae54dec537e128f6f7cc840efd576c1
--- /dev/null
+++ b/models/cv/classification/resnext101_64x4d/ixrt/scripts/infer_resnext101_64x4d_fp16_performance.sh
@@ -0,0 +1,120 @@
+#!/bin/bash
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the
+# License for the specific language governing permissions and limitations
+# under the License.
+EXIT_STATUS=0
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then
+        EXIT_STATUS=1
+    fi
+}
+
+# Run parameters
+BSZ=32
+TGT=-1
+WARM_UP=3
+LOOP_COUNT=20
+RUN_MODE=FPS
+PRECISION=float16
+
+# Update arguments
+index=0
+options=$@
+arguments=($options)
+for argument in $options
+do
+    index=`expr $index + 1`
+    case $argument in
+      --bs) BSZ=${arguments[index]};;
+      --tgt) TGT=${arguments[index]};;
+    esac
+done
+
+PROJ_DIR=${PROJ_DIR:-"."}
+DATASETS_DIR="${DATASETS_DIR}"
+CHECKPOINTS_DIR="${PROJ_DIR}"
+RUN_DIR="${PROJ_DIR}"
+CONFIG_DIR="${PROJ_DIR}/config/RESNEXT101_64X4D_CONFIG"
+source ${CONFIG_DIR}
+ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo CONFIG_DIR : ${CONFIG_DIR}
+echo ====================== Model Info ======================
+echo Model Name : ${MODEL_NAME}
+echo Onnx Path : ${ORIGINE_MODEL}
+
+step=0
+SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
+
+# Simplify Model
+let step++
+echo;
+echo [STEP ${step}] : Simplify Model
+if [ -f ${SIM_MODEL} ];then
+    echo "    "Simplify Model Skip, ${SIM_MODEL} already exists
+else
+    python3 ${RUN_DIR}/simplify_model.py \
+    --origin_model $ORIGINE_MODEL \
+    --output_model ${SIM_MODEL}
+    echo "    "Generate ${SIM_MODEL}
+fi
+
+
+# Change Batchsize
+let step++
+echo;
+echo [STEP ${step}] : Change Batchsize
+FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx
+if [ -f $FINAL_MODEL ];then
+    echo "    "Change Batchsize Skip, $FINAL_MODEL already exists
+else
+    python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
+        --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL}
+    echo "    "Generate ${FINAL_MODEL}
+fi
+
+# Build Engine
+let step++
+echo;
+echo [STEP ${step}] : Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
+if [ -f $ENGINE_FILE ];then
+    echo "    "Build Engine Skip, $ENGINE_FILE already exists
+else
+    python3 ${RUN_DIR}/build_engine.py \
+        --precision ${PRECISION} \
+        --model ${FINAL_MODEL} \
+        --engine ${ENGINE_FILE}
+    echo "    "Generate Engine ${ENGINE_FILE}
+fi
+
+# Inference
+let step++
+echo;
+echo [STEP ${step}] : Inference
+python3 ${RUN_DIR}/inference.py \
+    --engine_file=${ENGINE_FILE} \
+    --datasets_dir=${DATASETS_DIR} \
+    --imgsz=${IMGSIZE} \
+    --warm_up=${WARM_UP} \
+    --loop_count ${LOOP_COUNT} \
+    --test_mode ${RUN_MODE} \
+    --fps_target ${TGT} \
+    --bsz ${BSZ}; check_status
+
+exit ${EXIT_STATUS}
\ No newline at end of file
diff --git a/models/cv/classification/resnext101_64x4d/ixrt/simplify_model.py b/models/cv/classification/resnext101_64x4d/ixrt/simplify_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..bef33576a17235d24305fa32ae3dab6d7accc10e
--- /dev/null
+++ b/models/cv/classification/resnext101_64x4d/ixrt/simplify_model.py
@@ -0,0 +1,41 @@
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the +# License for the specific language governing permissions and limitations +# under the License. + +import onnx +import argparse +from onnxsim import simplify + +# Simplify +def simplify_model(args): + onnx_model = onnx.load(args.origin_model) + model_simp, check = simplify(onnx_model) + model_simp = onnx.shape_inference.infer_shapes(model_simp) + onnx.save(model_simp, args.output_model) + print(" Simplify onnx Done.") + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--origin_model", type=str) + parser.add_argument("--output_model", type=str) + parser.add_argument("--reshape", action="store_true") + args = parser.parse_args() + return args + +args = parse_args() +simplify_model(args) + + + + diff --git a/models/cv/classification/shufflenetv2_x1_0/ixrt/README.md b/models/cv/classification/shufflenetv2_x1_0/ixrt/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e22634a7d97305f95707a87550751708387e7ec1 --- /dev/null +++ b/models/cv/classification/shufflenetv2_x1_0/ixrt/README.md @@ -0,0 +1,63 @@ +# ShuffleNetV2_x1_0 (IXRT) + +## Model Description + +ShuffleNet V2_x1_0 is an efficient convolutional neural network (CNN) architecture that emphasizes a balance between computational efficiency and accuracy, particularly suited for deployment on mobile and embedded devices. The model refines the ShuffleNet series by introducing structural innovations that enhance feature reuse and reduce redundancy, all while maintaining simplicity and performance. + +## Supported Environments + +| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release | +|--------|-----------|---------| +| MR-V100 | 4.2.0 | 25.03 | + +## Model Preparation + +### Prepare Resources + +Pretrained model: + +Dataset: to download the validation dataset. + +### Install Dependencies + +```bash +# Install libGL +## CentOS +yum install -y mesa-libGL +## Ubuntu +apt install -y libgl1-mesa-glx + +pip3 install -r requirements.txt +``` + +### Model Conversion + +```bash +mkdir checkpoints +python3 export.py --weight shufflenetv2_x1-5666bf0f80.pth --output checkpoints/shufflenetv2_x1_0.onnx +``` + +## Model Inference + +```bash +export PROJ_DIR=./ +export DATASETS_DIR=/path/to/imagenet_val/ +export CHECKPOINTS_DIR=./checkpoints +export RUN_DIR=./ +export CONFIG_DIR=config/SHUFFLENETV2_X1_0_CONFIG +``` + +### FP16 + +```bash +# Accuracy +bash scripts/infer_shufflenetv2_x1_0_fp16_accuracy.sh +# Performance +bash scripts/infer_shufflenetv2_x1_0_fp16_performance.sh +``` + +## Model Results + +| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) | +| ----------------- | --------- | --------- | -------- | -------- | -------- | +| ShuffleNetV2_x1_0 | 32 | FP16 | 8232.980 | 69.308 | 88.302 | diff --git a/models/cv/classification/shufflenetv2_x1_0/ixrt/build_engine.py b/models/cv/classification/shufflenetv2_x1_0/ixrt/build_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..e0a3bb4bcd143c68406d915245eefb7060010127 --- /dev/null +++ b/models/cv/classification/shufflenetv2_x1_0/ixrt/build_engine.py @@ -0,0 +1,53 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import cv2 +import argparse +import numpy as np + +import torch +import tensorrt + +def main(config): + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(config.model) + + precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 + # print("precision : ", precision) + build_config.set_flag(precision) + + plan = builder.build_serialized_network(network, build_config) + engine_file_path = config.engine + with open(engine_file_path, "wb") as f: + f.write(plan) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", + help="The precision of datatype") + parser.add_argument("--engine", type=str, default=None) + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = parse_args() + main(args) \ No newline at end of file diff --git a/models/cv/classification/shufflenetv2_x1_0/ixrt/calibration_dataset.py b/models/cv/classification/shufflenetv2_x1_0/ixrt/calibration_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..774ed840114097ea75eb78f67ffc401f7806ff29 --- /dev/null +++ b/models/cv/classification/shufflenetv2_x1_0/ixrt/calibration_dataset.py @@ -0,0 +1,112 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
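+
+# Note: the dataset root is loaded with torchvision's ImageFolder and must also
+# contain a val_map.txt mapping image basenames to integer labels, one
+# "<image_name>\t<label>" pair per line (see CalibrationImageNet below).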
+ +import os +import torch +import torchvision.datasets +from torch.utils.data import DataLoader +from torchvision import models +from torchvision import transforms as T + + +class CalibrationImageNet(torchvision.datasets.ImageFolder): + def __init__(self, *args, **kwargs): + super(CalibrationImageNet, self).__init__(*args, **kwargs) + img2label_path = os.path.join(self.root, "val_map.txt") + if not os.path.exists(img2label_path): + raise FileNotFoundError(f"Not found label file `{img2label_path}`.") + + self.img2label_map = self.make_img2label_map(img2label_path) + + def make_img2label_map(self, path): + with open(path) as f: + lines = f.readlines() + + img2lable_map = dict() + for line in lines: + line = line.lstrip().rstrip().split("\t") + if len(line) != 2: + continue + img_name, label = line + img_name = img_name.strip() + if img_name in [None, ""]: + continue + label = int(label.strip()) + img2lable_map[img_name] = label + return img2lable_map + + def __getitem__(self, index): + path, target = self.samples[index] + sample = self.loader(path) + if self.transform is not None: + sample = self.transform(sample) + # if self.target_transform is not None: + # target = self.target_transform(target) + img_name = os.path.basename(path) + target = self.img2label_map[img_name] + + return sample, target + + +def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0): + dataset = CalibrationImageNet( + data_path, + transform=T.Compose( + [ + T.Resize(256), + T.CenterCrop(img_sz), + T.ToTensor(), + T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ] + ), + ) + + calibration_dataset = dataset + if num_samples is not None: + calibration_dataset = torch.utils.data.Subset( + dataset, indices=range(num_samples) + ) + + calibration_dataloader = DataLoader( + calibration_dataset, + shuffle=False, + batch_size=batch_size, + drop_last=False, + num_workers=workers, + ) + + verify_dataloader = DataLoader( + dataset, + shuffle=False, + batch_size=batch_size, + drop_last=False, + num_workers=workers, + ) + + return calibration_dataloader, verify_dataloader + + +def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): + num_samples = min(total_sample, step * batch_size) + if step < 0: + num_samples = None + calibration_dataloader, _ = create_dataloaders( + dataset_dir, + img_sz=img_sz, + batch_size=batch_size, + workers=workers, + num_samples=num_samples, + ) + return calibration_dataloader \ No newline at end of file diff --git a/models/cv/classification/shufflenetv2_x1_0/ixrt/ci/prepare.sh b/models/cv/classification/shufflenetv2_x1_0/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..ef18142a949bd11bbf0395def17339ee35e5cc52 --- /dev/null +++ b/models/cv/classification/shufflenetv2_x1_0/ixrt/ci/prepare.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
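+
+# CI preparation: install libGL for OpenCV, install the Python requirements,
+# and export the ONNX model. Assumes the pretrained weight
+# shufflenetv2_x1-5666bf0f80.pth is already present in the working directory.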
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r requirements.txt +mkdir -p checkpoints +python3 export.py --weight shufflenetv2_x1-5666bf0f80.pth --output checkpoints/shufflenetv2_x1_0.onnx \ No newline at end of file diff --git a/models/cv/classification/shufflenetv2_x1_0/ixrt/common.py b/models/cv/classification/shufflenetv2_x1_0/ixrt/common.py new file mode 100644 index 0000000000000000000000000000000000000000..f7aed872184b64e4648bd79f600a59f5b5d0948c --- /dev/null +++ b/models/cv/classification/shufflenetv2_x1_0/ixrt/common.py @@ -0,0 +1,79 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import cv2 +import glob +import torch +import tensorrt +import numpy as np +import pycuda.driver as cuda + +def eval_batch(batch_score, batch_label): + batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) + values, indices = batch_score.topk(5) + top1, top5 = 0, 0 + for idx, label in enumerate(batch_label): + + if label == indices[idx][0]: + top1 += 1 + if label in indices[idx]: + top5 += 1 + return top1, top5 + +def create_engine_context(engine_path, logger): + with open(engine_path, "rb") as f: + runtime = tensorrt.Runtime(logger) + assert runtime + engine = runtime.deserialize_cuda_engine(f.read()) + assert engine + context = engine.create_execution_context() + assert context + + return engine, context + +def get_io_bindings(engine): + # Setup I/O bindings + inputs = [] + outputs = [] + allocations = [] + + for i in range(engine.num_bindings): + is_input = False + if engine.binding_is_input(i): + is_input = True + name = engine.get_binding_name(i) + dtype = engine.get_binding_dtype(i) + shape = engine.get_binding_shape(i) + if is_input: + batch_size = shape[0] + size = np.dtype(tensorrt.nptype(dtype)).itemsize + for s in shape: + size *= s + allocation = cuda.mem_alloc(size) + binding = { + "index": i, + "name": name, + "dtype": np.dtype(tensorrt.nptype(dtype)), + "shape": list(shape), + "allocation": allocation, + } + print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") + allocations.append(allocation) + if engine.binding_is_input(i): + inputs.append(binding) + else: + outputs.append(binding) + return inputs, outputs, allocations diff --git a/models/cv/classification/shufflenetv2_x1_0/ixrt/config/SHUFFLENETV2_X1_0_CONFIG b/models/cv/classification/shufflenetv2_x1_0/ixrt/config/SHUFFLENETV2_X1_0_CONFIG new file mode 100644 index 0000000000000000000000000000000000000000..264dd5f6bc672e86e69d1e14ecc0ae14887af79d --- /dev/null +++ b/models/cv/classification/shufflenetv2_x1_0/ixrt/config/SHUFFLENETV2_X1_0_CONFIG @@ -0,0 +1,19 @@ +# IMGSIZE : 模型输入hw大小 +# MODEL_NAME : 生成onnx/engine的basename +# ORIGINE_MODEL : 原始onnx文件名称 
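+# (English translation of the comments in this file)
+# IMGSIZE: model input height/width. MODEL_NAME: basename for the generated
+# onnx/engine files. ORIGINE_MODEL: original onnx file name. The QUANT
+# settings below take effect only when PRECISION is int8: QUANT_OBSERVER is
+# the calibration strategy; QUANT_BATCHSIZE should match the onnx batch size
+# (some ops, e.g. Reshape, may infer shapes incorrectly otherwise);
+# QUANT_STEP is the number of calibration steps; QUANT_SEED fixes the random
+# seed so quantization is reproducible; QUANT_EXIST_ONNX points to an
+# externally quantized model, if one exists.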
+IMGSIZE=224 +MODEL_NAME=shufflenetv2_x1.0 +ORIGINE_MODEL=shufflenetv2_x1_0.onnx + +# QUANT CONFIG (仅PRECISION为int8时生效) + # QUANT_OBSERVER : 量化策略,可选 [hist_percentile, percentile, minmax, entropy, ema] + # QUANT_BATCHSIZE : 量化时组dataloader的batchsize, 最好和onnx中的batchsize保持一致,有些op可能推导shape错误(比如Reshape) + # QUANT_STEP : 量化步数 + # QUANT_SEED : 随机种子 保证量化结果可复现 + # QUANT_EXIST_ONNX : 如果有其他来源的量化模型则填写 +QUANT_OBSERVER=hist_percentile +QUANT_BATCHSIZE=1 +QUANT_STEP=32 +QUANT_SEED=42 +DISABLE_QUANT_LIST= +QUANT_EXIST_ONNX= diff --git a/models/cv/classification/shufflenetv2_x1_0/ixrt/export.py b/models/cv/classification/shufflenetv2_x1_0/ixrt/export.py new file mode 100644 index 0000000000000000000000000000000000000000..6344afd70407618561cefc76efaa5681e7c43727 --- /dev/null +++ b/models/cv/classification/shufflenetv2_x1_0/ixrt/export.py @@ -0,0 +1,61 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import torch +import torchvision +import argparse + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--weight", + type=str, + required=True, + help="pytorch model weight.") + + parser.add_argument("--output", + type=str, + required=True, + help="export onnx model path.") + + args = parser.parse_args() + return args + +def main(): + args = parse_args() + + model = torchvision.models.shufflenet_v2_x1_0() + model.load_state_dict(torch.load(args.weight)) + model.eval() + + input_names = ['input'] + output_names = ['output'] + dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}} + dummy_input = torch.randn(1, 3, 224, 224) + + torch.onnx.export( + model, + dummy_input, + args.output, + input_names = input_names, + dynamic_axes = dynamic_axes, + output_names = output_names, + opset_version=13 + ) + + print("Export onnx model successfully! ") + +if __name__ == "__main__": + main() diff --git a/models/cv/classification/shufflenetv2_x1_0/ixrt/inference.py b/models/cv/classification/shufflenetv2_x1_0/ixrt/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..78126cb0ece51d9a8a1f7daea67dc6cdd38ce9bf --- /dev/null +++ b/models/cv/classification/shufflenetv2_x1_0/ixrt/inference.py @@ -0,0 +1,161 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
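+#
+# Test modes: "FPS" measures throughput over loop_count batches; "ACC" runs
+# the ImageNet validation split and checks Top-1 accuracy against acc_target.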
+ +import argparse +import json +import os +import re +import time +from tqdm import tqdm + +import cv2 +import numpy as np +import pycuda.autoinit +import pycuda.driver as cuda +import torch +import tensorrt + +from calibration_dataset import getdataloader +from common import eval_batch, create_engine_context, get_io_bindings + +def main(config): + dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) + + host_mem = tensorrt.IHostMemory + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + + # Load Engine && I/O bindings + engine, context = create_engine_context(config.engine_file, logger) + inputs, outputs, allocations = get_io_bindings(engine) + + # Warm up + if config.warm_up > 0: + print("\nWarm Start.") + for i in range(config.warm_up): + context.execute_v2(allocations) + print("Warm Done.") + + # Inference + if config.test_mode == "FPS": + torch.cuda.synchronize() + start_time = time.time() + + for i in range(config.loop_count): + context.execute_v2(allocations) + + torch.cuda.synchronize() + end_time = time.time() + forward_time = end_time - start_time + + num_samples = 50000 + if config.loop_count * config.bsz < num_samples: + num_samples = config.loop_count * config.bsz + fps = num_samples / forward_time + + print("FPS : ", fps) + print(f"Performance Check : Test {fps} >= target {config.fps_target}") + if fps >= config.fps_target: + print("pass!") + exit() + else: + print("failed!") + exit(1) + + elif config.test_mode == "ACC": + + ## Prepare the output data + output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) + print(f"output shape : {output.shape} output type : {output.dtype}") + + total_sample = 0 + acc_top1, acc_top5 = 0, 0 + + start_time = time.time() + with tqdm(total= len(dataloader)) as _tqdm: + for idx, (batch_data, batch_label) in enumerate(dataloader): + batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) + batch_data = np.ascontiguousarray(batch_data) + total_sample += batch_data.shape[0] + + cuda.memcpy_htod(inputs[0]["allocation"], batch_data) + context.execute_v2(allocations) + cuda.memcpy_dtoh(output, outputs[0]["allocation"]) + + # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model + if len(output.shape) == 4: + output = output.squeeze(axis=(2,3)) + + batch_top1, batch_top5 = eval_batch(output, batch_label) + acc_top1 += batch_top1 + acc_top5 += batch_top5 + + _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), + acc_5='{:.4f}'.format(acc_top5/total_sample)) + _tqdm.update(1) + end_time = time.time() + end2end_time = end_time - start_time + + print(F"E2E time : {end2end_time:.3f} seconds") + print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") + print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") + acc1 = acc_top1/total_sample + print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") + if acc1 >= config.acc_target: + print("pass!") + exit() + else: + print("failed!") + exit(1) + +def parse_config(): + parser = argparse.ArgumentParser() + parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") + parser.add_argument( + "--engine_file", + type=str, + help="engine file path" + ) + parser.add_argument( + "--datasets_dir", + type=str, + default="", + help="ImageNet dir", + ) + parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") + parser.add_argument("--bsz", type=int, default=32, help="test batch size") + parser.add_argument( + "--imgsz", + "--img", + "--img-size", + type=int, + 
default=224, + help="inference size h,w", + ) + parser.add_argument("--use_async", action="store_true") + parser.add_argument( + "--device", type=int, default=0, help="cuda device, i.e. 0 or 0,1,2,3,4" + ) + parser.add_argument("--fps_target", type=float, default=-1.0) + parser.add_argument("--acc_target", type=float, default=-1.0) + parser.add_argument("--loop_count", type=int, default=-1) + + config = parser.parse_args() + return config + +if __name__ == "__main__": + config = parse_config() + main(config) diff --git a/models/cv/classification/shufflenetv2_x1_0/ixrt/modify_batchsize.py b/models/cv/classification/shufflenetv2_x1_0/ixrt/modify_batchsize.py new file mode 100644 index 0000000000000000000000000000000000000000..2e8d086b460b14db81cc8e2d2d1487324534f551 --- /dev/null +++ b/models/cv/classification/shufflenetv2_x1_0/ixrt/modify_batchsize.py @@ -0,0 +1,57 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import onnx +import argparse + +def change_input_dim(model, bsz): + batch_size = bsz + + # The following code changes the first dimension of every input to be batch_size + # Modify as appropriate ... note that this requires all inputs to + # have the same batch_size + inputs = model.graph.input + for input in inputs: + # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. + # Add checks as needed. + dim1 = input.type.tensor_type.shape.dim[0] + # update dim to be a symbolic value + if isinstance(batch_size, str): + # set dynamic batch size + dim1.dim_param = batch_size + elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): + # set given batch size + dim1.dim_value = int(batch_size) + else: + # set batch size of 1 + dim1.dim_value = 1 + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--batch_size", type=int) + parser.add_argument("--origin_model", type=str) + parser.add_argument("--output_model", type=str) + args = parser.parse_args() + return args + +args = parse_args() +model = onnx.load(args.origin_model) +change_input_dim(model, args.batch_size) +onnx.save(model, args.output_model) + + + + + diff --git a/models/cv/classification/shufflenetv2_x1_0/ixrt/quant.py b/models/cv/classification/shufflenetv2_x1_0/ixrt/quant.py new file mode 100644 index 0000000000000000000000000000000000000000..6e9740018253e62b05cc5a0d6c56bbbeae87a181 --- /dev/null +++ b/models/cv/classification/shufflenetv2_x1_0/ixrt/quant.py @@ -0,0 +1,59 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import cv2 +import random +import argparse +import numpy as np +from random import shuffle +from tensorrt.deploy import static_quantize + +import torch +import torchvision.datasets +from calibration_dataset import getdataloader + +def setseed(seed=42): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model_name", type=str) + parser.add_argument("--model", type=str) + parser.add_argument("--dataset_dir", type=str, default="imagenet_val") + parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], default="hist_percentile") + parser.add_argument("--disable_quant_names", nargs='*', type=str) + parser.add_argument("--save_dir", type=str, help="save path", default=None) + parser.add_argument("--bsz", type=int, default=32) + parser.add_argument("--step", type=int, default=20) + parser.add_argument("--seed", type=int, default=42) + parser.add_argument("--imgsz", type=int, default=224) + args = parser.parse_args() + print("Quant config:", args) + print(args.disable_quant_names) + return args + +args = parse_args() +setseed(args.seed) +calibration_dataloader = getdataloader(args.dataset_dir, args.step, args.bsz, img_sz=args.imgsz) +static_quantize(args.model, + calibration_dataloader=calibration_dataloader, + save_quant_onnx_path=os.path.join(args.save_dir, f"quantized_{args.model_name}.onnx"), + observer=args.observer, + data_preprocess=lambda x: x[0].to("cuda"), + quant_format="qdq", + disable_quant_names=args.disable_quant_names) \ No newline at end of file diff --git a/models/cv/classification/shufflenetv2_x1_0/ixrt/requirements.txt b/models/cv/classification/shufflenetv2_x1_0/ixrt/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..fa15a05d3194cbd5267be4ced65f6992cb889427 --- /dev/null +++ b/models/cv/classification/shufflenetv2_x1_0/ixrt/requirements.txt @@ -0,0 +1,6 @@ +onnx +tqdm +onnxsim +tabulate +ppq +pycuda \ No newline at end of file diff --git a/models/cv/classification/shufflenetv2_x1_0/ixrt/scripts/infer_shufflenetv2_x1_0_fp16_accuracy.sh b/models/cv/classification/shufflenetv2_x1_0/ixrt/scripts/infer_shufflenetv2_x1_0_fp16_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..c0d5f96ca5120fed452077d586c9975967e57434 --- /dev/null +++ b/models/cv/classification/shufflenetv2_x1_0/ixrt/scripts/infer_shufflenetv2_x1_0_fp16_accuracy.sh @@ -0,0 +1,143 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +# License for the specific language governing permissions and limitations +# under the License. + +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +TGT=-1 +WARM_UP=0 +LOOP_COUNT=-1 +RUN_MODE=ACC +PRECISION=float16 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +step=0 +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx + +# Simplify Model +let step++ +echo; +echo [STEP ${step}] : Simplify Model +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${SIM_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${DATASETS_DIR} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Generate ${SIM_MODEL} + fi +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ + --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${FINAL_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --engine_file=${ENGINE_FILE} \ + --datasets_dir=${DATASETS_DIR} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --acc_target ${TGT} \ + --bsz ${BSZ}; check_status + +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/shufflenetv2_x1_0/ixrt/scripts/infer_shufflenetv2_x1_0_fp16_performance.sh b/models/cv/classification/shufflenetv2_x1_0/ixrt/scripts/infer_shufflenetv2_x1_0_fp16_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..9ea9b62e3a59437e74df80aaea55cc6d03037ec7 --- /dev/null +++ 
b/models/cv/classification/shufflenetv2_x1_0/ixrt/scripts/infer_shufflenetv2_x1_0_fp16_performance.sh @@ -0,0 +1,143 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +TGT=-1 +WARM_UP=3 +LOOP_COUNT=20 +RUN_MODE=FPS +PRECISION=float16 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +step=0 +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx + +# Simplify Model +let step++ +echo; +echo [STEP ${step}] : Simplify Model +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${SIM_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${DATASETS_DIR} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Generate ${SIM_MODEL} + fi +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ + --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${FINAL_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : 
Inference +python3 ${RUN_DIR}/inference.py \ + --engine_file=${ENGINE_FILE} \ + --datasets_dir=${DATASETS_DIR} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --fps_target ${TGT} \ + --bsz ${BSZ}; check_status + +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/shufflenetv2_x1_0/ixrt/simplify_model.py b/models/cv/classification/shufflenetv2_x1_0/ixrt/simplify_model.py new file mode 100644 index 0000000000000000000000000000000000000000..bef33576a17235d24305fa32ae3dab6d7accc10e --- /dev/null +++ b/models/cv/classification/shufflenetv2_x1_0/ixrt/simplify_model.py @@ -0,0 +1,41 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import onnx +import argparse +from onnxsim import simplify + +# Simplify +def simplify_model(args): + onnx_model = onnx.load(args.origin_model) + model_simp, check = simplify(onnx_model) + model_simp = onnx.shape_inference.infer_shapes(model_simp) + onnx.save(model_simp, args.output_model) + print(" Simplify onnx Done.") + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--origin_model", type=str) + parser.add_argument("--output_model", type=str) + parser.add_argument("--reshape", action="store_true") + args = parser.parse_args() + return args + +args = parse_args() +simplify_model(args) + + + + diff --git a/models/cv/classification/shufflenetv2_x1_5/ixrt/README.md b/models/cv/classification/shufflenetv2_x1_5/ixrt/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8b92dab9b4702287579fe2a5ed4ca11e9debf0bb --- /dev/null +++ b/models/cv/classification/shufflenetv2_x1_5/ixrt/README.md @@ -0,0 +1,63 @@ +# ShuffleNetV2_x1_5 (IXRT) + +## Model Description + +ShuffleNetV2_x1_5 is a lightweight convolutional neural network specifically designed for efficient image recognition tasks on resource-constrained devices. It achieves high performance and low latency through the introduction of channel shuffling and pointwise group convolutions. Despite its small model size, it offers high accuracy and is suitable for a variety of vision tasks in mobile devices and embedded systems. + +## Supported Environments + +| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release | +|--------|-----------|---------| +| MR-V100 | 4.2.0 | 25.06 | + +## Model Preparation + +### Prepare Resources + +Pretrained model: + +Dataset: to download the validation dataset. 
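+
+The accuracy flow reads labels from a `val_map.txt` file inside the dataset
+directory, one `image_name<TAB>label` pair per line (see
+`calibration_dataset.py`). If your copy of the validation set lacks this file,
+the minimal sketch below writes one; the `pairs` argument is a hypothetical
+list of (image basename, integer label) tuples that you must assemble yourself.
+
+```python
+import os
+
+def write_val_map(dataset_dir, pairs):
+    # pairs: iterable of (image basename, integer class label)
+    with open(os.path.join(dataset_dir, "val_map.txt"), "w") as f:
+        for name, label in pairs:
+            f.write(f"{name}\t{label}\n")
+```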
+ +### Install Dependencies + +```bash +# Install libGL +## CentOS +yum install -y mesa-libGL +## Ubuntu +apt install -y libgl1-mesa-glx + +pip3 install -r requirements.txt +``` + +### Model Conversion + +```bash +mkdir checkpoints +python3 export.py --weight shufflenetv2_x1_5-3c479a10.pth --output checkpoints/shufflenetv2_x1_5.onnx +``` + +## Model Inference + +```bash +export PROJ_DIR=./ +export DATASETS_DIR=/path/to/imagenet_val/ +export CHECKPOINTS_DIR=./checkpoints +export RUN_DIR=./ +export CONFIG_DIR=config/SHUFFLENETV2_X1_5_CONFIG +``` + +### FP16 + +```bash +# Accuracy +bash scripts/infer_shufflenetv2_x1_5_fp16_accuracy.sh +# Performance +bash scripts/infer_shufflenetv2_x1_5_fp16_performance.sh +``` + +## Model Results + +| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) | +| ----------------- | --------- | --------- | -------- | -------- | -------- | +| ShuffleNetV2_x1_5 | 32 | FP16 | 5395.026 | 72.778 | 91.052 | diff --git a/models/cv/classification/shufflenetv2_x1_5/ixrt/build_engine.py b/models/cv/classification/shufflenetv2_x1_5/ixrt/build_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..e0a3bb4bcd143c68406d915245eefb7060010127 --- /dev/null +++ b/models/cv/classification/shufflenetv2_x1_5/ixrt/build_engine.py @@ -0,0 +1,53 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
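+#
+# Builds a serialized engine from an ONNX model. FP16 is used unless
+# --precision int8 is passed; the int8 path assumes the input ONNX was already
+# quantized (QDQ format) by quant.py earlier in the pipeline.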
+ +import os +import cv2 +import argparse +import numpy as np + +import torch +import tensorrt + +def main(config): + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(config.model) + + precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 + # print("precision : ", precision) + build_config.set_flag(precision) + + plan = builder.build_serialized_network(network, build_config) + engine_file_path = config.engine + with open(engine_file_path, "wb") as f: + f.write(plan) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", + help="The precision of datatype") + parser.add_argument("--engine", type=str, default=None) + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = parse_args() + main(args) \ No newline at end of file diff --git a/models/cv/classification/shufflenetv2_x1_5/ixrt/calibration_dataset.py b/models/cv/classification/shufflenetv2_x1_5/ixrt/calibration_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..774ed840114097ea75eb78f67ffc401f7806ff29 --- /dev/null +++ b/models/cv/classification/shufflenetv2_x1_5/ixrt/calibration_dataset.py @@ -0,0 +1,112 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
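+#
+# getdataloader() below caps the number of calibration samples at
+# min(50000, step * batch_size); a negative step selects the full dataset.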
+ +import os +import torch +import torchvision.datasets +from torch.utils.data import DataLoader +from torchvision import models +from torchvision import transforms as T + + +class CalibrationImageNet(torchvision.datasets.ImageFolder): + def __init__(self, *args, **kwargs): + super(CalibrationImageNet, self).__init__(*args, **kwargs) + img2label_path = os.path.join(self.root, "val_map.txt") + if not os.path.exists(img2label_path): + raise FileNotFoundError(f"Not found label file `{img2label_path}`.") + + self.img2label_map = self.make_img2label_map(img2label_path) + + def make_img2label_map(self, path): + with open(path) as f: + lines = f.readlines() + + img2lable_map = dict() + for line in lines: + line = line.lstrip().rstrip().split("\t") + if len(line) != 2: + continue + img_name, label = line + img_name = img_name.strip() + if img_name in [None, ""]: + continue + label = int(label.strip()) + img2lable_map[img_name] = label + return img2lable_map + + def __getitem__(self, index): + path, target = self.samples[index] + sample = self.loader(path) + if self.transform is not None: + sample = self.transform(sample) + # if self.target_transform is not None: + # target = self.target_transform(target) + img_name = os.path.basename(path) + target = self.img2label_map[img_name] + + return sample, target + + +def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0): + dataset = CalibrationImageNet( + data_path, + transform=T.Compose( + [ + T.Resize(256), + T.CenterCrop(img_sz), + T.ToTensor(), + T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ] + ), + ) + + calibration_dataset = dataset + if num_samples is not None: + calibration_dataset = torch.utils.data.Subset( + dataset, indices=range(num_samples) + ) + + calibration_dataloader = DataLoader( + calibration_dataset, + shuffle=False, + batch_size=batch_size, + drop_last=False, + num_workers=workers, + ) + + verify_dataloader = DataLoader( + dataset, + shuffle=False, + batch_size=batch_size, + drop_last=False, + num_workers=workers, + ) + + return calibration_dataloader, verify_dataloader + + +def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): + num_samples = min(total_sample, step * batch_size) + if step < 0: + num_samples = None + calibration_dataloader, _ = create_dataloaders( + dataset_dir, + img_sz=img_sz, + batch_size=batch_size, + workers=workers, + num_samples=num_samples, + ) + return calibration_dataloader \ No newline at end of file diff --git a/models/cv/classification/shufflenetv2_x1_5/ixrt/ci/prepare.sh b/models/cv/classification/shufflenetv2_x1_5/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..b526c25399f7cfb8437d4575d65f1d090609a90c --- /dev/null +++ b/models/cv/classification/shufflenetv2_x1_5/ixrt/ci/prepare.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
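+
+# Example CI invocation (assumed, not part of the original script): run
+#   bash ci/prepare.sh
+# from the model directory with shufflenetv2_x1_5-3c479a10.pth downloaded
+# alongside it.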
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r requirements.txt +mkdir -p checkpoints +python3 export.py --weight shufflenetv2_x1_5-3c479a10.pth --output checkpoints/shufflenetv2_x1_5.onnx \ No newline at end of file diff --git a/models/cv/classification/shufflenetv2_x1_5/ixrt/common.py b/models/cv/classification/shufflenetv2_x1_5/ixrt/common.py new file mode 100644 index 0000000000000000000000000000000000000000..f7aed872184b64e4648bd79f600a59f5b5d0948c --- /dev/null +++ b/models/cv/classification/shufflenetv2_x1_5/ixrt/common.py @@ -0,0 +1,79 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import cv2 +import glob +import torch +import tensorrt +import numpy as np +import pycuda.driver as cuda + +def eval_batch(batch_score, batch_label): + batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) + values, indices = batch_score.topk(5) + top1, top5 = 0, 0 + for idx, label in enumerate(batch_label): + + if label == indices[idx][0]: + top1 += 1 + if label in indices[idx]: + top5 += 1 + return top1, top5 + +def create_engine_context(engine_path, logger): + with open(engine_path, "rb") as f: + runtime = tensorrt.Runtime(logger) + assert runtime + engine = runtime.deserialize_cuda_engine(f.read()) + assert engine + context = engine.create_execution_context() + assert context + + return engine, context + +def get_io_bindings(engine): + # Setup I/O bindings + inputs = [] + outputs = [] + allocations = [] + + for i in range(engine.num_bindings): + is_input = False + if engine.binding_is_input(i): + is_input = True + name = engine.get_binding_name(i) + dtype = engine.get_binding_dtype(i) + shape = engine.get_binding_shape(i) + if is_input: + batch_size = shape[0] + size = np.dtype(tensorrt.nptype(dtype)).itemsize + for s in shape: + size *= s + allocation = cuda.mem_alloc(size) + binding = { + "index": i, + "name": name, + "dtype": np.dtype(tensorrt.nptype(dtype)), + "shape": list(shape), + "allocation": allocation, + } + print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") + allocations.append(allocation) + if engine.binding_is_input(i): + inputs.append(binding) + else: + outputs.append(binding) + return inputs, outputs, allocations diff --git a/models/cv/classification/shufflenetv2_x1_5/ixrt/config/SHUFFLENETV2_X1_5_CONFIG b/models/cv/classification/shufflenetv2_x1_5/ixrt/config/SHUFFLENETV2_X1_5_CONFIG new file mode 100644 index 0000000000000000000000000000000000000000..3de323de0db1322d05177ed46ff30aa515deda8c --- /dev/null +++ b/models/cv/classification/shufflenetv2_x1_5/ixrt/config/SHUFFLENETV2_X1_5_CONFIG @@ -0,0 +1,19 @@ +# IMGSIZE : 模型输入hw大小 +# MODEL_NAME : 生成onnx/engine的basename +# ORIGINE_MODEL : 原始onnx文件名称 
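+# (English) IMGSIZE: model input height/width; MODEL_NAME: basename for the
+# generated onnx/engine files; ORIGINE_MODEL: original onnx file name. The
+# QUANT settings below apply only when PRECISION is int8: observer strategy,
+# calibration batch size (keep it equal to the onnx batch size) and step
+# count, a fixed seed for reproducibility, and an optional pre-quantized onnx.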
+IMGSIZE=224 +MODEL_NAME=shufflenetv2_x1.5 +ORIGINE_MODEL=shufflenetv2_x1_5.onnx + +# QUANT CONFIG (仅PRECISION为int8时生效) + # QUANT_OBSERVER : 量化策略,可选 [hist_percentile, percentile, minmax, entropy, ema] + # QUANT_BATCHSIZE : 量化时组dataloader的batchsize, 最好和onnx中的batchsize保持一致,有些op可能推导shape错误(比如Reshape) + # QUANT_STEP : 量化步数 + # QUANT_SEED : 随机种子 保证量化结果可复现 + # QUANT_EXIST_ONNX : 如果有其他来源的量化模型则填写 +QUANT_OBSERVER=hist_percentile +QUANT_BATCHSIZE=1 +QUANT_STEP=32 +QUANT_SEED=42 +DISABLE_QUANT_LIST= +QUANT_EXIST_ONNX= diff --git a/models/cv/classification/shufflenetv2_x1_5/ixrt/export.py b/models/cv/classification/shufflenetv2_x1_5/ixrt/export.py new file mode 100644 index 0000000000000000000000000000000000000000..f68b01c4ff54f3d68b9359f10ab60042e2c2fb8f --- /dev/null +++ b/models/cv/classification/shufflenetv2_x1_5/ixrt/export.py @@ -0,0 +1,61 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import torch +import torchvision +import argparse + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--weight", + type=str, + required=True, + help="pytorch model weight.") + + parser.add_argument("--output", + type=str, + required=True, + help="export onnx model path.") + + args = parser.parse_args() + return args + +def main(): + args = parse_args() + + model = torchvision.models.shufflenet_v2_x1_5() + model.load_state_dict(torch.load(args.weight)) + model.eval() + + input_names = ['input'] + output_names = ['output'] + dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}} + dummy_input = torch.randn(1, 3, 224, 224) + + torch.onnx.export( + model, + dummy_input, + args.output, + input_names = input_names, + dynamic_axes = dynamic_axes, + output_names = output_names, + opset_version=13 + ) + + print("Export onnx model successfully! ") + +if __name__ == "__main__": + main() diff --git a/models/cv/classification/shufflenetv2_x1_5/ixrt/inference.py b/models/cv/classification/shufflenetv2_x1_5/ixrt/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..78126cb0ece51d9a8a1f7daea67dc6cdd38ce9bf --- /dev/null +++ b/models/cv/classification/shufflenetv2_x1_5/ixrt/inference.py @@ -0,0 +1,161 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
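+#
+# In FPS mode the reported figure is processed samples / wall time over
+# loop_count batches, with the sample count capped at the 50000-image
+# validation set.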
+ +import argparse +import json +import os +import re +import time +from tqdm import tqdm + +import cv2 +import numpy as np +import pycuda.autoinit +import pycuda.driver as cuda +import torch +import tensorrt + +from calibration_dataset import getdataloader +from common import eval_batch, create_engine_context, get_io_bindings + +def main(config): + dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) + + host_mem = tensorrt.IHostMemory + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + + # Load Engine && I/O bindings + engine, context = create_engine_context(config.engine_file, logger) + inputs, outputs, allocations = get_io_bindings(engine) + + # Warm up + if config.warm_up > 0: + print("\nWarm Start.") + for i in range(config.warm_up): + context.execute_v2(allocations) + print("Warm Done.") + + # Inference + if config.test_mode == "FPS": + torch.cuda.synchronize() + start_time = time.time() + + for i in range(config.loop_count): + context.execute_v2(allocations) + + torch.cuda.synchronize() + end_time = time.time() + forward_time = end_time - start_time + + num_samples = 50000 + if config.loop_count * config.bsz < num_samples: + num_samples = config.loop_count * config.bsz + fps = num_samples / forward_time + + print("FPS : ", fps) + print(f"Performance Check : Test {fps} >= target {config.fps_target}") + if fps >= config.fps_target: + print("pass!") + exit() + else: + print("failed!") + exit(1) + + elif config.test_mode == "ACC": + + ## Prepare the output data + output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) + print(f"output shape : {output.shape} output type : {output.dtype}") + + total_sample = 0 + acc_top1, acc_top5 = 0, 0 + + start_time = time.time() + with tqdm(total= len(dataloader)) as _tqdm: + for idx, (batch_data, batch_label) in enumerate(dataloader): + batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) + batch_data = np.ascontiguousarray(batch_data) + total_sample += batch_data.shape[0] + + cuda.memcpy_htod(inputs[0]["allocation"], batch_data) + context.execute_v2(allocations) + cuda.memcpy_dtoh(output, outputs[0]["allocation"]) + + # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model + if len(output.shape) == 4: + output = output.squeeze(axis=(2,3)) + + batch_top1, batch_top5 = eval_batch(output, batch_label) + acc_top1 += batch_top1 + acc_top5 += batch_top5 + + _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), + acc_5='{:.4f}'.format(acc_top5/total_sample)) + _tqdm.update(1) + end_time = time.time() + end2end_time = end_time - start_time + + print(F"E2E time : {end2end_time:.3f} seconds") + print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") + print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") + acc1 = acc_top1/total_sample + print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") + if acc1 >= config.acc_target: + print("pass!") + exit() + else: + print("failed!") + exit(1) + +def parse_config(): + parser = argparse.ArgumentParser() + parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") + parser.add_argument( + "--engine_file", + type=str, + help="engine file path" + ) + parser.add_argument( + "--datasets_dir", + type=str, + default="", + help="ImageNet dir", + ) + parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") + parser.add_argument("--bsz", type=int, default=32, help="test batch size") + parser.add_argument( + "--imgsz", + "--img", + "--img-size", + type=int, + 
default=224, + help="inference size h,w", + ) + parser.add_argument("--use_async", action="store_true") + parser.add_argument( + "--device", type=int, default=0, help="cuda device, i.e. 0 or 0,1,2,3,4" + ) + parser.add_argument("--fps_target", type=float, default=-1.0) + parser.add_argument("--acc_target", type=float, default=-1.0) + parser.add_argument("--loop_count", type=int, default=-1) + + config = parser.parse_args() + return config + +if __name__ == "__main__": + config = parse_config() + main(config) diff --git a/models/cv/classification/shufflenetv2_x1_5/ixrt/modify_batchsize.py b/models/cv/classification/shufflenetv2_x1_5/ixrt/modify_batchsize.py new file mode 100644 index 0000000000000000000000000000000000000000..2e8d086b460b14db81cc8e2d2d1487324534f551 --- /dev/null +++ b/models/cv/classification/shufflenetv2_x1_5/ixrt/modify_batchsize.py @@ -0,0 +1,57 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import onnx +import argparse + +def change_input_dim(model, bsz): + batch_size = bsz + + # The following code changes the first dimension of every input to be batch_size + # Modify as appropriate ... note that this requires all inputs to + # have the same batch_size + inputs = model.graph.input + for input in inputs: + # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. + # Add checks as needed. + dim1 = input.type.tensor_type.shape.dim[0] + # update dim to be a symbolic value + if isinstance(batch_size, str): + # set dynamic batch size + dim1.dim_param = batch_size + elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): + # set given batch size + dim1.dim_value = int(batch_size) + else: + # set batch size of 1 + dim1.dim_value = 1 + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--batch_size", type=int) + parser.add_argument("--origin_model", type=str) + parser.add_argument("--output_model", type=str) + args = parser.parse_args() + return args + +args = parse_args() +model = onnx.load(args.origin_model) +change_input_dim(model, args.batch_size) +onnx.save(model, args.output_model) + + + + + diff --git a/models/cv/classification/shufflenetv2_x1_5/ixrt/quant.py b/models/cv/classification/shufflenetv2_x1_5/ixrt/quant.py new file mode 100644 index 0000000000000000000000000000000000000000..6e9740018253e62b05cc5a0d6c56bbbeae87a181 --- /dev/null +++ b/models/cv/classification/shufflenetv2_x1_5/ixrt/quant.py @@ -0,0 +1,59 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import cv2 +import random +import argparse +import numpy as np +from random import shuffle +from tensorrt.deploy import static_quantize + +import torch +import torchvision.datasets +from calibration_dataset import getdataloader + +def setseed(seed=42): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model_name", type=str) + parser.add_argument("--model", type=str) + parser.add_argument("--dataset_dir", type=str, default="imagenet_val") + parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], default="hist_percentile") + parser.add_argument("--disable_quant_names", nargs='*', type=str) + parser.add_argument("--save_dir", type=str, help="save path", default=None) + parser.add_argument("--bsz", type=int, default=32) + parser.add_argument("--step", type=int, default=20) + parser.add_argument("--seed", type=int, default=42) + parser.add_argument("--imgsz", type=int, default=224) + args = parser.parse_args() + print("Quant config:", args) + print(args.disable_quant_names) + return args + +args = parse_args() +setseed(args.seed) +calibration_dataloader = getdataloader(args.dataset_dir, args.step, args.bsz, img_sz=args.imgsz) +static_quantize(args.model, + calibration_dataloader=calibration_dataloader, + save_quant_onnx_path=os.path.join(args.save_dir, f"quantized_{args.model_name}.onnx"), + observer=args.observer, + data_preprocess=lambda x: x[0].to("cuda"), + quant_format="qdq", + disable_quant_names=args.disable_quant_names) \ No newline at end of file diff --git a/models/cv/classification/shufflenetv2_x1_5/ixrt/requirements.txt b/models/cv/classification/shufflenetv2_x1_5/ixrt/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..fa15a05d3194cbd5267be4ced65f6992cb889427 --- /dev/null +++ b/models/cv/classification/shufflenetv2_x1_5/ixrt/requirements.txt @@ -0,0 +1,6 @@ +onnx +tqdm +onnxsim +tabulate +ppq +pycuda \ No newline at end of file diff --git a/models/cv/classification/shufflenetv2_x1_5/ixrt/scripts/infer_shufflenetv2_x1_5_fp16_accuracy.sh b/models/cv/classification/shufflenetv2_x1_5/ixrt/scripts/infer_shufflenetv2_x1_5_fp16_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..c0d5f96ca5120fed452077d586c9975967e57434 --- /dev/null +++ b/models/cv/classification/shufflenetv2_x1_5/ixrt/scripts/infer_shufflenetv2_x1_5_fp16_accuracy.sh @@ -0,0 +1,143 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+EXIT_STATUS=0
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then
+        EXIT_STATUS=1
+    fi
+}
+
+# Run parameters
+BSZ=32
+TGT=-1
+WARM_UP=0
+LOOP_COUNT=-1
+RUN_MODE=ACC
+PRECISION=float16
+
+# Update arguments
+index=0
+options=$@
+arguments=($options)
+for argument in $options
+do
+    index=`expr $index + 1`
+    case $argument in
+      --bs) BSZ=${arguments[index]};;
+      --tgt) TGT=${arguments[index]};;
+    esac
+done
+
+source ${CONFIG_DIR}
+ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo CONFIG_DIR : ${CONFIG_DIR}
+echo ====================== Model Info ======================
+echo Model Name : ${MODEL_NAME}
+echo Onnx Path : ${ORIGINE_MODEL}
+
+step=0
+SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
+
+# Simplify Model
+let step++
+echo;
+echo [STEP ${step}] : Simplify Model
+if [ -f ${SIM_MODEL} ];then
+    echo "  Simplify Model Skip, ${SIM_MODEL} already exists"
+else
+    python3 ${RUN_DIR}/simplify_model.py \
+    --origin_model $ORIGINE_MODEL \
+    --output_model ${SIM_MODEL}
+    echo "  Generate ${SIM_MODEL}"
+fi
+
+# Quant Model
+if [ $PRECISION == "int8" ];then
+    let step++
+    echo;
+    echo [STEP ${step}] : Quant Model
+    if [[ -z ${QUANT_EXIST_ONNX} ]];then
+        QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx
+    fi
+    if [[ -f ${QUANT_EXIST_ONNX} ]];then
+        SIM_MODEL=${QUANT_EXIST_ONNX}
+        echo "  Quant Model Skip, ${QUANT_EXIST_ONNX} already exists"
+    else
+        python3 ${RUN_DIR}/quant.py \
+            --model ${SIM_MODEL} \
+            --model_name ${MODEL_NAME} \
+            --dataset_dir ${DATASETS_DIR} \
+            --observer ${QUANT_OBSERVER} \
+            --disable_quant_names ${DISABLE_QUANT_LIST[@]} \
+            --save_dir $CHECKPOINTS_DIR \
+            --bsz ${QUANT_BATCHSIZE} \
+            --step ${QUANT_STEP} \
+            --seed ${QUANT_SEED} \
+            --imgsz ${IMGSIZE}
+        SIM_MODEL=${QUANT_EXIST_ONNX}
+        echo "  Generate ${SIM_MODEL}"
+    fi
+fi
+
+# Change Batchsize
+let step++
+echo;
+echo [STEP ${step}] : Change Batchsize
+FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx
+if [ -f $FINAL_MODEL ];then
+    echo "  Change Batchsize Skip, $FINAL_MODEL already exists"
+else
+    python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
+        --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL}
+    echo "  Generate ${FINAL_MODEL}"
+fi
+
+# Build Engine
+let step++
+echo;
+echo [STEP ${step}] : Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
+if [ -f $ENGINE_FILE ];then
+    echo "  Build Engine Skip, $ENGINE_FILE already exists"
+else
+    python3 ${RUN_DIR}/build_engine.py \
+        --precision ${PRECISION} \
+        --model ${FINAL_MODEL} \
+        --engine ${ENGINE_FILE}
+    echo "  Generate Engine ${ENGINE_FILE}"
+fi
+
+# Inference
+let step++
+echo;
+echo [STEP ${step}] : Inference
+python3 ${RUN_DIR}/inference.py \
+    --engine_file=${ENGINE_FILE} \
+    --datasets_dir=${DATASETS_DIR} \
+    --imgsz=${IMGSIZE} \
+    --warm_up=${WARM_UP} \
+    --loop_count ${LOOP_COUNT} \
+    --test_mode ${RUN_MODE} \
+    --acc_target ${TGT} \
+    --bsz ${BSZ}; check_status
+
+exit ${EXIT_STATUS}
\ No newline at end of file
diff --git a/models/cv/classification/shufflenetv2_x1_5/ixrt/scripts/infer_shufflenetv2_x1_5_fp16_performance.sh b/models/cv/classification/shufflenetv2_x1_5/ixrt/scripts/infer_shufflenetv2_x1_5_fp16_performance.sh
new file mode 100644
index 0000000000000000000000000000000000000000..9ea9b62e3a59437e74df80aaea55cc6d03037ec7
--- /dev/null
+++ b/models/cv/classification/shufflenetv2_x1_5/ixrt/scripts/infer_shufflenetv2_x1_5_fp16_performance.sh
@@ -0,0 +1,143 @@
+#!/bin/bash
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+EXIT_STATUS=0
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then
+        EXIT_STATUS=1
+    fi
+}
+
+# Run parameters
+BSZ=32
+TGT=-1
+WARM_UP=3
+LOOP_COUNT=20
+RUN_MODE=FPS
+PRECISION=float16
+
+# Update arguments
+index=0
+options=$@
+arguments=($options)
+for argument in $options
+do
+    index=`expr $index + 1`
+    case $argument in
+      --bs) BSZ=${arguments[index]};;
+      --tgt) TGT=${arguments[index]};;
+    esac
+done
+
+source ${CONFIG_DIR}
+ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo CONFIG_DIR : ${CONFIG_DIR}
+echo ====================== Model Info ======================
+echo Model Name : ${MODEL_NAME}
+echo Onnx Path : ${ORIGINE_MODEL}
+
+step=0
+SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
+
+# Simplify Model
+let step++
+echo;
+echo [STEP ${step}] : Simplify Model
+if [ -f ${SIM_MODEL} ];then
+    echo "  Simplify Model Skip, ${SIM_MODEL} already exists"
+else
+    python3 ${RUN_DIR}/simplify_model.py \
+    --origin_model $ORIGINE_MODEL \
+    --output_model ${SIM_MODEL}
+    echo "  Generate ${SIM_MODEL}"
+fi
+
+# Quant Model
+if [ $PRECISION == "int8" ];then
+    let step++
+    echo;
+    echo [STEP ${step}] : Quant Model
+    if [[ -z ${QUANT_EXIST_ONNX} ]];then
+        QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx
+    fi
+    if [[ -f ${QUANT_EXIST_ONNX} ]];then
+        SIM_MODEL=${QUANT_EXIST_ONNX}
+        echo "  Quant Model Skip, ${QUANT_EXIST_ONNX} already exists"
+    else
+        python3 ${RUN_DIR}/quant.py \
+            --model ${SIM_MODEL} \
+            --model_name ${MODEL_NAME} \
+            --dataset_dir ${DATASETS_DIR} \
+            --observer ${QUANT_OBSERVER} \
+            --disable_quant_names ${DISABLE_QUANT_LIST[@]} \
+            --save_dir $CHECKPOINTS_DIR \
+            --bsz ${QUANT_BATCHSIZE} \
+            --step ${QUANT_STEP} \
+            --seed ${QUANT_SEED} \
+            --imgsz ${IMGSIZE}
+        SIM_MODEL=${QUANT_EXIST_ONNX}
+        echo "  Generate ${SIM_MODEL}"
+    fi
+fi
+
+# Change Batchsize
+let step++
+echo;
+echo [STEP ${step}] : Change Batchsize
+FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx
+if [ -f $FINAL_MODEL ];then
+    echo "  Change Batchsize Skip, $FINAL_MODEL already exists"
+else
+    python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
+        --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL}
+    echo "  Generate ${FINAL_MODEL}"
+fi
+
+# Build Engine
+let step++
+echo;
+echo [STEP ${step}] : Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
+if [ -f $ENGINE_FILE ];then
+    echo "  Build Engine Skip, $ENGINE_FILE already exists"
+else
+    python3 ${RUN_DIR}/build_engine.py \
+        --precision ${PRECISION} \
+        --model ${FINAL_MODEL} \
+        --engine ${ENGINE_FILE}
+    echo "  Generate Engine ${ENGINE_FILE}"
+fi
+
+# Inference
+let step++
+echo;
+echo [STEP ${step}] : Inference
+python3 ${RUN_DIR}/inference.py \
+    --engine_file=${ENGINE_FILE} \
+    --datasets_dir=${DATASETS_DIR} \
+    --imgsz=${IMGSIZE} \
+    --warm_up=${WARM_UP} \
+    --loop_count ${LOOP_COUNT} \
+    --test_mode ${RUN_MODE} \
+    --fps_target ${TGT} \
+    --bsz ${BSZ}; check_status
+
+exit ${EXIT_STATUS}
\ No newline at end of file
diff --git a/models/cv/classification/shufflenetv2_x1_5/ixrt/simplify_model.py b/models/cv/classification/shufflenetv2_x1_5/ixrt/simplify_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..bef33576a17235d24305fa32ae3dab6d7accc10e
--- /dev/null
+++ b/models/cv/classification/shufflenetv2_x1_5/ixrt/simplify_model.py
@@ -0,0 +1,41 @@
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import onnx
+import argparse
+from onnxsim import simplify
+
+# Simplify
+def simplify_model(args):
+    onnx_model = onnx.load(args.origin_model)
+    model_simp, check = simplify(onnx_model)
+    model_simp = onnx.shape_inference.infer_shapes(model_simp)
+    onnx.save(model_simp, args.output_model)
+    print(" Simplify onnx Done.")
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--origin_model", type=str)
+    parser.add_argument("--output_model", type=str)
+    parser.add_argument("--reshape", action="store_true")
+    args = parser.parse_args()
+    return args
+
+args = parse_args()
+simplify_model(args)
+
+
+
+
diff --git a/models/cv/classification/shufflenetv2_x2_0/ixrt/README.md b/models/cv/classification/shufflenetv2_x2_0/ixrt/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..186044d07a5fa9d9c90211cdfd15459ea5149d97
--- /dev/null
+++ b/models/cv/classification/shufflenetv2_x2_0/ixrt/README.md
@@ -0,0 +1,57 @@
+# ShuffleNetV2_x2_0 (IXRT)
+
+## Model Description
+
+ShuffleNetV2_x2_0 is a lightweight convolutional neural network introduced in the paper "ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" by Megvii (Face++). It is designed to achieve high performance at low computational cost, making it well suited to mobile and embedded devices. The x2_0 in its name indicates a width multiplier of 2.0, meaning the model has twice as many channels as the baseline ShuffleNetV2_x1_0. It employs Channel Shuffle to enable efficient information exchange between grouped convolutions, addressing their limited cross-group information flow. The core building block, the ShuffleNetV2 block, combines a split-merge design with the channel shuffle mechanism, delivering both high efficiency and accuracy.
+
+## Supported Environments
+
+| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release |
+|--------|-----------|---------|
+| MR-V100 | 4.2.0 | 25.06 |
+
+## Model Preparation
+
+### Prepare Resources
+
+Pretrained model: 
+
+Dataset: to download the validation dataset.
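One practical note on the validation data: the calibration loader added later in this PR expects a `val_map.txt` at the dataset root in addition to the usual torchvision ImageFolder layout. A hypothetical layout (the class directory and image names are illustrative):

```bash
# Expected layout (illustrative); each val_map.txt line is "<image_name>\t<integer_label>".
imagenet_val/
├── val_map.txt
├── n01440764/
│   └── ILSVRC2012_val_00000293.JPEG
└── n01443537/
    └── ILSVRC2012_val_00000236.JPEG
```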
+ +### Install Dependencies + +```bash +pip3 install -r requirements.txt +``` + +### Model Conversion + +```bash +mkdir checkpoints +python3 export.py --weight shufflenetv2_x2_0-8be3c8ee.pth --output checkpoints/shufflenetv2_x2_0.onnx +``` + +## Model Inference + +```bash +export PROJ_DIR=./ +export DATASETS_DIR=/path/to/imagenet_val/ +export CHECKPOINTS_DIR=./checkpoints +export RUN_DIR=./ +export CONFIG_DIR=config/SHUFFLENETV2_X2_0_CONFIG +``` + +### FP16 + +```bash +# Accuracy +bash scripts/infer_shufflenetv2_x2_0_fp16_accuracy.sh +# Performance +bash scripts/infer_shufflenetv2_x2_0_fp16_performance.sh +``` + +## Model Results + +| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) | +| ----------------- | --------- | --------- | -------- | -------- | -------- | +| ShuffleNetV2_x2_0 | 32 | FP16 | 5439.098 | 76.176 | 92.860 | diff --git a/models/cv/classification/shufflenetv2_x2_0/ixrt/build_engine.py b/models/cv/classification/shufflenetv2_x2_0/ixrt/build_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..e0a3bb4bcd143c68406d915245eefb7060010127 --- /dev/null +++ b/models/cv/classification/shufflenetv2_x2_0/ixrt/build_engine.py @@ -0,0 +1,53 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
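Before build_engine.py (below) can run, the ONNX graph is first simplified and pinned to a fixed batch size. A hedged manual walk-through of the three steps the infer_*.sh scripts automate — the file names simply follow the config's `MODEL_NAME=shufflenetv2_x2.0` convention and are not shipped defaults:

```bash
# Illustrative manual run of the pipeline the infer_*.sh scripts drive.
python3 simplify_model.py \
    --origin_model checkpoints/shufflenetv2_x2_0.onnx \
    --output_model checkpoints/shufflenetv2_x2.0_sim.onnx
python3 modify_batchsize.py --batch_size 32 \
    --origin_model checkpoints/shufflenetv2_x2.0_sim.onnx \
    --output_model checkpoints/shufflenetv2_x2.0_32.onnx
python3 build_engine.py --precision float16 \
    --model checkpoints/shufflenetv2_x2.0_32.onnx \
    --engine checkpoints/shufflenetv2_x2.0_float16_bs32.engine
```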
+ +import os +import cv2 +import argparse +import numpy as np + +import torch +import tensorrt + +def main(config): + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(config.model) + + precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 + # print("precision : ", precision) + build_config.set_flag(precision) + + plan = builder.build_serialized_network(network, build_config) + engine_file_path = config.engine + with open(engine_file_path, "wb") as f: + f.write(plan) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", + help="The precision of datatype") + parser.add_argument("--engine", type=str, default=None) + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = parse_args() + main(args) \ No newline at end of file diff --git a/models/cv/classification/shufflenetv2_x2_0/ixrt/calibration_dataset.py b/models/cv/classification/shufflenetv2_x2_0/ixrt/calibration_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..774ed840114097ea75eb78f67ffc401f7806ff29 --- /dev/null +++ b/models/cv/classification/shufflenetv2_x2_0/ixrt/calibration_dataset.py @@ -0,0 +1,112 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
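calibration_dataset.py below wraps that layout in a torchvision ImageFolder and exposes `getdataloader`, which quant.py also uses for calibration. A minimal usage sketch (the dataset path is a placeholder):

```python
# Minimal sketch; assumes an imagenet_val directory laid out as in the README note above.
from calibration_dataset import getdataloader

loader = getdataloader("/path/to/imagenet_val", step=20, batch_size=32, img_sz=224)
images, labels = next(iter(loader))
print(images.shape)  # torch.Size([32, 3, 224, 224])
print(labels[:4])    # integer labels taken from val_map.txt
```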
+ +import os +import torch +import torchvision.datasets +from torch.utils.data import DataLoader +from torchvision import models +from torchvision import transforms as T + + +class CalibrationImageNet(torchvision.datasets.ImageFolder): + def __init__(self, *args, **kwargs): + super(CalibrationImageNet, self).__init__(*args, **kwargs) + img2label_path = os.path.join(self.root, "val_map.txt") + if not os.path.exists(img2label_path): + raise FileNotFoundError(f"Not found label file `{img2label_path}`.") + + self.img2label_map = self.make_img2label_map(img2label_path) + + def make_img2label_map(self, path): + with open(path) as f: + lines = f.readlines() + + img2lable_map = dict() + for line in lines: + line = line.lstrip().rstrip().split("\t") + if len(line) != 2: + continue + img_name, label = line + img_name = img_name.strip() + if img_name in [None, ""]: + continue + label = int(label.strip()) + img2lable_map[img_name] = label + return img2lable_map + + def __getitem__(self, index): + path, target = self.samples[index] + sample = self.loader(path) + if self.transform is not None: + sample = self.transform(sample) + # if self.target_transform is not None: + # target = self.target_transform(target) + img_name = os.path.basename(path) + target = self.img2label_map[img_name] + + return sample, target + + +def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0): + dataset = CalibrationImageNet( + data_path, + transform=T.Compose( + [ + T.Resize(256), + T.CenterCrop(img_sz), + T.ToTensor(), + T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ] + ), + ) + + calibration_dataset = dataset + if num_samples is not None: + calibration_dataset = torch.utils.data.Subset( + dataset, indices=range(num_samples) + ) + + calibration_dataloader = DataLoader( + calibration_dataset, + shuffle=False, + batch_size=batch_size, + drop_last=False, + num_workers=workers, + ) + + verify_dataloader = DataLoader( + dataset, + shuffle=False, + batch_size=batch_size, + drop_last=False, + num_workers=workers, + ) + + return calibration_dataloader, verify_dataloader + + +def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): + num_samples = min(total_sample, step * batch_size) + if step < 0: + num_samples = None + calibration_dataloader, _ = create_dataloaders( + dataset_dir, + img_sz=img_sz, + batch_size=batch_size, + workers=workers, + num_samples=num_samples, + ) + return calibration_dataloader \ No newline at end of file diff --git a/models/cv/classification/shufflenetv2_x2_0/ixrt/ci/prepare.sh b/models/cv/classification/shufflenetv2_x2_0/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..4d4136356475f7162b871f84a5a1eabb8aee2c93 --- /dev/null +++ b/models/cv/classification/shufflenetv2_x2_0/ixrt/ci/prepare.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+set -x
+
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+    apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+    yum install -y mesa-libGL
+else
+    echo "OS not supported"
+fi
+
+pip3 install -r requirements.txt
+mkdir -p checkpoints
+python3 export.py --weight shufflenetv2_x2_0-8be3c8ee.pth --output checkpoints/shufflenetv2_x2_0.onnx
\ No newline at end of file
diff --git a/models/cv/classification/shufflenetv2_x2_0/ixrt/common.py b/models/cv/classification/shufflenetv2_x2_0/ixrt/common.py
new file mode 100644
index 0000000000000000000000000000000000000000..f7aed872184b64e4648bd79f600a59f5b5d0948c
--- /dev/null
+++ b/models/cv/classification/shufflenetv2_x2_0/ixrt/common.py
@@ -0,0 +1,79 @@
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import os
+import cv2
+import glob
+import torch
+import tensorrt
+import numpy as np
+import pycuda.driver as cuda
+
+def eval_batch(batch_score, batch_label):
+    batch_score = torch.from_numpy(batch_score).to(torch.float32)
+    values, indices = batch_score.topk(5)
+    top1, top5 = 0, 0
+    for idx, label in enumerate(batch_label):
+        # top-1 hit if the best class matches; top-5 hit if the label is anywhere in the top 5
+        if label == indices[idx][0]:
+            top1 += 1
+        if label in indices[idx]:
+            top5 += 1
+    return top1, top5
+
+def create_engine_context(engine_path, logger):
+    with open(engine_path, "rb") as f:
+        runtime = tensorrt.Runtime(logger)
+        assert runtime
+        engine = runtime.deserialize_cuda_engine(f.read())
+        assert engine
+        context = engine.create_execution_context()
+        assert context
+
+    return engine, context
+
+def get_io_bindings(engine):
+    # Setup I/O bindings
+    inputs = []
+    outputs = []
+    allocations = []
+
+    for i in range(engine.num_bindings):
+        is_input = False
+        if engine.binding_is_input(i):
+            is_input = True
+        name = engine.get_binding_name(i)
+        dtype = engine.get_binding_dtype(i)
+        shape = engine.get_binding_shape(i)
+        if is_input:
+            batch_size = shape[0]
+        size = np.dtype(tensorrt.nptype(dtype)).itemsize
+        for s in shape:
+            size *= s
+        allocation = cuda.mem_alloc(size)
+        binding = {
+            "index": i,
+            "name": name,
+            "dtype": np.dtype(tensorrt.nptype(dtype)),
+            "shape": list(shape),
+            "allocation": allocation,
+        }
+        print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}")
+        allocations.append(allocation)
+        if engine.binding_is_input(i):
+            inputs.append(binding)
+        else:
+            outputs.append(binding)
+    return inputs, outputs, allocations
diff --git a/models/cv/classification/shufflenetv2_x2_0/ixrt/config/SHUFFLENETV2_X2_0_CONFIG b/models/cv/classification/shufflenetv2_x2_0/ixrt/config/SHUFFLENETV2_X2_0_CONFIG
new file mode 100644
index 0000000000000000000000000000000000000000..8d18e5bb46ebf8e5559326efb739038f2923fc0d
--- /dev/null
+++ b/models/cv/classification/shufflenetv2_x2_0/ixrt/config/SHUFFLENETV2_X2_0_CONFIG
@@ -0,0 +1,19 @@
+# IMGSIZE : model input height/width
+# MODEL_NAME : basename for the generated onnx/engine files
+# ORIGINE_MODEL : original onnx file name
+IMGSIZE=224
+MODEL_NAME=shufflenetv2_x2.0
+ORIGINE_MODEL=shufflenetv2_x2_0.onnx
+
+# QUANT CONFIG (takes effect only when PRECISION is int8)
+    # QUANT_OBSERVER : quantization observer, one of [hist_percentile, percentile, minmax, entropy, ema]
+    # QUANT_BATCHSIZE : batch size of the calibration dataloader; best kept equal to the batch size in the onnx model, since some ops (e.g. Reshape) may otherwise infer shapes incorrectly
+    # QUANT_STEP : number of calibration steps
+    # QUANT_SEED : random seed, to keep quantization results reproducible
+    # QUANT_EXIST_ONNX : set this if a quantized onnx model from another source should be used
+QUANT_OBSERVER=hist_percentile
+QUANT_BATCHSIZE=1
+QUANT_STEP=32
+QUANT_SEED=42
+DISABLE_QUANT_LIST=
+QUANT_EXIST_ONNX=
diff --git a/models/cv/classification/shufflenetv2_x2_0/ixrt/export.py b/models/cv/classification/shufflenetv2_x2_0/ixrt/export.py
new file mode 100644
index 0000000000000000000000000000000000000000..3a3c15a885391682a1f8e28bab206e8959e4bfc5
--- /dev/null
+++ b/models/cv/classification/shufflenetv2_x2_0/ixrt/export.py
@@ -0,0 +1,61 @@
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import torch
+import torchvision
+import argparse
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument("--weight",
+                        type=str,
+                        required=True,
+                        help="pytorch model weight.")
+
+    parser.add_argument("--output",
+                        type=str,
+                        required=True,
+                        help="export onnx model path.")
+
+    args = parser.parse_args()
+    return args
+
+def main():
+    args = parse_args()
+
+    model = torchvision.models.shufflenet_v2_x2_0()
+    model.load_state_dict(torch.load(args.weight))
+    model.eval()
+
+    input_names = ['input']
+    output_names = ['output']
+    dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}}
+    dummy_input = torch.randn(1, 3, 224, 224)
+
+    torch.onnx.export(
+        model,
+        dummy_input,
+        args.output,
+        input_names=input_names,
+        dynamic_axes=dynamic_axes,
+        output_names=output_names,
+        opset_version=13
+    )
+
+    print("Export onnx model successfully!")
+
+if __name__ == "__main__":
+    main()
diff --git a/models/cv/classification/shufflenetv2_x2_0/ixrt/inference.py b/models/cv/classification/shufflenetv2_x2_0/ixrt/inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..78126cb0ece51d9a8a1f7daea67dc6cdd38ce9bf
--- /dev/null
+++ b/models/cv/classification/shufflenetv2_x2_0/ixrt/inference.py
@@ -0,0 +1,161 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
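The inference script below delegates accuracy counting to `eval_batch` from common.py above; a minimal standalone restatement of that top-k logic, for reference:

```python
# Standalone restatement of common.eval_batch; not part of the PR itself.
import numpy as np
import torch

def topk_hits(batch_score: np.ndarray, batch_label) -> tuple:
    scores = torch.from_numpy(batch_score).to(torch.float32)
    _, indices = scores.topk(5)  # (bsz, 5) class indices, best first
    top1 = sum(int(label == idx[0]) for label, idx in zip(batch_label, indices))
    top5 = sum(int(label in idx) for label, idx in zip(batch_label, indices))
    return top1, top5
```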
+ +import argparse +import json +import os +import re +import time +from tqdm import tqdm + +import cv2 +import numpy as np +import pycuda.autoinit +import pycuda.driver as cuda +import torch +import tensorrt + +from calibration_dataset import getdataloader +from common import eval_batch, create_engine_context, get_io_bindings + +def main(config): + dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) + + host_mem = tensorrt.IHostMemory + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + + # Load Engine && I/O bindings + engine, context = create_engine_context(config.engine_file, logger) + inputs, outputs, allocations = get_io_bindings(engine) + + # Warm up + if config.warm_up > 0: + print("\nWarm Start.") + for i in range(config.warm_up): + context.execute_v2(allocations) + print("Warm Done.") + + # Inference + if config.test_mode == "FPS": + torch.cuda.synchronize() + start_time = time.time() + + for i in range(config.loop_count): + context.execute_v2(allocations) + + torch.cuda.synchronize() + end_time = time.time() + forward_time = end_time - start_time + + num_samples = 50000 + if config.loop_count * config.bsz < num_samples: + num_samples = config.loop_count * config.bsz + fps = num_samples / forward_time + + print("FPS : ", fps) + print(f"Performance Check : Test {fps} >= target {config.fps_target}") + if fps >= config.fps_target: + print("pass!") + exit() + else: + print("failed!") + exit(1) + + elif config.test_mode == "ACC": + + ## Prepare the output data + output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) + print(f"output shape : {output.shape} output type : {output.dtype}") + + total_sample = 0 + acc_top1, acc_top5 = 0, 0 + + start_time = time.time() + with tqdm(total= len(dataloader)) as _tqdm: + for idx, (batch_data, batch_label) in enumerate(dataloader): + batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) + batch_data = np.ascontiguousarray(batch_data) + total_sample += batch_data.shape[0] + + cuda.memcpy_htod(inputs[0]["allocation"], batch_data) + context.execute_v2(allocations) + cuda.memcpy_dtoh(output, outputs[0]["allocation"]) + + # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model + if len(output.shape) == 4: + output = output.squeeze(axis=(2,3)) + + batch_top1, batch_top5 = eval_batch(output, batch_label) + acc_top1 += batch_top1 + acc_top5 += batch_top5 + + _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), + acc_5='{:.4f}'.format(acc_top5/total_sample)) + _tqdm.update(1) + end_time = time.time() + end2end_time = end_time - start_time + + print(F"E2E time : {end2end_time:.3f} seconds") + print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") + print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") + acc1 = acc_top1/total_sample + print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") + if acc1 >= config.acc_target: + print("pass!") + exit() + else: + print("failed!") + exit(1) + +def parse_config(): + parser = argparse.ArgumentParser() + parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") + parser.add_argument( + "--engine_file", + type=str, + help="engine file path" + ) + parser.add_argument( + "--datasets_dir", + type=str, + default="", + help="ImageNet dir", + ) + parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") + parser.add_argument("--bsz", type=int, default=32, help="test batch size") + parser.add_argument( + "--imgsz", + "--img", + "--img-size", + type=int, + 
default=224, + help="inference size h,w", + ) + parser.add_argument("--use_async", action="store_true") + parser.add_argument( + "--device", type=int, default=0, help="cuda device, i.e. 0 or 0,1,2,3,4" + ) + parser.add_argument("--fps_target", type=float, default=-1.0) + parser.add_argument("--acc_target", type=float, default=-1.0) + parser.add_argument("--loop_count", type=int, default=-1) + + config = parser.parse_args() + return config + +if __name__ == "__main__": + config = parse_config() + main(config) diff --git a/models/cv/classification/shufflenetv2_x2_0/ixrt/modify_batchsize.py b/models/cv/classification/shufflenetv2_x2_0/ixrt/modify_batchsize.py new file mode 100644 index 0000000000000000000000000000000000000000..2e8d086b460b14db81cc8e2d2d1487324534f551 --- /dev/null +++ b/models/cv/classification/shufflenetv2_x2_0/ixrt/modify_batchsize.py @@ -0,0 +1,57 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import onnx +import argparse + +def change_input_dim(model, bsz): + batch_size = bsz + + # The following code changes the first dimension of every input to be batch_size + # Modify as appropriate ... note that this requires all inputs to + # have the same batch_size + inputs = model.graph.input + for input in inputs: + # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. + # Add checks as needed. + dim1 = input.type.tensor_type.shape.dim[0] + # update dim to be a symbolic value + if isinstance(batch_size, str): + # set dynamic batch size + dim1.dim_param = batch_size + elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): + # set given batch size + dim1.dim_value = int(batch_size) + else: + # set batch size of 1 + dim1.dim_value = 1 + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--batch_size", type=int) + parser.add_argument("--origin_model", type=str) + parser.add_argument("--output_model", type=str) + args = parser.parse_args() + return args + +args = parse_args() +model = onnx.load(args.origin_model) +change_input_dim(model, args.batch_size) +onnx.save(model, args.output_model) + + + + + diff --git a/models/cv/classification/shufflenetv2_x2_0/ixrt/quant.py b/models/cv/classification/shufflenetv2_x2_0/ixrt/quant.py new file mode 100644 index 0000000000000000000000000000000000000000..6e9740018253e62b05cc5a0d6c56bbbeae87a181 --- /dev/null +++ b/models/cv/classification/shufflenetv2_x2_0/ixrt/quant.py @@ -0,0 +1,59 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import cv2 +import random +import argparse +import numpy as np +from random import shuffle +from tensorrt.deploy import static_quantize + +import torch +import torchvision.datasets +from calibration_dataset import getdataloader + +def setseed(seed=42): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model_name", type=str) + parser.add_argument("--model", type=str) + parser.add_argument("--dataset_dir", type=str, default="imagenet_val") + parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], default="hist_percentile") + parser.add_argument("--disable_quant_names", nargs='*', type=str) + parser.add_argument("--save_dir", type=str, help="save path", default=None) + parser.add_argument("--bsz", type=int, default=32) + parser.add_argument("--step", type=int, default=20) + parser.add_argument("--seed", type=int, default=42) + parser.add_argument("--imgsz", type=int, default=224) + args = parser.parse_args() + print("Quant config:", args) + print(args.disable_quant_names) + return args + +args = parse_args() +setseed(args.seed) +calibration_dataloader = getdataloader(args.dataset_dir, args.step, args.bsz, img_sz=args.imgsz) +static_quantize(args.model, + calibration_dataloader=calibration_dataloader, + save_quant_onnx_path=os.path.join(args.save_dir, f"quantized_{args.model_name}.onnx"), + observer=args.observer, + data_preprocess=lambda x: x[0].to("cuda"), + quant_format="qdq", + disable_quant_names=args.disable_quant_names) \ No newline at end of file diff --git a/models/cv/classification/shufflenetv2_x2_0/ixrt/requirements.txt b/models/cv/classification/shufflenetv2_x2_0/ixrt/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..fa15a05d3194cbd5267be4ced65f6992cb889427 --- /dev/null +++ b/models/cv/classification/shufflenetv2_x2_0/ixrt/requirements.txt @@ -0,0 +1,6 @@ +onnx +tqdm +onnxsim +tabulate +ppq +pycuda \ No newline at end of file diff --git a/models/cv/classification/shufflenetv2_x2_0/ixrt/scripts/infer_shufflenetv2_x2_0_fp16_accuracy.sh b/models/cv/classification/shufflenetv2_x2_0/ixrt/scripts/infer_shufflenetv2_x2_0_fp16_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..c0d5f96ca5120fed452077d586c9975967e57434 --- /dev/null +++ b/models/cv/classification/shufflenetv2_x2_0/ixrt/scripts/infer_shufflenetv2_x2_0_fp16_accuracy.sh @@ -0,0 +1,143 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+EXIT_STATUS=0
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then
+        EXIT_STATUS=1
+    fi
+}
+
+# Run parameters
+BSZ=32
+TGT=-1
+WARM_UP=0
+LOOP_COUNT=-1
+RUN_MODE=ACC
+PRECISION=float16
+
+# Update arguments
+index=0
+options=$@
+arguments=($options)
+for argument in $options
+do
+    index=`expr $index + 1`
+    case $argument in
+      --bs) BSZ=${arguments[index]};;
+      --tgt) TGT=${arguments[index]};;
+    esac
+done
+
+source ${CONFIG_DIR}
+ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo CONFIG_DIR : ${CONFIG_DIR}
+echo ====================== Model Info ======================
+echo Model Name : ${MODEL_NAME}
+echo Onnx Path : ${ORIGINE_MODEL}
+
+step=0
+SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
+
+# Simplify Model
+let step++
+echo;
+echo [STEP ${step}] : Simplify Model
+if [ -f ${SIM_MODEL} ];then
+    echo "  Simplify Model Skip, ${SIM_MODEL} already exists"
+else
+    python3 ${RUN_DIR}/simplify_model.py \
+    --origin_model $ORIGINE_MODEL \
+    --output_model ${SIM_MODEL}
+    echo "  Generate ${SIM_MODEL}"
+fi
+
+# Quant Model
+if [ $PRECISION == "int8" ];then
+    let step++
+    echo;
+    echo [STEP ${step}] : Quant Model
+    if [[ -z ${QUANT_EXIST_ONNX} ]];then
+        QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx
+    fi
+    if [[ -f ${QUANT_EXIST_ONNX} ]];then
+        SIM_MODEL=${QUANT_EXIST_ONNX}
+        echo "  Quant Model Skip, ${QUANT_EXIST_ONNX} already exists"
+    else
+        python3 ${RUN_DIR}/quant.py \
+            --model ${SIM_MODEL} \
+            --model_name ${MODEL_NAME} \
+            --dataset_dir ${DATASETS_DIR} \
+            --observer ${QUANT_OBSERVER} \
+            --disable_quant_names ${DISABLE_QUANT_LIST[@]} \
+            --save_dir $CHECKPOINTS_DIR \
+            --bsz ${QUANT_BATCHSIZE} \
+            --step ${QUANT_STEP} \
+            --seed ${QUANT_SEED} \
+            --imgsz ${IMGSIZE}
+        SIM_MODEL=${QUANT_EXIST_ONNX}
+        echo "  Generate ${SIM_MODEL}"
+    fi
+fi
+
+# Change Batchsize
+let step++
+echo;
+echo [STEP ${step}] : Change Batchsize
+FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx
+if [ -f $FINAL_MODEL ];then
+    echo "  Change Batchsize Skip, $FINAL_MODEL already exists"
+else
+    python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
+        --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL}
+    echo "  Generate ${FINAL_MODEL}"
+fi
+
+# Build Engine
+let step++
+echo;
+echo [STEP ${step}] : Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
+if [ -f $ENGINE_FILE ];then
+    echo "  Build Engine Skip, $ENGINE_FILE already exists"
+else
+    python3 ${RUN_DIR}/build_engine.py \
+        --precision ${PRECISION} \
+        --model ${FINAL_MODEL} \
+        --engine ${ENGINE_FILE}
+    echo "  Generate Engine ${ENGINE_FILE}"
+fi
+
+# Inference
+let step++
+echo;
+echo [STEP ${step}] : Inference
+python3 ${RUN_DIR}/inference.py \
+    --engine_file=${ENGINE_FILE} \
+    --datasets_dir=${DATASETS_DIR} \
+    --imgsz=${IMGSIZE} \
+    --warm_up=${WARM_UP} \
+    --loop_count ${LOOP_COUNT} \
+    --test_mode ${RUN_MODE} \
+    --acc_target ${TGT} \
+    --bsz ${BSZ}; check_status
+
+exit ${EXIT_STATUS}
\ No newline at end of file
diff --git a/models/cv/classification/shufflenetv2_x2_0/ixrt/scripts/infer_shufflenetv2_x2_0_fp16_performance.sh b/models/cv/classification/shufflenetv2_x2_0/ixrt/scripts/infer_shufflenetv2_x2_0_fp16_performance.sh
new file mode 100644
index 0000000000000000000000000000000000000000..9ea9b62e3a59437e74df80aaea55cc6d03037ec7
--- /dev/null
+++ b/models/cv/classification/shufflenetv2_x2_0/ixrt/scripts/infer_shufflenetv2_x2_0_fp16_performance.sh
@@ -0,0 +1,143 @@
+#!/bin/bash
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+EXIT_STATUS=0
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then
+        EXIT_STATUS=1
+    fi
+}
+
+# Run parameters
+BSZ=32
+TGT=-1
+WARM_UP=3
+LOOP_COUNT=20
+RUN_MODE=FPS
+PRECISION=float16
+
+# Update arguments
+index=0
+options=$@
+arguments=($options)
+for argument in $options
+do
+    index=`expr $index + 1`
+    case $argument in
+      --bs) BSZ=${arguments[index]};;
+      --tgt) TGT=${arguments[index]};;
+    esac
+done
+
+source ${CONFIG_DIR}
+ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo CONFIG_DIR : ${CONFIG_DIR}
+echo ====================== Model Info ======================
+echo Model Name : ${MODEL_NAME}
+echo Onnx Path : ${ORIGINE_MODEL}
+
+step=0
+SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
+
+# Simplify Model
+let step++
+echo;
+echo [STEP ${step}] : Simplify Model
+if [ -f ${SIM_MODEL} ];then
+    echo "  Simplify Model Skip, ${SIM_MODEL} already exists"
+else
+    python3 ${RUN_DIR}/simplify_model.py \
+    --origin_model $ORIGINE_MODEL \
+    --output_model ${SIM_MODEL}
+    echo "  Generate ${SIM_MODEL}"
+fi
+
+# Quant Model
+if [ $PRECISION == "int8" ];then
+    let step++
+    echo;
+    echo [STEP ${step}] : Quant Model
+    if [[ -z ${QUANT_EXIST_ONNX} ]];then
+        QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx
+    fi
+    if [[ -f ${QUANT_EXIST_ONNX} ]];then
+        SIM_MODEL=${QUANT_EXIST_ONNX}
+        echo "  Quant Model Skip, ${QUANT_EXIST_ONNX} already exists"
+    else
+        python3 ${RUN_DIR}/quant.py \
+            --model ${SIM_MODEL} \
+            --model_name ${MODEL_NAME} \
+            --dataset_dir ${DATASETS_DIR} \
+            --observer ${QUANT_OBSERVER} \
+            --disable_quant_names ${DISABLE_QUANT_LIST[@]} \
+            --save_dir $CHECKPOINTS_DIR \
+            --bsz ${QUANT_BATCHSIZE} \
+            --step ${QUANT_STEP} \
+            --seed ${QUANT_SEED} \
+            --imgsz ${IMGSIZE}
+        SIM_MODEL=${QUANT_EXIST_ONNX}
+        echo "  Generate ${SIM_MODEL}"
+    fi
+fi
+
+# Change Batchsize
+let step++
+echo;
+echo [STEP ${step}] : Change Batchsize
+FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx
+if [ -f $FINAL_MODEL ];then
+    echo "  Change Batchsize Skip, $FINAL_MODEL already exists"
+else
+    python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
+        --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL}
+    echo "  Generate ${FINAL_MODEL}"
+fi
+
+# Build Engine
+let step++
+echo;
+echo [STEP ${step}] : Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
+if [ -f $ENGINE_FILE ];then
+    echo "  Build Engine Skip, $ENGINE_FILE already exists"
+else
+    python3 ${RUN_DIR}/build_engine.py \
+        --precision ${PRECISION} \
+        --model ${FINAL_MODEL} \
+        --engine ${ENGINE_FILE}
+    echo "  Generate Engine ${ENGINE_FILE}"
+fi
+
+# Inference
+let step++
+echo;
+echo [STEP ${step}] :
Inference +python3 ${RUN_DIR}/inference.py \ + --engine_file=${ENGINE_FILE} \ + --datasets_dir=${DATASETS_DIR} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --fps_target ${TGT} \ + --bsz ${BSZ}; check_status + +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/shufflenetv2_x2_0/ixrt/simplify_model.py b/models/cv/classification/shufflenetv2_x2_0/ixrt/simplify_model.py new file mode 100644 index 0000000000000000000000000000000000000000..bef33576a17235d24305fa32ae3dab6d7accc10e --- /dev/null +++ b/models/cv/classification/shufflenetv2_x2_0/ixrt/simplify_model.py @@ -0,0 +1,41 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import onnx +import argparse +from onnxsim import simplify + +# Simplify +def simplify_model(args): + onnx_model = onnx.load(args.origin_model) + model_simp, check = simplify(onnx_model) + model_simp = onnx.shape_inference.infer_shapes(model_simp) + onnx.save(model_simp, args.output_model) + print(" Simplify onnx Done.") + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--origin_model", type=str) + parser.add_argument("--output_model", type=str) + parser.add_argument("--reshape", action="store_true") + args = parser.parse_args() + return args + +args = parse_args() +simplify_model(args) + + + + diff --git a/models/cv/object_detection/yolov10/ixrt/README.md b/models/cv/object_detection/yolov10/ixrt/README.md new file mode 100644 index 0000000000000000000000000000000000000000..493b11a0d89307ae2c6f7db4aa2a76c26c1d25f5 --- /dev/null +++ b/models/cv/object_detection/yolov10/ixrt/README.md @@ -0,0 +1,65 @@ +# YOLOv10 (IXRT) + +## Model Description + +YOLOv10, built on the Ultralytics Python package by researchers at Tsinghua University, introduces a new approach to real-time object detection, addressing both the post-processing and model architecture deficiencies found in previous YOLO versions. By eliminating non-maximum suppression (NMS) and optimizing various model components, YOLOv10 achieves state-of-the-art performance with significantly reduced computational overhead. Extensive experiments demonstrate its superior accuracy-latency trade-offs across multiple model scales. 
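For orientation only: the PR converts the checkpoint with its own export.py (see Model Conversion below). If you were instead exporting straight from the ultralytics package, the usual API looks roughly like the sketch here — the exact flag names vary across ultralytics releases, so treat this as an assumption rather than the supported path:

```python
# Hedged sketch; the repo's export.py is the supported conversion path.
from ultralytics import YOLO

model = YOLO("yolov10s.pt")
model.export(format="onnx", imgsz=640, batch=32)  # flag names assume a recent ultralytics release
```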
+
+## Supported Environments
+
+| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release |
+|--------|-----------|---------|
+| MR-V100 | 4.2.0 | 25.06 |
+
+## Model Preparation
+
+### Prepare Resources
+
+Pretrained model: 
+
+### Install Dependencies
+
+```bash
+# Install libGL
+## CentOS
+yum install -y mesa-libGL
+## Ubuntu
+apt install -y libgl1-mesa-glx
+
+pip3 install -r requirements.txt
+```
+
+### Model Conversion
+
+```bash
+mkdir checkpoints
+mv yolov10s.pt yolov10.pt
+python3 export.py --weight yolov10.pt --batch 32
+```
+
+## Model Inference
+
+```bash
+export PROJ_DIR=./
+export DATASETS_DIR=/path/to/coco/
+export CHECKPOINTS_DIR=./checkpoints
+export RUN_DIR=./
+```
+
+### FP16
+
+```bash
+# Accuracy
+bash scripts/infer_yolov10_fp16_accuracy.sh
+# Performance
+bash scripts/infer_yolov10_fp16_performance.sh
+```
+
+## Model Results
+
+| Model | BatchSize | Precision | FPS | IOU@0.5 | IOU@0.5:0.95 |
+| ------- | --------- | --------- | ------ | ------- | ------------ |
+| YOLOv10 | 32 | FP16 | 810.97 | 0.629 | 0.461 |
+
+## References
+
+- [YOLOv10](https://docs.ultralytics.com/models/yolov10)
diff --git a/models/cv/object_detection/yolov10/ixrt/build_engine.py b/models/cv/object_detection/yolov10/ixrt/build_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..e0839fc3229ef4ab6d23c7ffeae36a1962bcaaf1
--- /dev/null
+++ b/models/cv/object_detection/yolov10/ixrt/build_engine.py
@@ -0,0 +1,94 @@
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
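build_engine.py below parses the ONNX file and serializes an engine; its main() takes the static-shape path, while a dynamic-shape variant is included but left uncalled. An illustrative invocation — the ONNX name follows ci/prepare.sh, and the engine name is made up here:

```bash
python3 build_engine.py \
    --precision float16 \
    --model checkpoints/yolov10.onnx \
    --engine checkpoints/yolov10_float16_bs32.engine  # output name is illustrative
```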
+import os +import cv2 +import argparse +import numpy as np + +import torch +import tensorrt +from tensorrt import Dims + + +def build_engine_trtapi_staticshape(config): + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(config.model) + + precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 + # print("precision : ", precision) + build_config.set_flag(precision) + + plan = builder.build_serialized_network(network, build_config) + engine_file_path = config.engine + with open(engine_file_path, "wb") as f: + f.write(plan) + print("Build static shape engine done!") + + +def build_engine_trtapi_dynamicshape(config): + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + + profile = builder.create_optimization_profile() + profile.set_shape("input", + Dims([1, 3, 608, 608]), + Dims([32, 3, 608, 608]), + Dims([64, 3, 608, 608]), + ) + build_config.add_optimization_profile(profile) + + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(config.model) + precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 + # print("precision : ", precision) + build_config.set_flag(precision) + + # set dynamic + num_inputs = network.num_inputs + for i in range(num_inputs): + input_tensor = network.get_input(i) + input_tensor.shape = Dims([-1, 3, 608, 608]) + + plan = builder.build_serialized_network(network, build_config) + engine_file_path = config.engine + with open(engine_file_path, "wb") as f: + f.write(plan) + print("Build dynamic shape engine done!") + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", + help="The precision of datatype") + # engine args + parser.add_argument("--engine", type=str, default=None) + + args = parser.parse_args() + return args + + +if __name__ == "__main__": + args = parse_args() + build_engine_trtapi_staticshape(args) + # build_engine_trtapi_dynamicshape(args) diff --git a/models/cv/object_detection/yolov10/ixrt/ci/prepare.sh b/models/cv/object_detection/yolov10/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..6e553e2a18bf9b56fb361d7de1d6a01ed32aeb04 --- /dev/null +++ b/models/cv/object_detection/yolov10/ixrt/ci/prepare.sh @@ -0,0 +1,33 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip3 install -r requirements.txt + +mkdir checkpoints +mv yolov10s.pt yolov10.pt +python3 export.py --weight yolov10.pt --batch 32 +mv yolov10.onnx checkpoints/ diff --git a/models/cv/object_detection/yolov10/ixrt/common.py b/models/cv/object_detection/yolov10/ixrt/common.py new file mode 100644 index 0000000000000000000000000000000000000000..74022b562af71ef8a234b5075252ea6a32505558 --- /dev/null +++ b/models/cv/object_detection/yolov10/ixrt/common.py @@ -0,0 +1,337 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +import os +import cv2 +import glob +import time +import numpy as np +from tqdm import tqdm + +import tensorrt +from cuda import cuda, cudart + + +def load_class_names(namesfile): + class_names = [] + with open(namesfile, 'r') as fp: + lines = fp.readlines() + for line in lines: + line = line.rstrip() + class_names.append(line) + return class_names + +# input : [bsz, box_num, 5(cx, cy, w, h, conf) + class_num(prob[0], prob[1], ...)] +# output : [bsz, box_num, 6(left_top_x, left_top_y, right_bottom_x, right_bottom_y, class_id, max_prob*conf)] +def box_class85to6(input): + center_x_y = input[:, :2] + side = input[:, 2:4] + conf = input[:, 4:5] + class_id = np.argmax(input[:, 5:], axis = -1) + class_id = class_id.astype(np.float32).reshape(-1, 1) + 1 + max_prob = np.max(input[:, 5:], axis = -1).reshape(-1, 1) + x1_y1 = center_x_y - 0.5 * side + x2_y2 = center_x_y + 0.5 * side + nms_input = np.concatenate([x1_y1, x2_y2, class_id, max_prob*conf], axis = -1) + return nms_input + +def save2json(batch_img_id, pred_boxes, json_result, class_trans): + for i, boxes in enumerate(pred_boxes): + if boxes is not None: + image_id = int(batch_img_id[i]) + # have no target + if image_id == -1: + continue + + for x1, y1, x2, y2, _, p, c in boxes: + x1, y1, x2, y2, p = float(x1), float(y1), float(x2), float(y2), float(p) + c = int(c) + x = x1 + y = y1 + w = x2 - x1 + h = y2 - y1 + + json_result.append( + { + "image_id": image_id, + "category_id": class_trans[c - 1], + "bbox": [x, y, w, h], + "score": p, + } + ) + +################## About TensorRT ################# +def create_engine_context(engine_path, logger): + with open(engine_path, "rb") as f: + runtime = tensorrt.Runtime(logger) + assert runtime + engine = runtime.deserialize_cuda_engine(f.read()) + assert engine + context = engine.create_execution_context() + assert context + + return engine, context + +def setup_io_bindings(engine, context): + # Setup I/O bindings + inputs = [] + outputs = [] + allocations = [] + + for i in range(engine.num_bindings): + is_input = False + if engine.binding_is_input(i): + is_input 
= True + name = engine.get_binding_name(i) + dtype = engine.get_binding_dtype(i) + shape = context.get_binding_shape(i) + if is_input: + batch_size = shape[0] + size = np.dtype(tensorrt.nptype(dtype)).itemsize + for s in shape: + size *= s + err, allocation = cudart.cudaMalloc(size) + assert err == cudart.cudaError_t.cudaSuccess + binding = { + "index": i, + "name": name, + "dtype": np.dtype(tensorrt.nptype(dtype)), + "shape": list(shape), + "allocation": allocation, + "nbytes": size + } + # print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") + allocations.append(allocation) + if engine.binding_is_input(i): + inputs.append(binding) + else: + outputs.append(binding) + return inputs, outputs, allocations +########################################################## + + +################## About Loading Dataset ################# +def load_images(images_path): + """ + If image path is given, return it directly + For txt file, read it and return each line as image path + In other case, it's a folder, return a list with names of each + jpg, jpeg and png file + """ + input_path_extension = images_path.split('.')[-1] + if input_path_extension in ['jpg', 'jpeg', 'png']: + return [images_path] + elif input_path_extension == "txt": + with open(images_path, "r") as f: + return f.read().splitlines() + else: + return glob.glob( + os.path.join(images_path, "*.jpg")) + \ + glob.glob(os.path.join(images_path, "*.png")) + \ + glob.glob(os.path.join(images_path, "*.jpeg")) + +def prepare_batch(images_path, bs=16, input_size=(608, 608)): + + width, height = input_size + + batch_names = [] + batch_images = [] + batch_shapes = [] + + temp_names = [] + temp_images = [] + temp_shapes = [] + + for i, image_path in tqdm(enumerate(images_path), desc="Loading coco data"): + name = os.path.basename(image_path) + image = cv2.imread(image_path) + h, w, _ = image.shape + image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + image_resized = cv2.resize(image_rgb, (width, height), + interpolation=cv2.INTER_LINEAR) + custom_image = image_resized.transpose(2, 0, 1).astype(np.float32) / 255. 
+ custom_image = np.expand_dims(custom_image, axis=0) + + if i != 0 and i % bs == 0: + batch_names.append(temp_names) + batch_images.append(np.concatenate(temp_images, axis=0)) + batch_shapes.append(temp_shapes) + + temp_names = [name] + temp_images = [custom_image] + temp_shapes = [(h, w)] + else: + temp_names.append(name) + temp_images.append(custom_image) + temp_shapes.append((h, w)) + + return batch_names, batch_images, batch_shapes +########################################################## + + +################## About Operating box ################# +def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32): + # Resize and pad image while meeting stride-multiple constraints + shape = im.shape[:2] # current shape [height, width] + if isinstance(new_shape, int): + new_shape = (new_shape, new_shape) + + # Scale ratio (new / old) + r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) + if not scaleup: # only scale down, do not scale up (for better val mAP) + r = min(r, 1.0) + + # Compute padding + ratio = r, r # width, height ratios + new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) + dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding + if auto: # minimum rectangle + dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding + elif scaleFill: # stretch + dw, dh = 0.0, 0.0 + new_unpad = (new_shape[1], new_shape[0]) + ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios + + dw /= 2 # divide padding into 2 sides + dh /= 2 + + if shape[::-1] != new_unpad: # resize + im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR) + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border + return im, ratio, (dw, dh) + +def scale_boxes(net_shape, boxes, ori_shape, use_letterbox=False): + # Rescale boxes (xyxy) from net_shape to ori_shape + + if use_letterbox: + + gain = min( + net_shape[0] / ori_shape[0], net_shape[1] / ori_shape[1] + ) # gain = new / old + pad = (net_shape[1] - ori_shape[1] * gain) / 2, ( + net_shape[0] - ori_shape[0] * gain + ) / 2.0 + + boxes[:, [0, 2]] -= pad[0] # x padding + boxes[:, [1, 3]] -= pad[1] # y padding + boxes[:, :4] /= gain + else: + x_scale, y_scale = net_shape[1] / ori_shape[1], net_shape[0] / ori_shape[0] + + boxes[:, 0] /= x_scale + boxes[:, 1] /= y_scale + boxes[:, 2] /= x_scale + boxes[:, 3] /= y_scale + + clip_boxes(boxes, ori_shape) + return boxes + +def clip_boxes(boxes, shape): + + boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2 + boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2 +########################################################## + + +################## About pre and post processing ######### +def pre_processing(src_img, imgsz=608): + resized = cv2.resize(src_img, (imgsz, imgsz), interpolation=cv2.INTER_LINEAR) + in_img = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB) + in_img = np.transpose(in_img, (2, 0, 1)).astype(np.float32) + in_img = np.expand_dims(in_img, axis=0) + in_img /= 255.0 + return in_img + +def nms_cpu(boxes, confs, nms_thresh=0.5, min_mode=False): + # print(boxes.shape) + x1 = boxes[:, 0] + y1 = boxes[:, 1] + x2 = boxes[:, 2] + y2 = boxes[:, 3] + + areas = (x2 - x1) * (y2 - y1) + order = confs.argsort()[::-1] + + keep = [] + while order.size > 0: + idx_self = order[0] + idx_other = order[1:] + + 
keep.append(idx_self) + + xx1 = np.maximum(x1[idx_self], x1[idx_other]) + yy1 = np.maximum(y1[idx_self], y1[idx_other]) + xx2 = np.minimum(x2[idx_self], x2[idx_other]) + yy2 = np.minimum(y2[idx_self], y2[idx_other]) + + w = np.maximum(0.0, xx2 - xx1) + h = np.maximum(0.0, yy2 - yy1) + inter = w * h + + if min_mode: + over = inter / np.minimum(areas[order[0]], areas[order[1:]]) + else: + over = inter / (areas[order[0]] + areas[order[1:]] - inter) + + inds = np.where(over <= nms_thresh)[0] + order = order[inds + 1] + + return np.array(keep) + + +def post_processing(img, conf_thresh, nms_thresh, output, num_classes=80): + + # [batch, num, 1, 4] + box_array = output[:, :, :4] + # [batch, num, 2] + class_confs = output[:, :, 4:] + + max_conf = class_confs[:, :, 1] + max_id = class_confs[:, :, 0] + + bboxes_batch = [] + for i in range(box_array.shape[0]): + + argwhere = max_conf[i] > conf_thresh + l_box_array = box_array[i, argwhere, :] + l_max_conf = max_conf[i, argwhere] + l_max_id = max_id[i, argwhere] + + bboxes = [] + # nms for each class + for j in range(num_classes): + + cls_argwhere = l_max_id == j + ll_box_array = l_box_array[cls_argwhere, :] + ll_max_conf = l_max_conf[cls_argwhere] + ll_max_id = l_max_id[cls_argwhere] + + keep = nms_cpu(ll_box_array, ll_max_conf, nms_thresh) + + if (keep.size > 0): + ll_box_array = ll_box_array[keep, :] + ll_max_conf = ll_max_conf[keep] + ll_max_id = ll_max_id[keep] + + for k in range(ll_box_array.shape[0]): + bboxes.append([ll_box_array[k, 0], ll_box_array[k, 1], ll_box_array[k, 2], + ll_box_array[k, 3], ll_max_conf[k], ll_max_conf[k], ll_max_id[k]]) + + bboxes_batch.append(bboxes) + + return bboxes_batch +########################################################## + diff --git a/models/cv/object_detection/yolov10/ixrt/export.py b/models/cv/object_detection/yolov10/ixrt/export.py new file mode 100644 index 0000000000000000000000000000000000000000..388e3a8540b58262f9d8c8e545848addf3afb606 --- /dev/null +++ b/models/cv/object_detection/yolov10/ixrt/export.py @@ -0,0 +1,43 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +import argparse +from ultralytics import YOLO + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--weight", + type=str, + required=True, + help="pytorch model weight.") + + parser.add_argument("--batch", + type=int, + required=True, + help="batchsize of the model.") + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + model = YOLO(args.weight).cpu() + + model.export(format='onnx', batch=args.batch, imgsz=(640, 640), opset=11) + +if __name__ == "__main__": + main() diff --git a/models/cv/object_detection/yolov10/ixrt/inference.py b/models/cv/object_detection/yolov10/ixrt/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..1ac908a7c8f8092f3fb59018cf295ecfb954fa0c --- /dev/null +++ b/models/cv/object_detection/yolov10/ixrt/inference.py @@ -0,0 +1,250 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import json +import argparse +import time +import tensorrt +from tensorrt import Dims +from cuda import cuda, cudart +import torch +import numpy as np +from tqdm import tqdm + +from common import create_engine_context, setup_io_bindings + +from pathlib import Path + +from ultralytics.cfg import get_cfg +from ultralytics.data import converter +from ultralytics.utils import DEFAULT_CFG +from ultralytics.data.utils import check_det_dataset +from ultralytics.utils.metrics import ConfusionMatrix +from ultralytics.models.yolo.detect import DetectionValidator + +coco_classes = {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', + 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', + 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', + 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', + 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', + 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed', + 60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone', 68: 'microwave', 69: 'oven', + 70: 'toaster', 71: 'sink', 72: 'refrigerator', 73: 'book', 74: 'clock', 75: 'vase', 76: 'scissors', 77: 'teddy bear', 78: 'hair drier', 79: 'toothbrush'} + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--model_engine", + type=str, + required=True, + help="ixrt engine path.") + + parser.add_argument("--bsz", + type=int, + required=True, + help="inference batch size.") + + 
parser.add_argument( + "--imgsz", + "--img", + "--img-size", + type=int, + default=640, + help="inference size h,w", + ) + + parser.add_argument("--datasets", + type=str, + required=True, + help="datasets path.") + + parser.add_argument("--warm_up", + type=int, + default=3, + help="number of warmup before test.") + + parser.add_argument("--num_workers", + type=int, + default=16, + help="number of workers used in pytorch dataloader.") + + parser.add_argument("--acc_target", + type=float, + default=0.0, + help="Model inference Accuracy target.") + + parser.add_argument("--fps_target", + type=float, + default=0.0, + help="Model inference FPS target.") + + parser.add_argument("--conf", + type=float, + default=0.001, + help="confidence threshold.") + + parser.add_argument("--iou", + type=float, + default=0.65, + help="iou threshold.") + + parser.add_argument("--perf_only", + type=bool, + default=False, + help="Run performance test only") + + args = parser.parse_args() + + return args + +class IxRT_Validator(DetectionValidator): + def __call__(self, config, data): + self.data = data + self.stride = 32 + self.dataloader = self.get_dataloader(self.data.get(self.args.split), self.args.batch) + self.init_metrics() + + total_num = 0 + + input_name = "input" + host_mem = tensorrt.IHostMemory + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + engine, context = create_engine_context(config.model_engine, logger) + input_idx = engine.get_binding_index(input_name) + context.set_binding_shape(input_idx, Dims((config.bsz,3,config.imgsz,config.imgsz))) + inputs, outputs, allocations = setup_io_bindings(engine, context) + + if config.warm_up > 0: + print("\nWarm Start.") + for i in range(config.warm_up): + context.execute_v2(allocations) + print("Warm Done.") + + forward_time = 0.0 + num_samples = 0 + + e2e_start_time = time.time() + for batch in tqdm(self.dataloader): + batch = self.preprocess(batch) + + imgs = batch['img'] + pad_batch = len(imgs) != self.args.batch + if pad_batch: + origin_size = len(imgs) + imgs = np.resize(imgs, (self.args.batch, *imgs.shape[1:])) + + batch_data = np.ascontiguousarray(imgs) + data_shape = batch_data.shape + + cur_bsz_sample = batch_data.shape[0] + num_samples += cur_bsz_sample + + # Set input + input_idx = engine.get_binding_index(input_name) + context.set_binding_shape(input_idx, Dims(data_shape)) + inputs, outputs, allocations = setup_io_bindings(engine, context) + + err, = cuda.cuMemcpyHtoD(inputs[0]["allocation"], batch_data, batch_data.nbytes) + assert(err == cuda.CUresult.CUDA_SUCCESS) + # Prepare the output data + output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) + + + start_time = time.time() + context.execute_v2(allocations) + end_time = time.time() + forward_time += end_time - start_time + + err, = cuda.cuMemcpyDtoH(output, outputs[0]["allocation"], outputs[0]["nbytes"]) + assert(err == cuda.CUresult.CUDA_SUCCESS) + + for alloc in allocations: + if not alloc: + continue + (err,) = cudart.cudaFree(alloc) + assert err == cudart.cudaError_t.cudaSuccess + + if pad_batch: + output = output[:origin_size] + + outputs = torch.from_numpy(output) + + preds = self.postprocess([outputs]) + + self.update_metrics(preds, batch) + + e2e_end_time = time.time() + if config.perf_only: + fps = num_samples / forward_time + return fps + else: + stats = self.get_stats() + + if self.args.save_json and self.jdict: + with open(str(self.save_dir / 'predictions.json'), 'w') as f: + print(f'Saving {f.name} ...') + json.dump(self.jdict, f) # flatten and save + + stats = 
self.eval_json(stats) + + end2end_time = e2e_end_time - e2e_start_time + print(F"E2E time : {end2end_time:.3f} seconds") + + return stats + + def init_metrics(self): + """Initialize evaluation metrics for YOLO.""" + val = self.data.get(self.args.split, '') # validation path + self.is_coco = isinstance(val, str) and 'coco' in val and val.endswith(f'{os.sep}val2017.txt') # is COCO + self.class_map = converter.coco80_to_coco91_class() if self.is_coco else list(range(1000)) + self.args.save_json |= self.is_coco and not self.training # run on final val if training COCO + self.names = self.data['names'] + self.nc = len(self.names) + self.metrics.names = self.names + self.confusion_matrix = ConfusionMatrix(nc=80) + self.seen = 0 + self.jdict = [] + self.stats = dict(tp=[], conf=[], pred_cls=[], target_cls=[], target_img=[]) + +def main(): + config = parse_args() + + batch_size = config.bsz + + overrides = {'mode': 'val'} + cfg_args = get_cfg(cfg=DEFAULT_CFG, overrides=overrides) + + cfg_args.batch = batch_size + cfg_args.save_json = True + + data = { + 'path': Path(config.datasets), + 'val': os.path.join(config.datasets, 'val2017.txt'), + 'names': coco_classes + } + + validator = IxRT_Validator(args=cfg_args, save_dir=Path('.')) + + if config.perf_only: + fps = validator(config, data) + print("FPS : ", fps) + print(f"Performance Check : Test {fps} >= target {config.fps_target}") + else: + stats = validator(config, data) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/object_detection/yolov10/ixrt/quant.py b/models/cv/object_detection/yolov10/ixrt/quant.py new file mode 100644 index 0000000000000000000000000000000000000000..48f9c914f4db85525bb7f52a1a5eec0cd9e3a8d0 --- /dev/null +++ b/models/cv/object_detection/yolov10/ixrt/quant.py @@ -0,0 +1,105 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+import os +import cv2 +import random +import argparse +import numpy as np +from tensorrt.deploy import static_quantize + +import torch +import torchvision.datasets +from torch.utils.data import DataLoader +from common import letterbox + + +def setseed(seed=42): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model_name", type=str) + parser.add_argument("--model", type=str, default="yolov4_bs16_without_decoder.onnx") + parser.add_argument("--dataset_dir", type=str, default="./coco2017/val2017") + parser.add_argument("--ann_file", type=str, default="./coco2017/annotations/instances_val2017.json") + parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], default="hist_percentile") + parser.add_argument("--disable_quant_names", nargs='*', type=str) + parser.add_argument("--save_quant_model", type=str, help="save the quantization model path", default=None) + parser.add_argument("--bsz", type=int, default=16) + parser.add_argument("--step", type=int, default=32) + parser.add_argument("--seed", type=int, default=42) + parser.add_argument("--imgsz", type=int, default=608) + parser.add_argument("--use_letterbox", action="store_true") + args = parser.parse_args() + return args + +args = parse_args() +setseed(args.seed) +model_name = args.model_name + + +def get_dataloader(data_dir, step=32, batch_size=16, new_shape=[608, 608], use_letterbox=False): + num = step * batch_size + val_list = [os.path.join(data_dir, x) for x in os.listdir(data_dir)] + random.shuffle(val_list) + pic_list = val_list[:num] + + calibration_dataset = [] + for file_path in pic_list: + pic_data = cv2.imread(file_path) + org_img = pic_data + assert org_img is not None, 'Image not Found ' + file_path + h0, w0 = org_img.shape[:2] + + if use_letterbox: + img, ratio, dwdh = letterbox(org_img, new_shape=(new_shape[1], new_shape[0]), auto=False, scaleup=True) + else: + img = cv2.resize(org_img, new_shape) + img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + img = np.ascontiguousarray(img) / 255.0 # 0~1 np array + img = torch.from_numpy(img).float() + + calibration_dataset.append(img) + + calibration_dataloader = DataLoader( + calibration_dataset, + shuffle=True, + batch_size=batch_size, + drop_last=True + ) + return calibration_dataloader + +dataloader = get_dataloader( + data_dir=args.dataset_dir, + step=args.step, + batch_size=args.bsz, + new_shape=(args.imgsz, args.imgsz), + use_letterbox=args.use_letterbox +) + +dirname = os.path.dirname(args.save_quant_model) +quant_json_path = os.path.join(dirname, f"quantized_{model_name}.json") + +static_quantize(args.model, + calibration_dataloader=dataloader, + save_quant_onnx_path=args.save_quant_model, + save_quant_params_path=quant_json_path, + observer=args.observer, + data_preprocess=lambda x: x.to("cuda"), + quant_format="qdq", + disable_quant_names=args.disable_quant_names) diff --git a/models/cv/object_detection/yolov10/ixrt/requirements.txt b/models/cv/object_detection/yolov10/ixrt/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..346fe1c7d21fa2d840c877dec15c1e30bd58a1d3 --- /dev/null +++ b/models/cv/object_detection/yolov10/ixrt/requirements.txt @@ -0,0 +1,7 @@ +tqdm +onnx +onnxsim +pycocotools +opencv-python==4.8.0.74 +ultralytics==8.2.51 +cuda-python \ No newline at end of file diff --git a/models/cv/object_detection/yolov10/ixrt/scripts/infer_yolov10_fp16_accuracy.sh 
b/models/cv/object_detection/yolov10/ixrt/scripts/infer_yolov10_fp16_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..eed10cc3a6a81d0c52ac15c650dcf0a3a0c7ffc3 --- /dev/null +++ b/models/cv/object_detection/yolov10/ixrt/scripts/infer_yolov10_fp16_accuracy.sh @@ -0,0 +1,66 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +PROJ_DIR=${PROJ_DIR} +DATASETS_DIR=${DATASETS_DIR} +COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json +EVAL_DIR=${DATASETS_DIR}/images/val2017 +CHECKPOINTS_DIR=${CHECKPOINTS_DIR} +RUN_DIR=${PROJ_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo ====================== Model Info ====================== +echo Model Name : yolov10 +echo Onnx Path : ${ORIGINE_MODEL} + +BATCH_SIZE=32 +CURRENT_MODEL=${CHECKPOINTS_DIR}/yolov10.onnx + +# Build Engine +echo Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/yolov10_fp16.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision float16 \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi + +# Inference +echo Inference +RUN_BATCH_SIZE=32 +python3 ${RUN_DIR}/inference.py \ + --model_engine ${ENGINE_FILE} \ + --warm_up 2 \ + --bsz ${RUN_BATCH_SIZE} \ + --imgsz 640 \ + --datasets ${DATASETS_DIR} \ + --acc_target 0.3 +exit ${EXIT_STATUS} diff --git a/models/cv/object_detection/yolov10/ixrt/scripts/infer_yolov10_fp16_performance.sh b/models/cv/object_detection/yolov10/ixrt/scripts/infer_yolov10_fp16_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..0a294edda4872f7823047f3d07cf9fa6a1063f3f --- /dev/null +++ b/models/cv/object_detection/yolov10/ixrt/scripts/infer_yolov10_fp16_performance.sh @@ -0,0 +1,67 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +PROJ_DIR=${PROJ_DIR} +DATASETS_DIR=${DATASETS_DIR} +COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json +EVAL_DIR=${DATASETS_DIR}/images/val2017 +CHECKPOINTS_DIR=${CHECKPOINTS_DIR} +RUN_DIR=${PROJ_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo ====================== Model Info ====================== +echo Model Name : yolov10 +echo Onnx Path : ${ORIGINE_MODEL} + +BATCH_SIZE=32 +CURRENT_MODEL=${CHECKPOINTS_DIR}/yolov10.onnx + +# Build Engine +echo Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/yolov10_fp16.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision float16 \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi + +# Inference +echo Inference +RUN_BATCH_SIZE=32 +python3 ${RUN_DIR}/inference.py \ + --model_engine ${ENGINE_FILE} \ + --warm_up 2 \ + --bsz ${RUN_BATCH_SIZE} \ + --imgsz 640 \ + --datasets ${DATASETS_DIR} \ + --perf_only true \ + --fps_target 0.0 +exit ${EXIT_STATUS} diff --git a/models/cv/object_detection/yolov11/ixrt/README.md b/models/cv/object_detection/yolov11/ixrt/README.md new file mode 100644 index 0000000000000000000000000000000000000000..74fc84fab0421dc5e52dd86e7ed8cd3497eeaf60 --- /dev/null +++ b/models/cv/object_detection/yolov11/ixrt/README.md @@ -0,0 +1,65 @@ +# YOLOv11 (IGIE) + +## Model Description + +YOLOv11 is the latest generation of the YOLO (You Only Look Once) series object detection model released by Ultralytics. Building upon the advancements of previous YOLO models, such as YOLOv5 and YOLOv8, YOLOv11 introduces comprehensive upgrades to further enhance performance, flexibility, and usability. It is a versatile deep learning model designed for multi-task applications, supporting object detection, instance segmentation, image classification, keypoint pose estimation, and rotated object detection. 
+ +## Supported Environments + +| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release | +|--------|-----------|---------| +| MR-V100 | 4.2.0 | 25.06 | + +## Model Preparation + +### Prepare Resources + +Pretrained model: + +### Install Dependencies + +```bash +# Install libGL +## CentOS +yum install -y mesa-libGL +## Ubuntu +apt install -y libgl1-mesa-glx + +pip3 install -r requirements.txt +``` + +## Model Conversion + +```bash +mkdir checkpoints +mv yolo11n.pt yolo11.pt +python3 export.py --weight yolo11.pt --batch 32 +``` + +## Model Inference + +```bash +export PROJ_DIR=./ +export DATASETS_DIR=/path/to/coco/ +export CHECKPOINTS_DIR=./checkpoints +export RUN_DIR=./ +``` + +### FP16 + +```bash +# Accuracy +bash scripts/infer_yolov11_fp16_accuracy.sh +# Performance +bash scripts/infer_yolov11_fp16_performance.sh +``` + +## Model Results + +| Model | BatchSize | Precision | FPS | IOU@0.5 | IOU@0.5:0.95 | +| ------- | --------- | --------- | ------- | ------- | ------------ | +| YOLOv11 | 32 | FP16 | 1519.25 | 0.551 | 0.393 | + +## References + +YOLOv11: diff --git a/models/cv/object_detection/yolov11/ixrt/build_engine.py b/models/cv/object_detection/yolov11/ixrt/build_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..e0839fc3229ef4ab6d23c7ffeae36a1962bcaaf1 --- /dev/null +++ b/models/cv/object_detection/yolov11/ixrt/build_engine.py @@ -0,0 +1,94 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+import os +import cv2 +import argparse +import numpy as np + +import torch +import tensorrt +from tensorrt import Dims + + +def build_engine_trtapi_staticshape(config): + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(config.model) + + precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 + # print("precision : ", precision) + build_config.set_flag(precision) + + plan = builder.build_serialized_network(network, build_config) + engine_file_path = config.engine + with open(engine_file_path, "wb") as f: + f.write(plan) + print("Build static shape engine done!") + + +def build_engine_trtapi_dynamicshape(config): + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + + profile = builder.create_optimization_profile() + profile.set_shape("input", + Dims([1, 3, 608, 608]), + Dims([32, 3, 608, 608]), + Dims([64, 3, 608, 608]), + ) + build_config.add_optimization_profile(profile) + + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(config.model) + precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 + # print("precision : ", precision) + build_config.set_flag(precision) + + # set dynamic + num_inputs = network.num_inputs + for i in range(num_inputs): + input_tensor = network.get_input(i) + input_tensor.shape = Dims([-1, 3, 608, 608]) + + plan = builder.build_serialized_network(network, build_config) + engine_file_path = config.engine + with open(engine_file_path, "wb") as f: + f.write(plan) + print("Build dynamic shape engine done!") + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", + help="The precision of datatype") + # engine args + parser.add_argument("--engine", type=str, default=None) + + args = parser.parse_args() + return args + + +if __name__ == "__main__": + args = parse_args() + build_engine_trtapi_staticshape(args) + # build_engine_trtapi_dynamicshape(args) diff --git a/models/cv/object_detection/yolov11/ixrt/ci/prepare.sh b/models/cv/object_detection/yolov11/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..a011f236c090ea4f990f2bd658f07aa5de43463e --- /dev/null +++ b/models/cv/object_detection/yolov11/ixrt/ci/prepare.sh @@ -0,0 +1,33 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip3 install -r requirements.txt + +mkdir checkpoints +mv yolo11n.pt yolo11.pt +python3 export.py --weight yolo11.pt --batch 32 +mv yolo11.onnx checkpoints/ diff --git a/models/cv/object_detection/yolov11/ixrt/common.py b/models/cv/object_detection/yolov11/ixrt/common.py new file mode 100644 index 0000000000000000000000000000000000000000..74022b562af71ef8a234b5075252ea6a32505558 --- /dev/null +++ b/models/cv/object_detection/yolov11/ixrt/common.py @@ -0,0 +1,337 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +import os +import cv2 +import glob +import time +import numpy as np +from tqdm import tqdm + +import tensorrt +from cuda import cuda, cudart + + +def load_class_names(namesfile): + class_names = [] + with open(namesfile, 'r') as fp: + lines = fp.readlines() + for line in lines: + line = line.rstrip() + class_names.append(line) + return class_names + +# input : [bsz, box_num, 5(cx, cy, w, h, conf) + class_num(prob[0], prob[1], ...)] +# output : [bsz, box_num, 6(left_top_x, left_top_y, right_bottom_x, right_bottom_y, class_id, max_prob*conf)] +def box_class85to6(input): + center_x_y = input[:, :2] + side = input[:, 2:4] + conf = input[:, 4:5] + class_id = np.argmax(input[:, 5:], axis = -1) + class_id = class_id.astype(np.float32).reshape(-1, 1) + 1 + max_prob = np.max(input[:, 5:], axis = -1).reshape(-1, 1) + x1_y1 = center_x_y - 0.5 * side + x2_y2 = center_x_y + 0.5 * side + nms_input = np.concatenate([x1_y1, x2_y2, class_id, max_prob*conf], axis = -1) + return nms_input + +def save2json(batch_img_id, pred_boxes, json_result, class_trans): + for i, boxes in enumerate(pred_boxes): + if boxes is not None: + image_id = int(batch_img_id[i]) + # have no target + if image_id == -1: + continue + + for x1, y1, x2, y2, _, p, c in boxes: + x1, y1, x2, y2, p = float(x1), float(y1), float(x2), float(y2), float(p) + c = int(c) + x = x1 + y = y1 + w = x2 - x1 + h = y2 - y1 + + json_result.append( + { + "image_id": image_id, + "category_id": class_trans[c - 1], + "bbox": [x, y, w, h], + "score": p, + } + ) + +################## About TensorRT ################# +def create_engine_context(engine_path, logger): + with open(engine_path, "rb") as f: + runtime = tensorrt.Runtime(logger) + assert runtime + engine = runtime.deserialize_cuda_engine(f.read()) + assert engine + context = engine.create_execution_context() + assert context + + return engine, context + +def setup_io_bindings(engine, context): + # Setup I/O bindings + inputs = [] + outputs = [] + allocations = [] + + for i in range(engine.num_bindings): + is_input = False + if engine.binding_is_input(i): + is_input = 
True + name = engine.get_binding_name(i) + dtype = engine.get_binding_dtype(i) + shape = context.get_binding_shape(i) + if is_input: + batch_size = shape[0] + size = np.dtype(tensorrt.nptype(dtype)).itemsize + for s in shape: + size *= s + err, allocation = cudart.cudaMalloc(size) + assert err == cudart.cudaError_t.cudaSuccess + binding = { + "index": i, + "name": name, + "dtype": np.dtype(tensorrt.nptype(dtype)), + "shape": list(shape), + "allocation": allocation, + "nbytes": size + } + # print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") + allocations.append(allocation) + if engine.binding_is_input(i): + inputs.append(binding) + else: + outputs.append(binding) + return inputs, outputs, allocations +########################################################## + + +################## About Loading Dataset ################# +def load_images(images_path): + """ + If image path is given, return it directly + For txt file, read it and return each line as image path + In other case, it's a folder, return a list with names of each + jpg, jpeg and png file + """ + input_path_extension = images_path.split('.')[-1] + if input_path_extension in ['jpg', 'jpeg', 'png']: + return [images_path] + elif input_path_extension == "txt": + with open(images_path, "r") as f: + return f.read().splitlines() + else: + return glob.glob( + os.path.join(images_path, "*.jpg")) + \ + glob.glob(os.path.join(images_path, "*.png")) + \ + glob.glob(os.path.join(images_path, "*.jpeg")) + +def prepare_batch(images_path, bs=16, input_size=(608, 608)): + + width, height = input_size + + batch_names = [] + batch_images = [] + batch_shapes = [] + + temp_names = [] + temp_images = [] + temp_shapes = [] + + for i, image_path in tqdm(enumerate(images_path), desc="Loading coco data"): + name = os.path.basename(image_path) + image = cv2.imread(image_path) + h, w, _ = image.shape + image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + image_resized = cv2.resize(image_rgb, (width, height), + interpolation=cv2.INTER_LINEAR) + custom_image = image_resized.transpose(2, 0, 1).astype(np.float32) / 255. 
+ custom_image = np.expand_dims(custom_image, axis=0) + + if i != 0 and i % bs == 0: + batch_names.append(temp_names) + batch_images.append(np.concatenate(temp_images, axis=0)) + batch_shapes.append(temp_shapes) + + temp_names = [name] + temp_images = [custom_image] + temp_shapes = [(h, w)] + else: + temp_names.append(name) + temp_images.append(custom_image) + temp_shapes.append((h, w)) + + return batch_names, batch_images, batch_shapes +########################################################## + + +################## About Operating box ################# +def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32): + # Resize and pad image while meeting stride-multiple constraints + shape = im.shape[:2] # current shape [height, width] + if isinstance(new_shape, int): + new_shape = (new_shape, new_shape) + + # Scale ratio (new / old) + r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) + if not scaleup: # only scale down, do not scale up (for better val mAP) + r = min(r, 1.0) + + # Compute padding + ratio = r, r # width, height ratios + new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) + dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding + if auto: # minimum rectangle + dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding + elif scaleFill: # stretch + dw, dh = 0.0, 0.0 + new_unpad = (new_shape[1], new_shape[0]) + ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios + + dw /= 2 # divide padding into 2 sides + dh /= 2 + + if shape[::-1] != new_unpad: # resize + im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR) + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border + return im, ratio, (dw, dh) + +def scale_boxes(net_shape, boxes, ori_shape, use_letterbox=False): + # Rescale boxes (xyxy) from net_shape to ori_shape + + if use_letterbox: + + gain = min( + net_shape[0] / ori_shape[0], net_shape[1] / ori_shape[1] + ) # gain = new / old + pad = (net_shape[1] - ori_shape[1] * gain) / 2, ( + net_shape[0] - ori_shape[0] * gain + ) / 2.0 + + boxes[:, [0, 2]] -= pad[0] # x padding + boxes[:, [1, 3]] -= pad[1] # y padding + boxes[:, :4] /= gain + else: + x_scale, y_scale = net_shape[1] / ori_shape[1], net_shape[0] / ori_shape[0] + + boxes[:, 0] /= x_scale + boxes[:, 1] /= y_scale + boxes[:, 2] /= x_scale + boxes[:, 3] /= y_scale + + clip_boxes(boxes, ori_shape) + return boxes + +def clip_boxes(boxes, shape): + + boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2 + boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2 +########################################################## + + +################## About pre and post processing ######### +def pre_processing(src_img, imgsz=608): + resized = cv2.resize(src_img, (imgsz, imgsz), interpolation=cv2.INTER_LINEAR) + in_img = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB) + in_img = np.transpose(in_img, (2, 0, 1)).astype(np.float32) + in_img = np.expand_dims(in_img, axis=0) + in_img /= 255.0 + return in_img + +def nms_cpu(boxes, confs, nms_thresh=0.5, min_mode=False): + # print(boxes.shape) + x1 = boxes[:, 0] + y1 = boxes[:, 1] + x2 = boxes[:, 2] + y2 = boxes[:, 3] + + areas = (x2 - x1) * (y2 - y1) + order = confs.argsort()[::-1] + + keep = [] + while order.size > 0: + idx_self = order[0] + idx_other = order[1:] + + 
keep.append(idx_self) + + xx1 = np.maximum(x1[idx_self], x1[idx_other]) + yy1 = np.maximum(y1[idx_self], y1[idx_other]) + xx2 = np.minimum(x2[idx_self], x2[idx_other]) + yy2 = np.minimum(y2[idx_self], y2[idx_other]) + + w = np.maximum(0.0, xx2 - xx1) + h = np.maximum(0.0, yy2 - yy1) + inter = w * h + + if min_mode: + over = inter / np.minimum(areas[order[0]], areas[order[1:]]) + else: + over = inter / (areas[order[0]] + areas[order[1:]] - inter) + + inds = np.where(over <= nms_thresh)[0] + order = order[inds + 1] + + return np.array(keep) + + +def post_processing(img, conf_thresh, nms_thresh, output, num_classes=80): + + # [batch, num, 1, 4] + box_array = output[:, :, :4] + # [batch, num, 2] + class_confs = output[:, :, 4:] + + max_conf = class_confs[:, :, 1] + max_id = class_confs[:, :, 0] + + bboxes_batch = [] + for i in range(box_array.shape[0]): + + argwhere = max_conf[i] > conf_thresh + l_box_array = box_array[i, argwhere, :] + l_max_conf = max_conf[i, argwhere] + l_max_id = max_id[i, argwhere] + + bboxes = [] + # nms for each class + for j in range(num_classes): + + cls_argwhere = l_max_id == j + ll_box_array = l_box_array[cls_argwhere, :] + ll_max_conf = l_max_conf[cls_argwhere] + ll_max_id = l_max_id[cls_argwhere] + + keep = nms_cpu(ll_box_array, ll_max_conf, nms_thresh) + + if (keep.size > 0): + ll_box_array = ll_box_array[keep, :] + ll_max_conf = ll_max_conf[keep] + ll_max_id = ll_max_id[keep] + + for k in range(ll_box_array.shape[0]): + bboxes.append([ll_box_array[k, 0], ll_box_array[k, 1], ll_box_array[k, 2], + ll_box_array[k, 3], ll_max_conf[k], ll_max_conf[k], ll_max_id[k]]) + + bboxes_batch.append(bboxes) + + return bboxes_batch +########################################################## + diff --git a/models/cv/object_detection/yolov11/ixrt/export.py b/models/cv/object_detection/yolov11/ixrt/export.py new file mode 100644 index 0000000000000000000000000000000000000000..388e3a8540b58262f9d8c8e545848addf3afb606 --- /dev/null +++ b/models/cv/object_detection/yolov11/ixrt/export.py @@ -0,0 +1,43 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +import argparse +from ultralytics import YOLO + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--weight", + type=str, + required=True, + help="pytorch model weight.") + + parser.add_argument("--batch", + type=int, + required=True, + help="batchsize of the model.") + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + model = YOLO(args.weight).cpu() + + model.export(format='onnx', batch=args.batch, imgsz=(640, 640), opset=11) + +if __name__ == "__main__": + main() diff --git a/models/cv/object_detection/yolov11/ixrt/inference.py b/models/cv/object_detection/yolov11/ixrt/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..1ac908a7c8f8092f3fb59018cf295ecfb954fa0c --- /dev/null +++ b/models/cv/object_detection/yolov11/ixrt/inference.py @@ -0,0 +1,250 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import json +import argparse +import time +import tensorrt +from tensorrt import Dims +from cuda import cuda, cudart +import torch +import numpy as np +from tqdm import tqdm + +from common import create_engine_context, setup_io_bindings + +from pathlib import Path + +from ultralytics.cfg import get_cfg +from ultralytics.data import converter +from ultralytics.utils import DEFAULT_CFG +from ultralytics.data.utils import check_det_dataset +from ultralytics.utils.metrics import ConfusionMatrix +from ultralytics.models.yolo.detect import DetectionValidator + +coco_classes = {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', + 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', + 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', + 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', + 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', + 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed', + 60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone', 68: 'microwave', 69: 'oven', + 70: 'toaster', 71: 'sink', 72: 'refrigerator', 73: 'book', 74: 'clock', 75: 'vase', 76: 'scissors', 77: 'teddy bear', 78: 'hair drier', 79: 'toothbrush'} + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--model_engine", + type=str, + required=True, + help="ixrt engine path.") + + parser.add_argument("--bsz", + type=int, + required=True, + help="inference batch size.") + + 
parser.add_argument( + "--imgsz", + "--img", + "--img-size", + type=int, + default=640, + help="inference size h,w", + ) + + parser.add_argument("--datasets", + type=str, + required=True, + help="datasets path.") + + parser.add_argument("--warm_up", + type=int, + default=3, + help="number of warmup before test.") + + parser.add_argument("--num_workers", + type=int, + default=16, + help="number of workers used in pytorch dataloader.") + + parser.add_argument("--acc_target", + type=float, + default=0.0, + help="Model inference Accuracy target.") + + parser.add_argument("--fps_target", + type=float, + default=0.0, + help="Model inference FPS target.") + + parser.add_argument("--conf", + type=float, + default=0.001, + help="confidence threshold.") + + parser.add_argument("--iou", + type=float, + default=0.65, + help="iou threshold.") + + parser.add_argument("--perf_only", + type=bool, + default=False, + help="Run performance test only") + + args = parser.parse_args() + + return args + +class IxRT_Validator(DetectionValidator): + def __call__(self, config, data): + self.data = data + self.stride = 32 + self.dataloader = self.get_dataloader(self.data.get(self.args.split), self.args.batch) + self.init_metrics() + + total_num = 0 + + input_name = "input" + host_mem = tensorrt.IHostMemory + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + engine, context = create_engine_context(config.model_engine, logger) + input_idx = engine.get_binding_index(input_name) + context.set_binding_shape(input_idx, Dims((config.bsz,3,config.imgsz,config.imgsz))) + inputs, outputs, allocations = setup_io_bindings(engine, context) + + if config.warm_up > 0: + print("\nWarm Start.") + for i in range(config.warm_up): + context.execute_v2(allocations) + print("Warm Done.") + + forward_time = 0.0 + num_samples = 0 + + e2e_start_time = time.time() + for batch in tqdm(self.dataloader): + batch = self.preprocess(batch) + + imgs = batch['img'] + pad_batch = len(imgs) != self.args.batch + if pad_batch: + origin_size = len(imgs) + imgs = np.resize(imgs, (self.args.batch, *imgs.shape[1:])) + + batch_data = np.ascontiguousarray(imgs) + data_shape = batch_data.shape + + cur_bsz_sample = batch_data.shape[0] + num_samples += cur_bsz_sample + + # Set input + input_idx = engine.get_binding_index(input_name) + context.set_binding_shape(input_idx, Dims(data_shape)) + inputs, outputs, allocations = setup_io_bindings(engine, context) + + err, = cuda.cuMemcpyHtoD(inputs[0]["allocation"], batch_data, batch_data.nbytes) + assert(err == cuda.CUresult.CUDA_SUCCESS) + # Prepare the output data + output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) + + + start_time = time.time() + context.execute_v2(allocations) + end_time = time.time() + forward_time += end_time - start_time + + err, = cuda.cuMemcpyDtoH(output, outputs[0]["allocation"], outputs[0]["nbytes"]) + assert(err == cuda.CUresult.CUDA_SUCCESS) + + for alloc in allocations: + if not alloc: + continue + (err,) = cudart.cudaFree(alloc) + assert err == cudart.cudaError_t.cudaSuccess + + if pad_batch: + output = output[:origin_size] + + outputs = torch.from_numpy(output) + + preds = self.postprocess([outputs]) + + self.update_metrics(preds, batch) + + e2e_end_time = time.time() + if config.perf_only: + fps = num_samples / forward_time + return fps + else: + stats = self.get_stats() + + if self.args.save_json and self.jdict: + with open(str(self.save_dir / 'predictions.json'), 'w') as f: + print(f'Saving {f.name} ...') + json.dump(self.jdict, f) # flatten and save + + stats = 
self.eval_json(stats) + + end2end_time = e2e_end_time - e2e_start_time + print(F"E2E time : {end2end_time:.3f} seconds") + + return stats + + def init_metrics(self): + """Initialize evaluation metrics for YOLO.""" + val = self.data.get(self.args.split, '') # validation path + self.is_coco = isinstance(val, str) and 'coco' in val and val.endswith(f'{os.sep}val2017.txt') # is COCO + self.class_map = converter.coco80_to_coco91_class() if self.is_coco else list(range(1000)) + self.args.save_json |= self.is_coco and not self.training # run on final val if training COCO + self.names = self.data['names'] + self.nc = len(self.names) + self.metrics.names = self.names + self.confusion_matrix = ConfusionMatrix(nc=80) + self.seen = 0 + self.jdict = [] + self.stats = dict(tp=[], conf=[], pred_cls=[], target_cls=[], target_img=[]) + +def main(): + config = parse_args() + + batch_size = config.bsz + + overrides = {'mode': 'val'} + cfg_args = get_cfg(cfg=DEFAULT_CFG, overrides=overrides) + + cfg_args.batch = batch_size + cfg_args.save_json = True + + data = { + 'path': Path(config.datasets), + 'val': os.path.join(config.datasets, 'val2017.txt'), + 'names': coco_classes + } + + validator = IxRT_Validator(args=cfg_args, save_dir=Path('.')) + + if config.perf_only: + fps = validator(config, data) + print("FPS : ", fps) + print(f"Performance Check : Test {fps} >= target {config.fps_target}") + else: + stats = validator(config, data) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/object_detection/yolov11/ixrt/quant.py b/models/cv/object_detection/yolov11/ixrt/quant.py new file mode 100644 index 0000000000000000000000000000000000000000..48f9c914f4db85525bb7f52a1a5eec0cd9e3a8d0 --- /dev/null +++ b/models/cv/object_detection/yolov11/ixrt/quant.py @@ -0,0 +1,105 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+import os +import cv2 +import random +import argparse +import numpy as np +from tensorrt.deploy import static_quantize + +import torch +import torchvision.datasets +from torch.utils.data import DataLoader +from common import letterbox + + +def setseed(seed=42): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model_name", type=str) + parser.add_argument("--model", type=str, default="yolov4_bs16_without_decoder.onnx") + parser.add_argument("--dataset_dir", type=str, default="./coco2017/val2017") + parser.add_argument("--ann_file", type=str, default="./coco2017/annotations/instances_val2017.json") + parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], default="hist_percentile") + parser.add_argument("--disable_quant_names", nargs='*', type=str) + parser.add_argument("--save_quant_model", type=str, help="save the quantization model path", default=None) + parser.add_argument("--bsz", type=int, default=16) + parser.add_argument("--step", type=int, default=32) + parser.add_argument("--seed", type=int, default=42) + parser.add_argument("--imgsz", type=int, default=608) + parser.add_argument("--use_letterbox", action="store_true") + args = parser.parse_args() + return args + +args = parse_args() +setseed(args.seed) +model_name = args.model_name + + +def get_dataloader(data_dir, step=32, batch_size=16, new_shape=[608, 608], use_letterbox=False): + num = step * batch_size + val_list = [os.path.join(data_dir, x) for x in os.listdir(data_dir)] + random.shuffle(val_list) + pic_list = val_list[:num] + + calibration_dataset = [] + for file_path in pic_list: + pic_data = cv2.imread(file_path) + org_img = pic_data + assert org_img is not None, 'Image not Found ' + file_path + h0, w0 = org_img.shape[:2] + + if use_letterbox: + img, ratio, dwdh = letterbox(org_img, new_shape=(new_shape[1], new_shape[0]), auto=False, scaleup=True) + else: + img = cv2.resize(org_img, new_shape) + img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + img = np.ascontiguousarray(img) / 255.0 # 0~1 np array + img = torch.from_numpy(img).float() + + calibration_dataset.append(img) + + calibration_dataloader = DataLoader( + calibration_dataset, + shuffle=True, + batch_size=batch_size, + drop_last=True + ) + return calibration_dataloader + +dataloader = get_dataloader( + data_dir=args.dataset_dir, + step=args.step, + batch_size=args.bsz, + new_shape=(args.imgsz, args.imgsz), + use_letterbox=args.use_letterbox +) + +dirname = os.path.dirname(args.save_quant_model) +quant_json_path = os.path.join(dirname, f"quantized_{model_name}.json") + +static_quantize(args.model, + calibration_dataloader=dataloader, + save_quant_onnx_path=args.save_quant_model, + save_quant_params_path=quant_json_path, + observer=args.observer, + data_preprocess=lambda x: x.to("cuda"), + quant_format="qdq", + disable_quant_names=args.disable_quant_names) diff --git a/models/cv/object_detection/yolov11/ixrt/requirements.txt b/models/cv/object_detection/yolov11/ixrt/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..51cbc9eeea641fc3858b9d660e992330218c5d3b --- /dev/null +++ b/models/cv/object_detection/yolov11/ixrt/requirements.txt @@ -0,0 +1,7 @@ +tqdm +onnx==1.13.0 +onnxsim==0.4.36 +pycocotools +opencv-python==4.8.0.74 +ultralytics==8.3.59 +cuda-python \ No newline at end of file diff --git 
a/models/cv/object_detection/yolov11/ixrt/scripts/infer_yolov11_fp16_accuracy.sh b/models/cv/object_detection/yolov11/ixrt/scripts/infer_yolov11_fp16_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..2bec1d80e668c43b3ed3098c666f4f3aa654172e --- /dev/null +++ b/models/cv/object_detection/yolov11/ixrt/scripts/infer_yolov11_fp16_accuracy.sh @@ -0,0 +1,66 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +PROJ_DIR=${PROJ_DIR} +DATASETS_DIR=${DATASETS_DIR} +COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json +EVAL_DIR=${DATASETS_DIR}/images/val2017 +CHECKPOINTS_DIR=${CHECKPOINTS_DIR} +RUN_DIR=${PROJ_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo ====================== Model Info ====================== +echo Model Name : yolo11 +echo Onnx Path : ${ORIGINE_MODEL} + +BATCH_SIZE=32 +CURRENT_MODEL=${CHECKPOINTS_DIR}/yolo11.onnx + +# Build Engine +echo Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/yolo11_fp16.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision float16 \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi + +# Inference +echo Inference +RUN_BATCH_SIZE=32 +python3 ${RUN_DIR}/inference.py \ + --model_engine ${ENGINE_FILE} \ + --warm_up 2 \ + --bsz ${RUN_BATCH_SIZE} \ + --imgsz 640 \ + --datasets ${DATASETS_DIR} \ + --acc_target 0.3 +exit ${EXIT_STATUS} diff --git a/models/cv/object_detection/yolov11/ixrt/scripts/infer_yolov11_fp16_performance.sh b/models/cv/object_detection/yolov11/ixrt/scripts/infer_yolov11_fp16_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..500445e89470b0be92f8b1243ff1a533235981e4 --- /dev/null +++ b/models/cv/object_detection/yolov11/ixrt/scripts/infer_yolov11_fp16_performance.sh @@ -0,0 +1,67 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +PROJ_DIR=${PROJ_DIR} +DATASETS_DIR=${DATASETS_DIR} +COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json +EVAL_DIR=${DATASETS_DIR}/images/val2017 +CHECKPOINTS_DIR=${CHECKPOINTS_DIR} +RUN_DIR=${PROJ_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo ====================== Model Info ====================== +echo Model Name : yolo11 +echo Onnx Path : ${ORIGINE_MODEL} + +BATCH_SIZE=32 +CURRENT_MODEL=${CHECKPOINTS_DIR}/yolo11.onnx + +# Build Engine +echo Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/yolo11_fp16.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision float16 \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi + +# Inference +echo Inference +RUN_BATCH_SIZE=32 +python3 ${RUN_DIR}/inference.py \ + --model_engine ${ENGINE_FILE} \ + --warm_up 2 \ + --bsz ${RUN_BATCH_SIZE} \ + --imgsz 640 \ + --datasets ${DATASETS_DIR} \ + --perf_only true \ + --fps_target 0.0 +exit ${EXIT_STATUS} diff --git a/models/cv/object_detection/yolov9/ixrt/README.md b/models/cv/object_detection/yolov9/ixrt/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a8df237147592c7a49ab65a00aead30e267145f0 --- /dev/null +++ b/models/cv/object_detection/yolov9/ixrt/README.md @@ -0,0 +1,66 @@ +# YOLOv9 (IXRT) + +## Model Description + +YOLOv9 represents a major leap in real-time object detection by introducing innovations like Programmable Gradient Information (PGI) and the Generalized Efficient Layer Aggregation Network (GELAN), significantly improving efficiency, accuracy, and adaptability. Developed by an open-source team and building on the YOLOv5 codebase, it sets new benchmarks on the MS COCO dataset. YOLOv9's architecture effectively addresses information loss in deep neural networks, enhancing learning capacity and ensuring higher detection accuracy. 
+ +## Supported Environments + +| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release | +|--------|-----------|---------| +| MR-V100 | 4.2.0 | 25.06 | + +## Model Preparation + +### Prepare Resources + +Pretrained model: + +### Install Dependencies + +```bash +# Install libGL +## CentOS +yum install -y mesa-libGL +## Ubuntu +apt install -y libgl1-mesa-glx + +pip3 install -r requirements.txt +``` + +## Model Conversion + +```bash +mkdir -p checkpoints/ +mv yolov9s.pt yolov9.pt +python3 export.py --weight yolov9.pt --batch 32 +onnxsim yolov9.onnx ./checkpoints/yolov9.onnx +``` + +## Model Inference + +```bash +export PROJ_DIR=./ +export DATASETS_DIR=/path/to/coco/ +export CHECKPOINTS_DIR=./checkpoints +export RUN_DIR=./ +``` + +### FP16 + +```bash +# Accuracy +bash scripts/infer_yolov9_fp16_accuracy.sh +# Performance +bash scripts/infer_yolov9_fp16_performance.sh +``` + +## Model Results + +| Model | BatchSize | Precision | FPS | IOU@0.5 | IOU@0.5:0.95 | +| ------ | --------- | --------- | ------ | ------- | ------------ | +| YOLOv9 | 32 | FP16 | 814.42 | 0.625 | 0.464 | + +## References + +- [YOLOv9](https://docs.ultralytics.com/models/yolov9) diff --git a/models/cv/object_detection/yolov9/ixrt/build_engine.py b/models/cv/object_detection/yolov9/ixrt/build_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..e0839fc3229ef4ab6d23c7ffeae36a1962bcaaf1 --- /dev/null +++ b/models/cv/object_detection/yolov9/ixrt/build_engine.py @@ -0,0 +1,94 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+import os +import cv2 +import argparse +import numpy as np + +import torch +import tensorrt +from tensorrt import Dims + + +def build_engine_trtapi_staticshape(config): + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(config.model) + + precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 + # print("precision : ", precision) + build_config.set_flag(precision) + + plan = builder.build_serialized_network(network, build_config) + engine_file_path = config.engine + with open(engine_file_path, "wb") as f: + f.write(plan) + print("Build static shape engine done!") + + +def build_engine_trtapi_dynamicshape(config): + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + + profile = builder.create_optimization_profile() + profile.set_shape("input", + Dims([1, 3, 608, 608]), + Dims([32, 3, 608, 608]), + Dims([64, 3, 608, 608]), + ) + build_config.add_optimization_profile(profile) + + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(config.model) + precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 + # print("precision : ", precision) + build_config.set_flag(precision) + + # set dynamic + num_inputs = network.num_inputs + for i in range(num_inputs): + input_tensor = network.get_input(i) + input_tensor.shape = Dims([-1, 3, 608, 608]) + + plan = builder.build_serialized_network(network, build_config) + engine_file_path = config.engine + with open(engine_file_path, "wb") as f: + f.write(plan) + print("Build dynamic shape engine done!") + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", + help="The precision of datatype") + # engine args + parser.add_argument("--engine", type=str, default=None) + + args = parser.parse_args() + return args + + +if __name__ == "__main__": + args = parse_args() + build_engine_trtapi_staticshape(args) + # build_engine_trtapi_dynamicshape(args) diff --git a/models/cv/object_detection/yolov9/ixrt/ci/prepare.sh b/models/cv/object_detection/yolov9/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..4a36a555a5a7e6d3a3d82e84e9fd13f43080410c --- /dev/null +++ b/models/cv/object_detection/yolov9/ixrt/ci/prepare.sh @@ -0,0 +1,33 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip3 install -r requirements.txt + +mkdir -p checkpoints/ +mv yolov9s.pt yolov9.pt +python3 export.py --weight yolov9.pt --batch 32 +onnxsim yolov9.onnx ./checkpoints/yolov9.onnx \ No newline at end of file diff --git a/models/cv/object_detection/yolov9/ixrt/common.py b/models/cv/object_detection/yolov9/ixrt/common.py new file mode 100644 index 0000000000000000000000000000000000000000..74022b562af71ef8a234b5075252ea6a32505558 --- /dev/null +++ b/models/cv/object_detection/yolov9/ixrt/common.py @@ -0,0 +1,337 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +import os +import cv2 +import glob +import time +import numpy as np +from tqdm import tqdm + +import tensorrt +from cuda import cuda, cudart + + +def load_class_names(namesfile): + class_names = [] + with open(namesfile, 'r') as fp: + lines = fp.readlines() + for line in lines: + line = line.rstrip() + class_names.append(line) + return class_names + +# input : [bsz, box_num, 5(cx, cy, w, h, conf) + class_num(prob[0], prob[1], ...)] +# output : [bsz, box_num, 6(left_top_x, left_top_y, right_bottom_x, right_bottom_y, class_id, max_prob*conf)] +def box_class85to6(input): + center_x_y = input[:, :2] + side = input[:, 2:4] + conf = input[:, 4:5] + class_id = np.argmax(input[:, 5:], axis = -1) + class_id = class_id.astype(np.float32).reshape(-1, 1) + 1 + max_prob = np.max(input[:, 5:], axis = -1).reshape(-1, 1) + x1_y1 = center_x_y - 0.5 * side + x2_y2 = center_x_y + 0.5 * side + nms_input = np.concatenate([x1_y1, x2_y2, class_id, max_prob*conf], axis = -1) + return nms_input + +def save2json(batch_img_id, pred_boxes, json_result, class_trans): + for i, boxes in enumerate(pred_boxes): + if boxes is not None: + image_id = int(batch_img_id[i]) + # have no target + if image_id == -1: + continue + + for x1, y1, x2, y2, _, p, c in boxes: + x1, y1, x2, y2, p = float(x1), float(y1), float(x2), float(y2), float(p) + c = int(c) + x = x1 + y = y1 + w = x2 - x1 + h = y2 - y1 + + json_result.append( + { + "image_id": image_id, + "category_id": class_trans[c - 1], + "bbox": [x, y, w, h], + "score": p, + } + ) + +################## About TensorRT ################# +def create_engine_context(engine_path, logger): + with open(engine_path, "rb") as f: + runtime = tensorrt.Runtime(logger) + assert runtime + engine = runtime.deserialize_cuda_engine(f.read()) + assert engine + context = engine.create_execution_context() + assert context + + return engine, context + +def setup_io_bindings(engine, context): + # Setup I/O bindings + inputs = [] + outputs = [] + allocations = [] + + for i in range(engine.num_bindings): + is_input = False 
+ if engine.binding_is_input(i): + is_input = True + name = engine.get_binding_name(i) + dtype = engine.get_binding_dtype(i) + shape = context.get_binding_shape(i) + if is_input: + batch_size = shape[0] + size = np.dtype(tensorrt.nptype(dtype)).itemsize + for s in shape: + size *= s + err, allocation = cudart.cudaMalloc(size) + assert err == cudart.cudaError_t.cudaSuccess + binding = { + "index": i, + "name": name, + "dtype": np.dtype(tensorrt.nptype(dtype)), + "shape": list(shape), + "allocation": allocation, + "nbytes": size + } + # print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") + allocations.append(allocation) + if engine.binding_is_input(i): + inputs.append(binding) + else: + outputs.append(binding) + return inputs, outputs, allocations +########################################################## + + +################## About Loading Dataset ################# +def load_images(images_path): + """ + If image path is given, return it directly + For txt file, read it and return each line as image path + In other case, it's a folder, return a list with names of each + jpg, jpeg and png file + """ + input_path_extension = images_path.split('.')[-1] + if input_path_extension in ['jpg', 'jpeg', 'png']: + return [images_path] + elif input_path_extension == "txt": + with open(images_path, "r") as f: + return f.read().splitlines() + else: + return glob.glob( + os.path.join(images_path, "*.jpg")) + \ + glob.glob(os.path.join(images_path, "*.png")) + \ + glob.glob(os.path.join(images_path, "*.jpeg")) + +def prepare_batch(images_path, bs=16, input_size=(608, 608)): + + width, height = input_size + + batch_names = [] + batch_images = [] + batch_shapes = [] + + temp_names = [] + temp_images = [] + temp_shapes = [] + + for i, image_path in tqdm(enumerate(images_path), desc="Loading coco data"): + name = os.path.basename(image_path) + image = cv2.imread(image_path) + h, w, _ = image.shape + image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + image_resized = cv2.resize(image_rgb, (width, height), + interpolation=cv2.INTER_LINEAR) + custom_image = image_resized.transpose(2, 0, 1).astype(np.float32) / 255. 
+ custom_image = np.expand_dims(custom_image, axis=0) + + if i != 0 and i % bs == 0: + batch_names.append(temp_names) + batch_images.append(np.concatenate(temp_images, axis=0)) + batch_shapes.append(temp_shapes) + + temp_names = [name] + temp_images = [custom_image] + temp_shapes = [(h, w)] + else: + temp_names.append(name) + temp_images.append(custom_image) + temp_shapes.append((h, w)) + + return batch_names, batch_images, batch_shapes +########################################################## + + +################## About Operating box ################# +def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32): + # Resize and pad image while meeting stride-multiple constraints + shape = im.shape[:2] # current shape [height, width] + if isinstance(new_shape, int): + new_shape = (new_shape, new_shape) + + # Scale ratio (new / old) + r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) + if not scaleup: # only scale down, do not scale up (for better val mAP) + r = min(r, 1.0) + + # Compute padding + ratio = r, r # width, height ratios + new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) + dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding + if auto: # minimum rectangle + dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding + elif scaleFill: # stretch + dw, dh = 0.0, 0.0 + new_unpad = (new_shape[1], new_shape[0]) + ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios + + dw /= 2 # divide padding into 2 sides + dh /= 2 + + if shape[::-1] != new_unpad: # resize + im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR) + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border + return im, ratio, (dw, dh) + +def scale_boxes(net_shape, boxes, ori_shape, use_letterbox=False): + # Rescale boxes (xyxy) from net_shape to ori_shape + + if use_letterbox: + + gain = min( + net_shape[0] / ori_shape[0], net_shape[1] / ori_shape[1] + ) # gain = new / old + pad = (net_shape[1] - ori_shape[1] * gain) / 2, ( + net_shape[0] - ori_shape[0] * gain + ) / 2.0 + + boxes[:, [0, 2]] -= pad[0] # x padding + boxes[:, [1, 3]] -= pad[1] # y padding + boxes[:, :4] /= gain + else: + x_scale, y_scale = net_shape[1] / ori_shape[1], net_shape[0] / ori_shape[0] + + boxes[:, 0] /= x_scale + boxes[:, 1] /= y_scale + boxes[:, 2] /= x_scale + boxes[:, 3] /= y_scale + + clip_boxes(boxes, ori_shape) + return boxes + +def clip_boxes(boxes, shape): + + boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2 + boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2 +########################################################## + + +################## About pre and post processing ######### +def pre_processing(src_img, imgsz=608): + resized = cv2.resize(src_img, (imgsz, imgsz), interpolation=cv2.INTER_LINEAR) + in_img = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB) + in_img = np.transpose(in_img, (2, 0, 1)).astype(np.float32) + in_img = np.expand_dims(in_img, axis=0) + in_img /= 255.0 + return in_img + +def nms_cpu(boxes, confs, nms_thresh=0.5, min_mode=False): + # print(boxes.shape) + x1 = boxes[:, 0] + y1 = boxes[:, 1] + x2 = boxes[:, 2] + y2 = boxes[:, 3] + + areas = (x2 - x1) * (y2 - y1) + order = confs.argsort()[::-1] + + keep = [] + while order.size > 0: + idx_self = order[0] + idx_other = order[1:] + + 
keep.append(idx_self) + + xx1 = np.maximum(x1[idx_self], x1[idx_other]) + yy1 = np.maximum(y1[idx_self], y1[idx_other]) + xx2 = np.minimum(x2[idx_self], x2[idx_other]) + yy2 = np.minimum(y2[idx_self], y2[idx_other]) + + w = np.maximum(0.0, xx2 - xx1) + h = np.maximum(0.0, yy2 - yy1) + inter = w * h + + if min_mode: + over = inter / np.minimum(areas[order[0]], areas[order[1:]]) + else: + over = inter / (areas[order[0]] + areas[order[1:]] - inter) + + inds = np.where(over <= nms_thresh)[0] + order = order[inds + 1] + + return np.array(keep) + + +def post_processing(img, conf_thresh, nms_thresh, output, num_classes=80): + + # [batch, num, 1, 4] + box_array = output[:, :, :4] + # [batch, num, 2] + class_confs = output[:, :, 4:] + + max_conf = class_confs[:, :, 1] + max_id = class_confs[:, :, 0] + + bboxes_batch = [] + for i in range(box_array.shape[0]): + + argwhere = max_conf[i] > conf_thresh + l_box_array = box_array[i, argwhere, :] + l_max_conf = max_conf[i, argwhere] + l_max_id = max_id[i, argwhere] + + bboxes = [] + # nms for each class + for j in range(num_classes): + + cls_argwhere = l_max_id == j + ll_box_array = l_box_array[cls_argwhere, :] + ll_max_conf = l_max_conf[cls_argwhere] + ll_max_id = l_max_id[cls_argwhere] + + keep = nms_cpu(ll_box_array, ll_max_conf, nms_thresh) + + if (keep.size > 0): + ll_box_array = ll_box_array[keep, :] + ll_max_conf = ll_max_conf[keep] + ll_max_id = ll_max_id[keep] + + for k in range(ll_box_array.shape[0]): + bboxes.append([ll_box_array[k, 0], ll_box_array[k, 1], ll_box_array[k, 2], + ll_box_array[k, 3], ll_max_conf[k], ll_max_conf[k], ll_max_id[k]]) + + bboxes_batch.append(bboxes) + + return bboxes_batch +########################################################## + diff --git a/models/cv/object_detection/yolov9/ixrt/export.py b/models/cv/object_detection/yolov9/ixrt/export.py new file mode 100644 index 0000000000000000000000000000000000000000..388e3a8540b58262f9d8c8e545848addf3afb606 --- /dev/null +++ b/models/cv/object_detection/yolov9/ixrt/export.py @@ -0,0 +1,43 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +import argparse +from ultralytics import YOLO + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--weight", + type=str, + required=True, + help="pytorch model weight.") + + parser.add_argument("--batch", + type=int, + required=True, + help="batchsize of the model.") + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + model = YOLO(args.weight).cpu() + + model.export(format='onnx', batch=args.batch, imgsz=(640, 640), opset=11) + +if __name__ == "__main__": + main() diff --git a/models/cv/object_detection/yolov9/ixrt/inference.py b/models/cv/object_detection/yolov9/ixrt/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..1ac908a7c8f8092f3fb59018cf295ecfb954fa0c --- /dev/null +++ b/models/cv/object_detection/yolov9/ixrt/inference.py @@ -0,0 +1,250 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import json +import argparse +import time +import tensorrt +from tensorrt import Dims +from cuda import cuda, cudart +import torch +import numpy as np +from tqdm import tqdm + +from common import create_engine_context, setup_io_bindings + +from pathlib import Path + +from ultralytics.cfg import get_cfg +from ultralytics.data import converter +from ultralytics.utils import DEFAULT_CFG +from ultralytics.data.utils import check_det_dataset +from ultralytics.utils.metrics import ConfusionMatrix +from ultralytics.models.yolo.detect import DetectionValidator + +coco_classes = {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', + 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', + 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', + 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', + 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', + 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed', + 60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone', 68: 'microwave', 69: 'oven', + 70: 'toaster', 71: 'sink', 72: 'refrigerator', 73: 'book', 74: 'clock', 75: 'vase', 76: 'scissors', 77: 'teddy bear', 78: 'hair drier', 79: 'toothbrush'} + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--model_engine", + type=str, + required=True, + help="ixrt engine path.") + + parser.add_argument("--bsz", + type=int, + required=True, + help="inference batch size.") + + 
parser.add_argument( + "--imgsz", + "--img", + "--img-size", + type=int, + default=640, + help="inference size h,w", + ) + + parser.add_argument("--datasets", + type=str, + required=True, + help="datasets path.") + + parser.add_argument("--warm_up", + type=int, + default=3, + help="number of warmup before test.") + + parser.add_argument("--num_workers", + type=int, + default=16, + help="number of workers used in pytorch dataloader.") + + parser.add_argument("--acc_target", + type=float, + default=0.0, + help="Model inference Accuracy target.") + + parser.add_argument("--fps_target", + type=float, + default=0.0, + help="Model inference FPS target.") + + parser.add_argument("--conf", + type=float, + default=0.001, + help="confidence threshold.") + + parser.add_argument("--iou", + type=float, + default=0.65, + help="iou threshold.") + + parser.add_argument("--perf_only", + type=bool, + default=False, + help="Run performance test only") + + args = parser.parse_args() + + return args + +class IxRT_Validator(DetectionValidator): + def __call__(self, config, data): + self.data = data + self.stride = 32 + self.dataloader = self.get_dataloader(self.data.get(self.args.split), self.args.batch) + self.init_metrics() + + total_num = 0 + + input_name = "input" + host_mem = tensorrt.IHostMemory + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + engine, context = create_engine_context(config.model_engine, logger) + input_idx = engine.get_binding_index(input_name) + context.set_binding_shape(input_idx, Dims((config.bsz,3,config.imgsz,config.imgsz))) + inputs, outputs, allocations = setup_io_bindings(engine, context) + + if config.warm_up > 0: + print("\nWarm Start.") + for i in range(config.warm_up): + context.execute_v2(allocations) + print("Warm Done.") + + forward_time = 0.0 + num_samples = 0 + + e2e_start_time = time.time() + for batch in tqdm(self.dataloader): + batch = self.preprocess(batch) + + imgs = batch['img'] + pad_batch = len(imgs) != self.args.batch + if pad_batch: + origin_size = len(imgs) + imgs = np.resize(imgs, (self.args.batch, *imgs.shape[1:])) + + batch_data = np.ascontiguousarray(imgs) + data_shape = batch_data.shape + + cur_bsz_sample = batch_data.shape[0] + num_samples += cur_bsz_sample + + # Set input + input_idx = engine.get_binding_index(input_name) + context.set_binding_shape(input_idx, Dims(data_shape)) + inputs, outputs, allocations = setup_io_bindings(engine, context) + + err, = cuda.cuMemcpyHtoD(inputs[0]["allocation"], batch_data, batch_data.nbytes) + assert(err == cuda.CUresult.CUDA_SUCCESS) + # Prepare the output data + output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) + + + start_time = time.time() + context.execute_v2(allocations) + end_time = time.time() + forward_time += end_time - start_time + + err, = cuda.cuMemcpyDtoH(output, outputs[0]["allocation"], outputs[0]["nbytes"]) + assert(err == cuda.CUresult.CUDA_SUCCESS) + + for alloc in allocations: + if not alloc: + continue + (err,) = cudart.cudaFree(alloc) + assert err == cudart.cudaError_t.cudaSuccess + + if pad_batch: + output = output[:origin_size] + + outputs = torch.from_numpy(output) + + preds = self.postprocess([outputs]) + + self.update_metrics(preds, batch) + + e2e_end_time = time.time() + if config.perf_only: + fps = num_samples / forward_time + return fps + else: + stats = self.get_stats() + + if self.args.save_json and self.jdict: + with open(str(self.save_dir / 'predictions.json'), 'w') as f: + print(f'Saving {f.name} ...') + json.dump(self.jdict, f) # flatten and save + + stats = 
self.eval_json(stats) + + end2end_time = e2e_end_time - e2e_start_time + print(F"E2E time : {end2end_time:.3f} seconds") + + return stats + + def init_metrics(self): + """Initialize evaluation metrics for YOLO.""" + val = self.data.get(self.args.split, '') # validation path + self.is_coco = isinstance(val, str) and 'coco' in val and val.endswith(f'{os.sep}val2017.txt') # is COCO + self.class_map = converter.coco80_to_coco91_class() if self.is_coco else list(range(1000)) + self.args.save_json |= self.is_coco and not self.training # run on final val if training COCO + self.names = self.data['names'] + self.nc = len(self.names) + self.metrics.names = self.names + self.confusion_matrix = ConfusionMatrix(nc=80) + self.seen = 0 + self.jdict = [] + self.stats = dict(tp=[], conf=[], pred_cls=[], target_cls=[], target_img=[]) + +def main(): + config = parse_args() + + batch_size = config.bsz + + overrides = {'mode': 'val'} + cfg_args = get_cfg(cfg=DEFAULT_CFG, overrides=overrides) + + cfg_args.batch = batch_size + cfg_args.save_json = True + + data = { + 'path': Path(config.datasets), + 'val': os.path.join(config.datasets, 'val2017.txt'), + 'names': coco_classes + } + + validator = IxRT_Validator(args=cfg_args, save_dir=Path('.')) + + if config.perf_only: + fps = validator(config, data) + print("FPS : ", fps) + print(f"Performance Check : Test {fps} >= target {config.fps_target}") + else: + stats = validator(config, data) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/object_detection/yolov9/ixrt/quant.py b/models/cv/object_detection/yolov9/ixrt/quant.py new file mode 100644 index 0000000000000000000000000000000000000000..48f9c914f4db85525bb7f52a1a5eec0cd9e3a8d0 --- /dev/null +++ b/models/cv/object_detection/yolov9/ixrt/quant.py @@ -0,0 +1,105 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+import os +import cv2 +import random +import argparse +import numpy as np +from tensorrt.deploy import static_quantize + +import torch +import torchvision.datasets +from torch.utils.data import DataLoader +from common import letterbox + + +def setseed(seed=42): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model_name", type=str) + parser.add_argument("--model", type=str, default="yolov4_bs16_without_decoder.onnx") + parser.add_argument("--dataset_dir", type=str, default="./coco2017/val2017") + parser.add_argument("--ann_file", type=str, default="./coco2017/annotations/instances_val2017.json") + parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], default="hist_percentile") + parser.add_argument("--disable_quant_names", nargs='*', type=str) + parser.add_argument("--save_quant_model", type=str, help="save the quantization model path", default=None) + parser.add_argument("--bsz", type=int, default=16) + parser.add_argument("--step", type=int, default=32) + parser.add_argument("--seed", type=int, default=42) + parser.add_argument("--imgsz", type=int, default=608) + parser.add_argument("--use_letterbox", action="store_true") + args = parser.parse_args() + return args + +args = parse_args() +setseed(args.seed) +model_name = args.model_name + + +def get_dataloader(data_dir, step=32, batch_size=16, new_shape=[608, 608], use_letterbox=False): + num = step * batch_size + val_list = [os.path.join(data_dir, x) for x in os.listdir(data_dir)] + random.shuffle(val_list) + pic_list = val_list[:num] + + calibration_dataset = [] + for file_path in pic_list: + pic_data = cv2.imread(file_path) + org_img = pic_data + assert org_img is not None, 'Image not Found ' + file_path + h0, w0 = org_img.shape[:2] + + if use_letterbox: + img, ratio, dwdh = letterbox(org_img, new_shape=(new_shape[1], new_shape[0]), auto=False, scaleup=True) + else: + img = cv2.resize(org_img, new_shape) + img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + img = np.ascontiguousarray(img) / 255.0 # 0~1 np array + img = torch.from_numpy(img).float() + + calibration_dataset.append(img) + + calibration_dataloader = DataLoader( + calibration_dataset, + shuffle=True, + batch_size=batch_size, + drop_last=True + ) + return calibration_dataloader + +dataloader = get_dataloader( + data_dir=args.dataset_dir, + step=args.step, + batch_size=args.bsz, + new_shape=(args.imgsz, args.imgsz), + use_letterbox=args.use_letterbox +) + +dirname = os.path.dirname(args.save_quant_model) +quant_json_path = os.path.join(dirname, f"quantized_{model_name}.json") + +static_quantize(args.model, + calibration_dataloader=dataloader, + save_quant_onnx_path=args.save_quant_model, + save_quant_params_path=quant_json_path, + observer=args.observer, + data_preprocess=lambda x: x.to("cuda"), + quant_format="qdq", + disable_quant_names=args.disable_quant_names) diff --git a/models/cv/object_detection/yolov9/ixrt/requirements.txt b/models/cv/object_detection/yolov9/ixrt/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..346fe1c7d21fa2d840c877dec15c1e30bd58a1d3 --- /dev/null +++ b/models/cv/object_detection/yolov9/ixrt/requirements.txt @@ -0,0 +1,7 @@ +tqdm +onnx +onnxsim +pycocotools +opencv-python==4.8.0.74 +ultralytics==8.2.51 +cuda-python \ No newline at end of file diff --git a/models/cv/object_detection/yolov9/ixrt/scripts/infer_yolov9_fp16_accuracy.sh 
b/models/cv/object_detection/yolov9/ixrt/scripts/infer_yolov9_fp16_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..ba48ad6616966b870cfc7279ee0352f45306a15b --- /dev/null +++ b/models/cv/object_detection/yolov9/ixrt/scripts/infer_yolov9_fp16_accuracy.sh @@ -0,0 +1,66 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +PROJ_DIR=${PROJ_DIR} +DATASETS_DIR=${DATASETS_DIR} +COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json +EVAL_DIR=${DATASETS_DIR}/images/val2017 +CHECKPOINTS_DIR=${CHECKPOINTS_DIR} +RUN_DIR=${PROJ_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo ====================== Model Info ====================== +echo Model Name : yolov9 +echo Onnx Path : ${ORIGINE_MODEL} + +BATCH_SIZE=32 +CURRENT_MODEL=${CHECKPOINTS_DIR}/yolov9.onnx + +# Build Engine +echo Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/yolov9_fp16.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision float16 \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi + +# Inference +echo Inference +RUN_BATCH_SIZE=32 +python3 ${RUN_DIR}/inference.py \ + --model_engine ${ENGINE_FILE} \ + --warm_up 2 \ + --bsz ${RUN_BATCH_SIZE} \ + --imgsz 640 \ + --datasets ${DATASETS_DIR} \ + --acc_target 0.3 +exit ${EXIT_STATUS} diff --git a/models/cv/object_detection/yolov9/ixrt/scripts/infer_yolov9_fp16_performance.sh b/models/cv/object_detection/yolov9/ixrt/scripts/infer_yolov9_fp16_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..17059a71e90c3126f22317f5ee68e84ed528e29d --- /dev/null +++ b/models/cv/object_detection/yolov9/ixrt/scripts/infer_yolov9_fp16_performance.sh @@ -0,0 +1,67 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +PROJ_DIR=${PROJ_DIR} +DATASETS_DIR=${DATASETS_DIR} +COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json +EVAL_DIR=${DATASETS_DIR}/images/val2017 +CHECKPOINTS_DIR=${CHECKPOINTS_DIR} +RUN_DIR=${PROJ_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo ====================== Model Info ====================== +echo Model Name : yolov9 +echo Onnx Path : ${ORIGINE_MODEL} + +BATCH_SIZE=32 +CURRENT_MODEL=${CHECKPOINTS_DIR}/yolov9.onnx + +# Build Engine +echo Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/yolov9_fp16.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision float16 \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi + +# Inference +echo Inference +RUN_BATCH_SIZE=32 +python3 ${RUN_DIR}/inference.py \ + --model_engine ${ENGINE_FILE} \ + --warm_up 2 \ + --bsz ${RUN_BATCH_SIZE} \ + --imgsz 640 \ + --datasets ${DATASETS_DIR} \ + --perf_only true \ + --fps_target 0.0 +exit ${EXIT_STATUS}