diff --git a/models/cv/classification/densenet121/ixrt/README.md b/models/cv/classification/densenet121/ixrt/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fc6f99781dd5f5f9d970f268c9f993f861c3d261 --- /dev/null +++ b/models/cv/classification/densenet121/ixrt/README.md @@ -0,0 +1,54 @@ +# DenseNet + +## Description +Dense Convolutional Network (DenseNet) connects each layer to every other layer in a feed-forward fashion. Whereas traditional convolutional networks with L layers have L connections - one between each layer and its subsequent layer - DenseNet has L(L+1)/2 direct connections. + +## Setup + +### Install +```bash +yum install mesa-libGL + +pip3 install tqdm +pip3 install onnx +pip3 install onnxsim +pip3 install tabulate +pip3 install ppq +pip3 install pycuda +``` + +### Download + +Dataset: download the ImageNet validation set; `DATASETS_DIR` must point to the directory that contains the images and `val_map.txt`. + +### Model Conversion +```bash +mkdir checkpoints +python3 export_onnx.py --output_model checkpoints/densenet121.onnx +``` + +## Inference +```bash +export DATASETS_DIR=/path/to/imagenet_val/ +export CHECKPOINTS_DIR=./checkpoints +export RUN_DIR=./ +export CONFIG_DIR=config/DENSENET_CONFIG +``` +### FP16 + +```bash +# Accuracy +bash scripts/infer_densenet_fp16_accuracy.sh +# Performance +bash scripts/infer_densenet_fp16_performance.sh +``` + + + +## Results + +Model |BatchSize |Precision |FPS |Top-1(%) |Top-5(%) +---------|-----------|----------|----------|----------|-------- +DenseNet | | FP16 | 1536.89 | 74.42 | 91.97 + + diff --git a/models/cv/classification/densenet121/ixrt/build_engine.py b/models/cv/classification/densenet121/ixrt/build_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..01e126bc715aa77d38c3abdd1e02191a262689e7 --- /dev/null +++ b/models/cv/classification/densenet121/ixrt/build_engine.py @@ -0,0 +1,109 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
def assertSuccess(err):
    """Raise ``RuntimeError`` unless *err* is ``cudaSuccess``.

    An explicit raise is used instead of ``assert`` so the check still runs
    when Python is started with ``-O``.
    """
    if err != cudart.cudaError_t.cudaSuccess:
        raise RuntimeError(f"CUDA runtime call failed: {err}")


class EngineCalibrator(tensorrt.IInt8EntropyCalibrator2):
    """INT8 entropy calibrator that feeds dataloader batches to the builder.

    Batches come from ``getdataloader`` and are staged in a single device
    buffer that is reused for every call to :meth:`get_batch`.
    """

    def __init__(self, cache_file, datasets_dir, loop_count=10, bsz=1, img_sz=224):
        """Create the dataloader and allocate the device staging buffer.

        Args:
            cache_file: Path used to read/write the calibration cache.
            datasets_dir: Dataset root passed to ``getdataloader``.
            loop_count: Number of calibration steps requested from the loader.
            bsz: Calibration batch size.
            img_sz: Square input resolution (height == width).
        """
        super().__init__()
        # Defined first so __del__ stays safe if anything below raises.
        self.batch_allocation = None
        self.cache_file = cache_file
        self.image_batcher = getdataloader(datasets_dir, loop_count, batch_size=bsz, img_sz=img_sz)
        self.batch_generator = iter(self.image_batcher)
        # cudaMalloc takes a size in BYTES. The loader's ToTensor/Normalize
        # pipeline presumably yields float32 (4 bytes per element); get_batch
        # verifies the real byte count before copying.
        self._allocation_bytes = img_sz * img_sz * 3 * bsz * np.dtype(np.float32).itemsize
        err, self.batch_allocation = cudart.cudaMalloc(self._allocation_bytes)
        assertSuccess(err)

    def __del__(self):
        """Release the device staging buffer if it was allocated."""
        if getattr(self, "batch_allocation", None) is None:
            return
        err, = cudart.cudaFree(self.batch_allocation)
        assertSuccess(err)

    def get_batch_size(self):
        """Return the batch size of the underlying dataloader."""
        return self.image_batcher.batch_size

    def get_batch(self, names):
        """Copy the next batch to the device and return its pointer.

        Returns:
            A one-element list holding the device address of the batch, or
            ``None`` once the dataloader is exhausted.

        Raises:
            ValueError: If a batch is larger than the staging buffer.
            RuntimeError: If the host-to-device copy fails.
        """
        try:
            batch, _ = next(self.batch_generator)
        except StopIteration:
            return None

        batch = np.ascontiguousarray(batch.numpy())
        if batch.nbytes > self._allocation_bytes:
            raise ValueError(
                f"Calibration batch needs {batch.nbytes} bytes but only "
                f"{self._allocation_bytes} bytes were allocated."
            )
        err, = cudart.cudaMemcpy(
            self.batch_allocation,
            batch,
            batch.nbytes,
            cudart.cudaMemcpyKind.cudaMemcpyHostToDevice,
        )
        assertSuccess(err)
        return [int(self.batch_allocation)]

    def read_calibration_cache(self):
        """Return the cached calibration bytes, or ``None`` if no cache exists."""
        if os.path.exists(self.cache_file):
            with open(self.cache_file, "rb") as f:
                return f.read()
        return None

    def write_calibration_cache(self, cache):
        """Persist the calibration *cache* bytes to ``self.cache_file``."""
        with open(self.cache_file, "wb") as f:
            f.write(cache)


def main(config):
    """Parse the ONNX model and write a serialized engine to ``config.engine``.

    Args:
        config: Namespace with ``model``, ``precision``, ``engine`` and
            ``datasets_dir`` (see :func:`parse_args`).

    Raises:
        RuntimeError: If the builder does not return a serialized plan.
    """
    IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.VERBOSE)
    builder = tensorrt.Builder(IXRT_LOGGER)
    EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    network = builder.create_network(EXPLICIT_BATCH)
    build_config = builder.create_builder_config()
    parser = tensorrt.OnnxParser(network, IXRT_LOGGER)
    parser.parse_from_file(config.model)

    # NOTE(review): every precision other than "int8" (including "float32")
    # selects the FP16 flag - confirm this fallback is intended.
    precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16
    print("precision : ", precision)
    build_config.set_flag(precision)
    if config.precision == "int8":
        build_config.int8_calibrator = EngineCalibrator("int8_cache", config.datasets_dir)

    plan = builder.build_serialized_network(network, build_config)
    if plan is None:
        raise RuntimeError(f"Failed to build an engine from {config.model}")
    with open(config.engine, "wb") as f:
        f.write(plan)


def parse_args():
    """Parse the command-line options of the engine builder."""
    parser = argparse.ArgumentParser()
    # Both paths are mandatory: without them the build fails only after the
    # expensive parsing/building work has already been done.
    parser.add_argument("--model", type=str, required=True)
    parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8",
            help="The precision of datatype")
    parser.add_argument("--engine", type=str, required=True)
    parser.add_argument(
        "--datasets_dir",
        type=str,
        default="",
        help="ImageNet dir",
    )
    return parser.parse_args()


if __name__ == "__main__":
    args = parse_args()
    main(args)
class CalibrationImageNet(torchvision.datasets.ImageFolder):
    """``ImageFolder`` whose labels come from a ``val_map.txt`` file.

    ``val_map.txt`` must live in the dataset root and contain one
    ``<image name>\\t<integer label>`` pair per line.
    """

    def __init__(self, *args, **kwargs):
        """Build the image folder and load the image-name to label mapping.

        Raises:
            FileNotFoundError: If ``val_map.txt`` is missing from the root.
        """
        super().__init__(*args, **kwargs)
        img2label_path = os.path.join(self.root, "val_map.txt")
        if not os.path.exists(img2label_path):
            raise FileNotFoundError(f"Not found label file `{img2label_path}`.")

        self.img2label_map = self.make_img2label_map(img2label_path)

    def make_img2label_map(self, path):
        """Parse *path* into a ``{image file name: int label}`` dictionary.

        Lines that do not split into exactly two tab-separated fields, or
        whose image name is empty, are skipped.
        """
        img2label_map = {}
        with open(path) as f:
            for line in f:
                fields = line.strip().split("\t")
                if len(fields) != 2:
                    continue
                img_name, label = fields
                img_name = img_name.strip()
                if not img_name:
                    continue
                img2label_map[img_name] = int(label.strip())
        return img2label_map

    def __getitem__(self, index):
        """Return ``(transformed image, label)`` for sample *index*.

        The folder-derived target is ignored; the label is looked up by the
        image's base file name in ``img2label_map``.
        """
        path, _ = self.samples[index]
        sample = self.loader(path)
        if self.transform is not None:
            sample = self.transform(sample)
        target = self.img2label_map[os.path.basename(path)]

        return sample, target


def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0):
    """Create the calibration and verification dataloaders.

    Args:
        data_path: Dataset root (must contain ``val_map.txt``).
        num_samples: Number of leading samples used for calibration, or
            ``None`` to calibrate on the whole dataset.
        img_sz: Side length of the centre crop.
        batch_size: Batch size of both loaders.
        workers: Number of dataloader worker processes.

    Returns:
        ``(calibration_dataloader, verify_dataloader)``; the first is shuffled
        and limited to ``num_samples``, the second iterates the full dataset
        in order.
    """
    dataset = CalibrationImageNet(
        data_path,
        transform=T.Compose(
            [
                T.Resize(256),
                T.CenterCrop(img_sz),
                T.ToTensor(),
                T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ]
        ),
    )

    calibration_dataset = dataset
    if num_samples is not None:
        # Clamp to the dataset size: asking for more samples than exist would
        # otherwise create out-of-range indices that fail during iteration.
        calibration_dataset = torch.utils.data.Subset(
            dataset, indices=range(min(num_samples, len(dataset)))
        )

    calibration_dataloader = DataLoader(
        calibration_dataset,
        shuffle=True,
        batch_size=batch_size,
        drop_last=False,
        num_workers=workers,
    )

    verify_dataloader = DataLoader(
        dataset,
        shuffle=False,
        batch_size=batch_size,
        drop_last=False,
        num_workers=workers,
    )

    return calibration_dataloader, verify_dataloader


def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000):
    """Return a calibration dataloader covering ``step`` batches.

    A negative *step* selects the whole dataset; otherwise the loader is
    limited to ``min(total_sample, step * batch_size)`` samples.
    """
    num_samples = None if step < 0 else min(total_sample, step * batch_size)
    calibration_dataloader, _ = create_dataloaders(
        dataset_dir,
        img_sz=img_sz,
        batch_size=batch_size,
        workers=workers,
        num_samples=num_samples,
    )
    return calibration_dataloader
def eval_batch(batch_score, batch_label):
    """Count top-1 and top-5 hits for one batch.

    Args:
        batch_score: NumPy array of per-class scores, one row per sample.
        batch_label: Sequence or tensor of integer labels. It may be shorter
            than ``batch_score`` (e.g. a final partial batch); only the first
            ``len(batch_label)`` rows are scored.

    Returns:
        ``(top1, top5)`` as plain Python ints.

    Raises:
        IndexError: If there are more labels than score rows.
    """
    # from_numpy + to() avoids the copy-construct warning that
    # torch.tensor(<tensor>) emits.
    scores = torch.from_numpy(batch_score).to(torch.float32)
    _, indices = scores.topk(5)

    labels = torch.as_tensor(batch_label).reshape(-1, 1)
    if labels.shape[0] > indices.shape[0]:
        raise IndexError("more labels than score rows")

    hits = indices[: labels.shape[0]] == labels
    top1 = int(hits[:, 0].sum())
    top5 = int(hits.any(dim=1).sum())
    return top1, top5


def create_engine_context(engine_path, logger):
    """Deserialize the engine at *engine_path* and create an execution context.

    Returns:
        ``(engine, context)``.
    """
    with open(engine_path, "rb") as f:
        runtime = tensorrt.Runtime(logger)
        assert runtime
        engine = runtime.deserialize_cuda_engine(f.read())
        assert engine
        context = engine.create_execution_context()
        assert context

    return engine, context


def get_io_bindings(engine):
    """Allocate a device buffer for every binding of *engine*.

    Returns:
        ``(inputs, outputs, allocations)`` where ``inputs``/``outputs`` are
        lists of dicts (``index``, ``name``, ``dtype``, ``shape``,
        ``allocation``) and ``allocations`` lists every buffer in binding
        order.
    """
    inputs = []
    outputs = []
    allocations = []

    for i in range(engine.num_bindings):
        name = engine.get_binding_name(i)
        np_dtype = np.dtype(tensorrt.nptype(engine.get_binding_dtype(i)))
        shape = list(engine.get_binding_shape(i))

        # Buffer size in bytes: element size times every dimension.
        size = np_dtype.itemsize
        for s in shape:
            size *= s
        allocation = cuda.mem_alloc(size)

        binding = {
            "index": i,
            "name": name,
            "dtype": np_dtype,
            "shape": shape,
            "allocation": allocation,
        }
        print(f"binding {i}, name : {name} dtype : {np_dtype} shape : {shape}")
        allocations.append(allocation)
        if engine.binding_is_input(i):
            inputs.append(binding)
        else:
            outputs.append(binding)
    return inputs, outputs, allocations
0000000000000000000000000000000000000000..8423aff6e30e86e281c921510672db9ab2846599 --- /dev/null +++ b/models/cv/classification/densenet121/ixrt/config/DENSENET_CONFIG @@ -0,0 +1,33 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# IMGSIZE : 模型输入hw大小 +# MODEL_NAME : 生成onnx/engine的basename +# ORIGINE_MODEL : 原始onnx文件名称 +IMGSIZE=224 +MODEL_NAME=DenseNet +ORIGINE_MODEL=densenet121.onnx + +# QUANT CONFIG (仅PRECISION为int8时生效) + # QUANT_OBSERVER : 量化策略,可选 [hist_percentile, percentile, minmax, entropy, ema] + # QUANT_BATCHSIZE : 量化时组dataloader的batchsize, 最好和onnx中的batchsize保持一致,有些op可能推导shape错误(比如Reshape) + # QUANT_STEP : 量化步数 + # QUANT_SEED : 随机种子 保证量化结果可复现 + # QUANT_EXIST_ONNX : 如果有其他来源的量化模型则填写 +QUANT_OBSERVER=minmax +QUANT_BATCHSIZE=1 +QUANT_STEP=32 +QUANT_SEED=42 +DISABLE_QUANT_LIST= +QUANT_EXIST_ONNX= diff --git a/models/cv/classification/densenet121/ixrt/export_onnx.py b/models/cv/classification/densenet121/ixrt/export_onnx.py new file mode 100644 index 0000000000000000000000000000000000000000..ff86753a004cd6611c7c7104e0061904bc3d2184 --- /dev/null +++ b/models/cv/classification/densenet121/ixrt/export_onnx.py @@ -0,0 +1,43 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
def parse_args():
    """Parse the command-line options of the ONNX export script."""
    parser = argparse.ArgumentParser()
    # Required: without a destination torch.onnx.export has nowhere to write.
    parser.add_argument("--output_model", type=str, required=True)
    return parser.parse_args()


def main():
    """Export the pretrained torchvision DenseNet-121 to ONNX on the GPU."""
    args = parse_args()
    model = models.densenet121(pretrained=True)
    model.cuda()
    model.eval()
    # Fixed 1x3x224x224 tracing input.
    dummy_input = torch.randn(1, 3, 224, 224, device='cuda')

    torch.onnx.export(model,
                      dummy_input,
                      args.output_model,
                      export_params=True,
                      opset_version=11,
                      do_constant_folding=True,
                      input_names=['input'],
                      output_names=['output'],)
    print(" ")
    print('Model has been converted to ONNX')
    print("exit")


if __name__ == "__main__":
    main()
def main(config):
    """Run a throughput ("FPS") or accuracy ("ACC") check on an engine.

    The process exits with status 0 when the measured value reaches the
    configured target and with status 1 otherwise. Any other ``test_mode``
    returns without running a check.

    Args:
        config: Namespace produced by :func:`parse_config`.
    """
    logger = tensorrt.Logger(tensorrt.Logger.ERROR)

    # Load Engine && I/O bindings
    engine, context = create_engine_context(config.engine_file, logger)
    inputs, outputs, allocations = get_io_bindings(engine)

    # Warm up
    if config.warm_up > 0:
        print("\nWarm Start.")
        for _ in range(config.warm_up):
            context.execute_v2(allocations)
        print("Warm Done.")

    # Inference
    if config.test_mode == "FPS":
        torch.cuda.synchronize()
        start_time = time.time()

        for _ in range(config.loop_count):
            context.execute_v2(allocations)

        torch.cuda.synchronize()
        forward_time = time.time() - start_time

        # NOTE(review): the sample count is capped at 50000 even though
        # loop_count * bsz samples are executed - confirm the cap is intended.
        num_samples = min(50000, config.loop_count * config.bsz)
        fps = num_samples / forward_time

        print("FPS : ", fps)
        print(f"Performance Check : Test {fps} >= target {config.fps_target}")
        if fps >= config.fps_target:
            print("pass!")
            exit()
        else:
            print("failed!")
            exit(1)

    elif config.test_mode == "ACC":
        # The dataset is only needed here; FPS mode runs on whatever is in
        # the device buffers and must not require a dataset directory.
        dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz)

        ## Prepare the output data
        output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"])
        print(f"output shape : {output.shape} output type : {output.dtype}")

        total_sample = 0
        acc_top1, acc_top5 = 0, 0

        with tqdm(total=len(dataloader)) as _tqdm:
            for batch_data, batch_label in dataloader:
                batch_data = batch_data.numpy().astype(inputs[0]["dtype"])
                batch_data = np.ascontiguousarray(batch_data)
                total_sample += batch_data.shape[0]

                cuda.memcpy_htod(inputs[0]["allocation"], batch_data)
                context.execute_v2(allocations)
                cuda.memcpy_dtoh(output, outputs[0]["allocation"])

                # Some engines emit [N, C, 1, 1]; score a [N, C] view while
                # keeping the host buffer itself unchanged.
                scores = output.squeeze(axis=(2, 3)) if output.ndim == 4 else output

                batch_top1, batch_top5 = eval_batch(scores, batch_label)
                acc_top1 += batch_top1
                acc_top5 += batch_top5

                _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample),
                                    acc_5='{:.4f}'.format(acc_top5/total_sample))
                _tqdm.update(1)

        print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}")
        print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}")
        acc1 = acc_top1/total_sample
        print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}")
        if acc1 >= config.acc_target:
            print("pass!")
            exit()
        else:
            print("failed!")
            exit(1)


def parse_config():
    """Parse the command-line options of the inference script.

    ``--use_async`` and ``--device`` are accepted but not read by ``main``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--test_mode", type=str, default="FPS", help="FPS or ACC")
    parser.add_argument(
        "--engine_file",
        type=str,
        help="engine file path"
    )
    parser.add_argument(
        "--datasets_dir",
        type=str,
        default="",
        help="ImageNet dir",
    )
    parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times")
    parser.add_argument("--bsz", type=int, default=32, help="test batch size")
    parser.add_argument(
        "--imgsz",
        "--img",
        "--img-size",
        type=int,
        default=224,
        help="inference size h,w",
    )
    parser.add_argument("--use_async", action="store_true")
    parser.add_argument(
        "--device", type=int, default=0, help="cuda device, i.e. 0 or 0,1,2,3,4"
    )
    parser.add_argument("--fps_target", type=float, default=-1.0)
    parser.add_argument("--acc_target", type=float, default=-1.0)
    parser.add_argument("--loop_count", type=int, default=-1)

    config = parser.parse_args()
    return config


if __name__ == "__main__":
    config = parse_config()
    main(config)
def change_input_dim(model, bsz):
    """Set the first dimension of every graph input of *model* in place.

    Args:
        model: ONNX model whose ``graph.input`` entries are tensors with at
            least one dimension (not checked).
        bsz: Desired batch size. An ``int`` or a digit string sets a fixed
            size, any other string becomes a symbolic (dynamic) dimension
            name, and everything else falls back to a batch size of 1.
    """
    for graph_input in model.graph.input:
        dim0 = graph_input.type.tensor_type.shape.dim[0]
        if isinstance(bsz, int) or (isinstance(bsz, str) and bsz.isdigit()):
            # Fixed batch size. Digit strings are handled here, before the
            # generic string case, so "8" means 8 rather than a dim named "8".
            dim0.dim_value = int(bsz)
        elif isinstance(bsz, str):
            # Symbolic name -> dynamic batch dimension.
            dim0.dim_param = bsz
        else:
            dim0.dim_value = 1


def parse_args():
    """Parse the command-line options of the batch-size rewriting script."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--batch_size", type=int)
    parser.add_argument("--origin_model", type=str)
    parser.add_argument("--output_model", type=str)
    args = parser.parse_args()
    return args


def main():
    """Load the model, rewrite its input batch dimension and save it."""
    args = parse_args()
    model = onnx.load(args.origin_model)
    change_input_dim(model, args.batch_size)
    onnx.save(model, args.output_model)


if __name__ == "__main__":
    main()
def setseed(seed=42):
    """Seed the Python, NumPy and PyTorch random number generators."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)


def parse_args():
    """Parse and echo the command-line options of the quantization script.

    NOTE(review): ``--observer`` and ``--disable_quant_names`` are parsed but
    are not applied to the quantization setting anywhere in this script.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_name", type=str)
    parser.add_argument("--model", type=str)
    parser.add_argument("--dataset_dir", type=str, default="imagenet_val")
    parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"],
                        default="hist_percentile")
    parser.add_argument("--disable_quant_names", nargs='*', type=str)
    # Required: the export step joins paths onto save_dir, so a missing value
    # would otherwise fail only after the whole quantization run.
    parser.add_argument("--save_dir", type=str, help="save path", required=True)
    parser.add_argument("--bsz", type=int, default=32)
    parser.add_argument("--step", type=int, default=20)
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--imgsz", type=int, default=224)
    args = parser.parse_args()
    print("Quant config:", args)
    print(args.disable_quant_names)
    return args


config = parse_args()
# Apply --seed before the (shuffled) calibration dataloader is created so the
# calibration order is reproducible.
setseed(config.seed)

# modify configuration below:
WORKING_DIRECTORY = 'checkpoints'  # choose your working directory
TARGET_PLATFORM = TargetPlatform.TRT_INT8  # choose your target platform
MODEL_TYPE = NetworkFramework.ONNX  # or NetworkFramework.CAFFE
INPUT_LAYOUT = 'chw'  # input data layout, chw or hwc
NETWORK_INPUTSHAPE = [1, 3, 224, 224]  # input shape of your network
EXECUTING_DEVICE = 'cuda'  # 'cuda' or 'cpu'.
REQUIRE_ANALYSE = False
TRAINING_YOUR_NETWORK = False  # whether to fine-tune the network during quantization
# -------------------------------------------------------------------
# Load the model file; PPQ parses the ONNX or Caffe model into its own graph
# format. Models from PyTorch, TensorFlow, etc. should first be exported to
# ONNX (e.g. with torch.onnx.export).
# -------------------------------------------------------------------
graph = None
if MODEL_TYPE == NetworkFramework.ONNX:
    graph = load_onnx_graph(onnx_import_file=config.model)
if MODEL_TYPE == NetworkFramework.CAFFE:
    graph = load_caffe_graph(
        caffemodel_path=os.path.join(WORKING_DIRECTORY, 'model.caffemodel'),
        prototxt_path=os.path.join(WORKING_DIRECTORY, 'model.prototxt'))
assert graph is not None, 'Graph Loading Error, Check your input again.'

# -------------------------------------------------------------------
# The SETTING object controls PPQ's quantization logic: graph fusion,
# dispatching and the detailed quantization strategy. Adjust its attributes
# when the quantization error of the network is too high.
# -------------------------------------------------------------------
QS = QuantizationSettingFactory.default_setting()

# -------------------------------------------------------------------
# Optional fine-tuning pass to recover accuracy. PPQ offers several such
# algorithms, each enabled with QS.xxxx = True; enable only what is needed.
# -------------------------------------------------------------------
if TRAINING_YOUR_NETWORK:
    QS.lsq_optimization = True  # retrain the network to reduce quantization error
    QS.lsq_optimization_setting.steps = 500  # retraining steps; more steps take longer
    QS.lsq_optimization_setting.collecting_device = 'cuda'  # where cached data is kept; use 'cpu' if GPU memory runs out


dataloader = getdataloader(config.dataset_dir, config.step, batch_size=config.bsz, img_sz=config.imgsz)
# ENABLE_CUDA_KERNEL speeds up quantization but needs a working build
# environment; remove the `with ENABLE_CUDA_KERNEL():` wrapper to quantize
# without the CUDA kernels.
with ENABLE_CUDA_KERNEL():
    print('网络正量化中,根据你的量化配置,这将需要一段时间:')
    quantized = quantize_native_model(
        setting=QS,  # setting object controlling the standard quantization logic
        model=graph,
        calib_dataloader=dataloader,
        calib_steps=config.step,
        input_shape=NETWORK_INPUTSHAPE,  # used when the network has a single input
        inputs=None,
        # For multi-input networks pass input_shape=None and
        # inputs=[torch.zeros(1,3,224,224), torch.zeros(1,3,224,224)] instead.
        collate_fn=lambda x: x[0].to(EXECUTING_DEVICE),  # keep only the image batch and move it to the device
        platform=TARGET_PLATFORM,
        device=EXECUTING_DEVICE,
        do_quantize=True)

    # -------------------------------------------------------------------
    # An executor is needed to run the quantized network and obtain results;
    # it behaves much like a torch Module. It must be created before export.
    # -------------------------------------------------------------------
    executor = TorchExecutor(graph=quantized, device=EXECUTING_DEVICE)
    # output = executor.forward(input)

    # -------------------------------------------------------------------
    # PPQ reports quantization error as noise energy / signal energy (the
    # inverse of the signal-to-noise ratio); 0.1 means the quantization noise
    # is about 10% of the signal. graphwise_error_analyse measures cumulative
    # error, so the last layers usually show the largest values; use
    # layerwise_error_analyse to locate the source layer by layer.
    # -------------------------------------------------------------------
    print('正计算网络量化误差(SNR),最后一层的误差应小于 0.1 以保证量化精度:')
    reports = graphwise_error_analyse(
        graph=quantized, running_device=EXECUTING_DEVICE, steps=32,
        dataloader=dataloader, collate_fn=lambda x: x[0].to(EXECUTING_DEVICE))
    for op, snr in reports.items():
        if snr > 0.1:
            ppq_warning(f'层 {op} 的累计量化误差显著,请考虑进行优化')

    if REQUIRE_ANALYSE:
        print('正计算逐层量化误差(SNR),每一层的独立量化误差应小于 0.1 以保证量化精度:')
        # The dataloader yields (images, labels); pick the images exactly as
        # the other collate functions in this script do.
        layerwise_error_analyse(graph=quantized, running_device=EXECUTING_DEVICE,
                                interested_outputs=None,
                                dataloader=dataloader, collate_fn=lambda x: x[0].to(EXECUTING_DEVICE))

    # -------------------------------------------------------------------
    # export_ppq_graph writes the quantized model; PPQ adapts the model format
    # to the selected target platform.
    # -------------------------------------------------------------------
    print('网络量化结束,正在生成目标文件:')
    export_ppq_graph(
        graph=quantized, platform=TARGET_PLATFORM,
        graph_save_to=os.path.join(config.save_dir, f"quantized_{config.model_name}.onnx"),
        config_save_to=os.path.join(config.save_dir, 'quant_cfg.json'))
# Accuracy check for the FP16 DenseNet engine: simplify the ONNX model, fix
# its batch size, build the engine, then run inference.py in ACC mode.
# Expects CHECKPOINTS_DIR, DATASETS_DIR, RUN_DIR and CONFIG_DIR to be set in
# the environment.
# Optional arguments: --bs <batch size>  --tgt <accuracy target>

EXIT_STATUS=0
# Latch EXIT_STATUS to 1 when the command executed just before this call
# returned a non-zero status.
check_status()
{
    if ((${PIPESTATUS[0]} != 0));then
        EXIT_STATUS=1
    fi
}

# Run parameters
BSZ=32
TGT=-1
WARM_UP=0
LOOP_COUNT=-1
RUN_MODE=ACC
PRECISION=float16

# Update arguments
# index is incremented before the match, so ${arguments[index]} is the word
# that follows the current flag, i.e. the flag's value.
index=0
options=$@
arguments=($options)
for argument in $options
do
    index=`expr $index + 1`
    case $argument in
      --bs) BSZ=${arguments[index]};;
      --tgt) TGT=${arguments[index]};;
    esac
done

# The config file supplies IMGSIZE, MODEL_NAME and ORIGINE_MODEL (a file name
# that is turned into a path under CHECKPOINTS_DIR on the next line).
source ${CONFIG_DIR}
ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}

echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
echo DATASETS_DIR : ${DATASETS_DIR}
echo RUN_DIR : ${RUN_DIR}
echo CONFIG_DIR : ${CONFIG_DIR}
echo ====================== Model Info ======================
echo Model Name : ${MODEL_NAME}
echo Onnx Path : ${ORIGINE_MODEL}

step=0
SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx

# Simplify Model
# Each preparation step is skipped when its output file already exists.
let step++
echo;
echo [STEP ${step}] : Simplify Model
if [ -f ${SIM_MODEL} ];then
    echo " "Simplify Model, ${SIM_MODEL} has been existed
else
    python3 ${RUN_DIR}/simplify_model.py \
        --origin_model $ORIGINE_MODEL    \
        --output_model ${SIM_MODEL}
    echo " "Generate ${SIM_MODEL}
fi

# Change Batchsize
let step++
echo;
echo [STEP ${step}] : Change Batchsize
FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx
if [ -f $FINAL_MODEL ];then
    echo " "Change Batchsize Skip, $FINAL_MODEL has been existed
else
    python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
        --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL}
    echo " "Generate ${FINAL_MODEL}
fi

# Build Engine
let step++
echo;
echo [STEP ${step}] : Build Engine
ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
if [ -f $ENGINE_FILE ];then
    echo " "Build Engine Skip, $ENGINE_FILE has been existed
else
    python3 ${RUN_DIR}/build_engine.py \
        --precision ${PRECISION} \
        --model ${FINAL_MODEL} \
        --engine ${ENGINE_FILE}
    echo " "Generate Engine ${ENGINE_FILE}
fi

# Inference
# Only this step is followed by check_status, so it alone determines the
# script's exit code.
let step++
echo;
echo [STEP ${step}] : Inference
python3 ${RUN_DIR}/inference.py \
    --engine_file=${ENGINE_FILE} \
    --datasets_dir=${DATASETS_DIR} \
    --imgsz=${IMGSIZE} \
    --warm_up=${WARM_UP} \
    --loop_count ${LOOP_COUNT} \
    --test_mode ${RUN_MODE} \
    --acc_target ${TGT} \
    --bsz ${BSZ}; check_status

exit ${EXIT_STATUS}
# Overall exit code of the script; check_status flips it to 1 when the
# inference command fails.
EXIT_STATUS=0

# Record a failure of the command that ran immediately before this call.
check_status()
{
    if ((${PIPESTATUS[0]} != 0)); then
        EXIT_STATUS=1
    fi
}

# Print an error and stop. Every later step consumes the file the failed step
# should have produced, so continuing would only hide the real cause.
fail()
{
    echo "ERROR: $*" >&2
    exit 1
}

# The run is configured through these environment variables.
for required_var in CONFIG_DIR CHECKPOINTS_DIR DATASETS_DIR RUN_DIR; do
    [ -n "${!required_var}" ] || fail "environment variable ${required_var} is not set"
done

# Run parameters
BSZ=32
TGT=-1
WARM_UP=3
LOOP_COUNT=20
RUN_MODE=FPS
PRECISION=float16

# Command-line overrides: "--bs N" sets the batch size, "--tgt X" the FPS
# target. Any other argument is ignored.
while (($# > 0)); do
    case "$1" in
        --bs)  BSZ="${2-}" ;;
        --tgt) TGT="${2-}" ;;
    esac
    shift
done

# The config file is expected to define MODEL_NAME, ORIGINE_MODEL and IMGSIZE,
# which are all used below.
[ -f "${CONFIG_DIR}" ] || fail "config file ${CONFIG_DIR} not found"
source "${CONFIG_DIR}"
ORIGINE_MODEL="${CHECKPOINTS_DIR}/${ORIGINE_MODEL}"

echo CHECKPOINTS_DIR : "${CHECKPOINTS_DIR}"
echo DATASETS_DIR : "${DATASETS_DIR}"
echo RUN_DIR : "${RUN_DIR}"
echo CONFIG_DIR : "${CONFIG_DIR}"
echo ====================== Model Info ======================
echo Model Name : "${MODEL_NAME}"
echo Onnx Path : "${ORIGINE_MODEL}"

step=0
SIM_MODEL="${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx"

# Simplify Model (skipped when the simplified model already exists)
step=$((step + 1))
echo
echo "[STEP ${step}] : Simplify Model"
if [ -f "${SIM_MODEL}" ]; then
    echo " "Simplify Model, "${SIM_MODEL}" has been existed
else
    python3 "${RUN_DIR}/simplify_model.py" \
        --origin_model "${ORIGINE_MODEL}" \
        --output_model "${SIM_MODEL}" || fail "simplify_model.py failed"
    echo " "Generate "${SIM_MODEL}"
fi

# Change Batchsize (skipped when a model for this batch size already exists)
step=$((step + 1))
echo
echo "[STEP ${step}] : Change Batchsize"
FINAL_MODEL="${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx"
if [ -f "${FINAL_MODEL}" ]; then
    echo " "Change Batchsize Skip, "${FINAL_MODEL}" has been existed
else
    python3 "${RUN_DIR}/modify_batchsize.py" --batch_size "${BSZ}" \
        --origin_model "${SIM_MODEL}" --output_model "${FINAL_MODEL}" \
        || fail "modify_batchsize.py failed"
    echo " "Generate "${FINAL_MODEL}"
fi

# Build Engine (skipped when the engine file already exists)
step=$((step + 1))
echo
echo "[STEP ${step}] : Build Engine"
ENGINE_FILE="${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine"
if [ -f "${ENGINE_FILE}" ]; then
    echo " "Build Engine Skip, "${ENGINE_FILE}" has been existed
else
    python3 "${RUN_DIR}/build_engine.py" \
        --precision "${PRECISION}" \
        --model "${FINAL_MODEL}" \
        --engine "${ENGINE_FILE}" || fail "build_engine.py failed"
    echo " "Generate Engine "${ENGINE_FILE}"
fi

# Inference: the exit code of this command decides the script's exit status.
step=$((step + 1))
echo
echo "[STEP ${step}] : Inference"
python3 "${RUN_DIR}/inference.py" \
    --engine_file="${ENGINE_FILE}" \
    --datasets_dir="${DATASETS_DIR}" \
    --imgsz="${IMGSIZE}" \
    --warm_up="${WARM_UP}" \
    --loop_count "${LOOP_COUNT}" \
    --test_mode "${RUN_MODE}" \
    --fps_target "${TGT}" \
    --bsz "${BSZ}"; check_status

exit ${EXIT_STATUS}
def simplify_model(args):
    """Simplify an ONNX model, re-run shape inference and save the result.

    Args:
        args: Namespace with ``origin_model`` (input ONNX path) and
            ``output_model`` (output ONNX path).

    Raises:
        RuntimeError: If onnxsim reports that the simplified model failed its
            own validation.
    """
    onnx_model = onnx.load(args.origin_model)
    model_simp, check = simplify(onnx_model)
    # `check` is onnxsim's validation flag; saving a model that failed it would
    # push the problem into the later engine-build step.
    if not check:
        raise RuntimeError(
            f"Simplified ONNX model could not be validated: {args.origin_model}"
        )
    model_simp = onnx.shape_inference.infer_shapes(model_simp)
    onnx.save(model_simp, args.output_model)
    print(" Simplify onnx Done.")


def parse_args(argv=None):
    """Parse the command line.

    Args:
        argv: Optional list of arguments; ``None`` (the default) reads
            ``sys.argv`` as before.

    Returns:
        Namespace with ``origin_model``, ``output_model`` and ``reshape``.
        ``reshape`` is accepted for compatibility and is not read by
        ``simplify_model``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--origin_model", type=str)
    parser.add_argument("--output_model", type=str)
    parser.add_argument("--reshape", action="store_true")
    return parser.parse_args(argv)


# Guarded so that importing this module no longer parses argv and runs the tool.
if __name__ == "__main__":
    simplify_model(parse_args())
+ +### Model Conversion +```bash + +mkdir checkpoints +python3 export.py --weight inception_v3_google-0cc3c7bd.pth --output checkpoints/inception-v3.onnx +``` + +## Inference +```bash +export PROJ_DIR=/Path/to/inception_v3/ixrt +export DATASETS_DIR=/path/to/imagenet_val/ +export CHECKPOINTS_DIR=./checkpoints +export RUN_DIR=/Path/to/inception_v3/ixrt +export CONFIG_DIR=/Path/to/config/INCEPTION_V3_CONFIG +export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python +``` +### FP16 + +```bash +# Accuracy +bash scripts/infer_inception_v3_fp16_accuracy.sh +# Performance +bash scripts/infer_inception_v3_fp16_performance.sh +``` + +### INT8 +```bash +# Accuracy +bash scripts/infer_inception_v3_int8_accuracy.sh +# Performance +bash scripts/infer_inception_v3_int8_performance.sh +``` + +## Results + +Model |BatchSize |Precision |FPS |Top-1(%) |Top-5(%) +-------------|-----------|----------|----------|----------|-------- +Inception_v3 | 32 | FP16 | 3515.29 | 70.64 | 89.33 +Inception_v3 | 32 | INT8 | 4916.32 | 70.45 | 89.28 diff --git a/models/cv/classification/inception_v3/ixrt/build_engine.py b/models/cv/classification/inception_v3/ixrt/build_engine.py new file mode 100755 index 0000000000000000000000000000000000000000..41e6af8d2585da92d7f0354f7ced4ea4978bd652 --- /dev/null +++ b/models/cv/classification/inception_v3/ixrt/build_engine.py @@ -0,0 +1,106 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
def assertSuccess(err):
    """Raise if a CUDA runtime call did not return ``cudaSuccess``.

    An explicit raise is used instead of ``assert`` so the check survives
    ``python -O``.
    """
    if err != cudart.cudaError_t.cudaSuccess:
        raise RuntimeError(f"CUDA runtime call failed: {err}")


class EngineCalibrator(tensorrt.IInt8EntropyCalibrator2):
    """INT8 entropy calibrator that feeds batches from the calibration loader.

    Each batch is copied into a single device buffer that is allocated once in
    ``__init__`` and released in ``__del__``.
    """

    def __init__(self, cache_file, datasets_dir, loop_count=10, bsz=1, img_sz=224):
        """Create the data loader and allocate the device staging buffer.

        Args:
            cache_file: Path used to read/write the calibration cache.
            datasets_dir: Dataset directory handed to ``getdataloader``.
            loop_count: Number of calibration steps handed to ``getdataloader``.
            bsz: Calibration batch size.
            img_sz: Height/width of the square input images.
        """
        super().__init__()
        self.cache_file = cache_file
        # Set first so __del__ is safe even if a later line raises.
        self.batch_allocation = None
        self.image_batcher = getdataloader(datasets_dir, loop_count, batch_size=bsz, img_sz=img_sz)
        self.batch_generator = iter(self.image_batcher)
        # cudaMalloc takes a size in BYTES. The buffer was previously sized in
        # elements while get_batch copies batch.nbytes bytes into it.
        # NOTE(review): assumes the loader yields float32 3-channel images -
        # confirm against calibration_dataset.getdataloader.
        self.batch_nbytes = bsz * 3 * img_sz * img_sz * np.dtype(np.float32).itemsize
        err, self.batch_allocation = cudart.cudaMalloc(self.batch_nbytes)
        assertSuccess(err)

    def __del__(self):
        """Free the device buffer, if one was allocated."""
        if getattr(self, "batch_allocation", None) is None:
            return
        err, = cudart.cudaFree(self.batch_allocation)
        assertSuccess(err)

    def get_batch_size(self):
        """Return the batch size of the underlying data loader."""
        return self.image_batcher.batch_size

    def get_batch(self, names):
        """Copy the next calibration batch to the device.

        Returns:
            A one-element list with the device address of the batch, or
            ``None`` once the loader is exhausted.

        Raises:
            RuntimeError: If the batch does not fit the staging buffer or the
                host-to-device copy fails.
        """
        try:
            batch, _ = next(self.batch_generator)
        except StopIteration:
            return None

        batch = np.ascontiguousarray(batch.numpy())
        if batch.nbytes > self.batch_nbytes:
            raise RuntimeError(
                f"Calibration batch needs {batch.nbytes} bytes but only "
                f"{self.batch_nbytes} were allocated"
            )
        err, = cudart.cudaMemcpy(
            self.batch_allocation,
            batch,
            batch.nbytes,
            cudart.cudaMemcpyKind.cudaMemcpyHostToDevice,
        )
        assertSuccess(err)
        return [int(self.batch_allocation)]

    def read_calibration_cache(self):
        """Return the cached calibration data, or ``None`` if no cache file exists."""
        if os.path.exists(self.cache_file):
            with open(self.cache_file, "rb") as f:
                return f.read()

    def write_calibration_cache(self, cache):
        """Write the calibration data to the cache file."""
        with open(self.cache_file, "wb") as f:
            f.write(cache)


def main(config):
    """Parse the ONNX model and write a serialized engine to ``config.engine``.

    Args:
        config: Namespace with ``model``, ``precision``, ``engine`` and
            ``datasets_dir`` (the latter is only used for int8 calibration).

    Raises:
        RuntimeError: If the ONNX file cannot be parsed or the build fails.
    """
    IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.VERBOSE)
    builder = tensorrt.Builder(IXRT_LOGGER)
    EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    network = builder.create_network(EXPLICIT_BATCH)
    build_config = builder.create_builder_config()
    parser = tensorrt.OnnxParser(network, IXRT_LOGGER)
    if not parser.parse_from_file(config.model):
        for i in range(parser.num_errors):
            print(parser.get_error(i))
        raise RuntimeError(f"Failed to parse the ONNX file {config.model}")

    # "float32" needs no builder flag; it used to fall through to FP16.
    precision_flags = {
        "int8": tensorrt.BuilderFlag.INT8,
        "float16": tensorrt.BuilderFlag.FP16,
    }
    precision = precision_flags.get(config.precision)
    print("precision : ", precision if precision is not None else config.precision)
    if precision is not None:
        build_config.set_flag(precision)
    if config.precision == "int8":
        build_config.int8_calibrator = EngineCalibrator("int8_cache", config.datasets_dir)

    plan = builder.build_serialized_network(network, build_config)
    # Check before opening the output: an empty engine file left behind by a
    # failed build would be mistaken for a finished engine on the next run.
    if plan is None:
        raise RuntimeError(f"Failed to build an engine from {config.model}")
    with open(config.engine, "wb") as f:
        f.write(plan)


def parse_args():
    """Parse the command line of the engine builder."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", type=str)
    parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8",
            help="The precision of datatype")
    parser.add_argument("--engine", type=str, default=None)
    parser.add_argument(
        "--datasets_dir",
        type=str,
        default="",
        help="ImageNet dir",
    )
    args = parser.parse_args()
    return args


if __name__ == "__main__":
    args = parse_args()
    main(args)
def GiB(val):
    """Return ``val`` gibibytes expressed in bytes (fractional values allowed)."""
    return val * (1 << 30)


def json_load(filename):
    """Load and return the JSON document stored in ``filename``."""
    with open(filename) as json_file:
        return json.load(json_file)


def setDynamicRange(network, json_file):
    """Set symmetric dynamic ranges on network tensors from a quant-param file.

    The JSON file must contain an ``act_quant_info`` mapping of tensor name to
    a value; each matching tensor gets the range ``(-abs(value), abs(value))``.
    Matched network inputs are printed; layer outputs without an entry are
    printed in green so missing ranges are easy to spot.

    Args:
        network: Network definition exposing ``num_inputs``/``get_input`` and
            ``num_layers``/``get_layer``.
        json_file: Path of the quantization parameter JSON file.
    """
    act_quant = json_load(json_file)["act_quant_info"]

    def apply_range(tensor):
        # Returns True when the tensor had an entry and its range was set.
        if tensor.name not in act_quant:
            return False
        bound = abs(act_quant[tensor.name])
        tensor.dynamic_range = (-bound, bound)
        return True

    for i in range(network.num_inputs):
        input_tensor = network.get_input(i)
        if apply_range(input_tensor):
            print(input_tensor.name)

    for i in range(network.num_layers):
        layer = network.get_layer(i)
        for output_index in range(layer.num_outputs):
            tensor = layer.get_output(output_index)
            if not apply_range(tensor):
                print("\033[1;32m%s\033[0m" % tensor.name)


def build_engine(onnx_file, json_file, engine_file):
    """Build an INT8 engine from an ONNX model and pre-computed dynamic ranges.

    Args:
        onnx_file: Path of the ONNX model.
        json_file: Path of the quantization parameter JSON file.
        engine_file: Path the serialized engine is written to.

    Returns:
        ``True`` when the engine file was written, ``None`` when parsing or
        building failed (the errors are printed).

    Raises:
        SystemExit: If ``onnx_file`` is missing.
    """
    if not onnx_file or not os.path.exists(onnx_file):
        # Same effect as the former quit(msg), without relying on the
        # site-provided builtin.
        raise SystemExit("ONNX file {} not found".format(onnx_file))

    builder = trt.Builder(TRT_LOGGER)
    network = builder.create_network(EXPLICIT_BATCH)
    config = builder.create_builder_config()

    # If it is a dynamic onnx model , you need to add the following.
    # profile = builder.create_optimization_profile()
    # profile.set_shape("input_name", (batch, channels, min_h, min_w), (batch, channels, opt_h, opt_w), (batch, channels, max_h, max_w))
    # config.add_optimization_profile(profile)

    parser = trt.OnnxParser(network, TRT_LOGGER)
    # config.max_workspace_size = GiB(1)
    with open(onnx_file, "rb") as model:
        if not parser.parse(model.read()):
            print("ERROR: Failed to parse the ONNX file.")
            for error in range(parser.num_errors):
                print(parser.get_error(error))
            return None

    config.set_flag(trt.BuilderFlag.INT8)
    setDynamicRange(network, json_file)

    engine = builder.build_engine(network, config)
    if engine is None:
        print("ERROR: Failed to build the engine.")
        return None

    # Serialize before opening the output so a failure cannot leave an empty
    # engine file behind.
    serialized = engine.serialize()
    with open(engine_file, "wb") as f:
        f.write(serialized)
    return True


if __name__ == "__main__":
    # Add plugins if needed
    # import ctypes
    # ctypes.CDLL("libmmdeploy_tensorrt_ops.so")
    parser = argparse.ArgumentParser(
        description="Writing qparams to onnx to convert tensorrt engine."
    )
    parser.add_argument("--onnx", type=str, default=None)
    parser.add_argument("--qparam_json", type=str, default=None)
    parser.add_argument("--engine", type=str, default=None)
    arg = parser.parse_args()

    # Only report success (and exit 0) when an engine was actually written.
    if not build_engine(arg.onnx, arg.qparam_json, arg.engine):
        raise SystemExit(1)
    print("\033[1;32mgenerate %s\033[0m" % arg.engine)
class CalibrationImageNet(torchvision.datasets.ImageFolder):
    """ImageFolder whose labels come from ``<root>/val_map.txt``.

    The label file holds one ``<image name>\\t<integer label>`` pair per line;
    the folder-derived class index of ``ImageFolder`` is ignored.
    """

    def __init__(self, *args, **kwargs):
        """Build the image list and load the name-to-label map.

        Raises:
            FileNotFoundError: If ``val_map.txt`` is missing from the root.
        """
        super().__init__(*args, **kwargs)
        img2label_path = os.path.join(self.root, "val_map.txt")
        if not os.path.exists(img2label_path):
            raise FileNotFoundError(f"Not found label file `{img2label_path}`.")

        self.img2label_map = self.make_img2label_map(img2label_path)

    def make_img2label_map(self, path):
        """Parse the label file into a ``{image name: int label}`` dict.

        Lines that do not split into exactly two tab-separated fields, or whose
        image name is empty, are skipped.
        """
        img2label_map = {}
        with open(path) as f:
            for line in f:
                fields = line.strip().split("\t")
                if len(fields) != 2:
                    continue
                img_name, label = fields
                img_name = img_name.strip()
                if not img_name:
                    continue
                img2label_map[img_name] = int(label.strip())
        return img2label_map

    def __getitem__(self, index):
        """Return ``(transformed image, label)`` for the sample at ``index``.

        The label is looked up by file name; ``target_transform`` is
        deliberately not applied.
        """
        path, _ = self.samples[index]
        sample = self.loader(path)
        if self.transform is not None:
            sample = self.transform(sample)
        target = self.img2label_map[os.path.basename(path)]

        return sample, target


def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0):
    """Create the calibration and verification data loaders.

    Args:
        data_path: Dataset root (must contain ``val_map.txt``).
        num_samples: Number of leading samples used for calibration; ``None``
            uses the whole dataset.
        img_sz: Side length of the centre crop.
        batch_size: Batch size of both loaders.
        workers: Number of loader worker processes.

    Returns:
        ``(calibration_dataloader, verify_dataloader)``; the second one always
        iterates the full dataset.
    """
    dataset = CalibrationImageNet(
        data_path,
        transform=T.Compose(
            [
                T.Resize(256),
                T.CenterCrop(img_sz),
                T.ToTensor(),
                T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ]
        ),
    )

    calibration_dataset = dataset
    if num_samples is not None:
        # Clamp to the dataset size: Subset does not validate its indices, so
        # asking for more samples than exist failed later with an IndexError.
        num_samples = min(num_samples, len(dataset))
        calibration_dataset = torch.utils.data.Subset(
            dataset, indices=range(num_samples)
        )

    calibration_dataloader = DataLoader(
        calibration_dataset,
        shuffle=False,
        batch_size=batch_size,
        drop_last=False,
        num_workers=workers,
    )

    verify_dataloader = DataLoader(
        dataset,
        shuffle=False,
        batch_size=batch_size,
        drop_last=False,
        num_workers=workers,
    )

    return calibration_dataloader, verify_dataloader


def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000):
    """Return a loader over the first ``step * batch_size`` samples.

    The sample count is capped at ``total_sample``; a negative ``step`` selects
    the whole dataset.
    """
    num_samples = None if step < 0 else min(total_sample, step * batch_size)
    calibration_dataloader, _ = create_dataloaders(
        dataset_dir,
        img_sz=img_sz,
        batch_size=batch_size,
        workers=workers,
        num_samples=num_samples,
    )
    return calibration_dataloader
def eval_batch(batch_score, batch_label):
    """Count top-1 and top-5 hits for one batch.

    Args:
        batch_score: NumPy array of per-class scores, one row per sample.
        batch_label: Integer labels (tensor or sequence). Only the first
            ``len(batch_label)`` score rows are evaluated, so a partially
            filled output buffer is fine.

    Returns:
        ``(top1, top5)`` as plain ints. With fewer than five classes, "top-5"
        uses all available classes instead of raising.
    """
    # from_numpy(...).to(...) avoids the copy-construct warning that
    # torch.tensor(<tensor>) emits.
    scores = torch.from_numpy(batch_score).to(torch.float32)
    k = min(5, scores.shape[-1])
    _, top_indices = scores.topk(k)

    labels = torch.as_tensor(batch_label).reshape(-1, 1)
    hits = top_indices[: labels.shape[0]].eq(labels)
    top1 = int(hits[:, 0].sum())
    top5 = int(hits.any(dim=1).sum())
    return top1, top5


def create_engine_context(engine_path, logger):
    """Deserialize the engine at ``engine_path`` and create an execution context.

    Returns:
        ``(engine, context)``.
    """
    with open(engine_path, "rb") as f:
        runtime = tensorrt.Runtime(logger)
        assert runtime
        engine = runtime.deserialize_cuda_engine(f.read())
        assert engine
        context = engine.create_execution_context()
        assert context

    return engine, context


def get_io_bindings(engine):
    """Allocate one device buffer per engine binding.

    Returns:
        ``(inputs, outputs, allocations)``: ``inputs``/``outputs`` are lists of
        dicts with ``index``, ``name``, ``dtype``, ``shape`` and ``allocation``;
        ``allocations`` lists every buffer in binding order.

    Raises:
        ValueError: If a binding has a non-positive (e.g. dynamic) dimension,
            because its buffer size cannot be computed.
    """
    inputs = []
    outputs = []
    allocations = []

    for i in range(engine.num_bindings):
        is_input = engine.binding_is_input(i)
        name = engine.get_binding_name(i)
        dtype = np.dtype(tensorrt.nptype(engine.get_binding_dtype(i)))
        shape = list(engine.get_binding_shape(i))

        size = dtype.itemsize
        for s in shape:
            if s <= 0:
                raise ValueError(f"binding {name} has unsupported shape {shape}")
            size *= s
        allocation = cuda.mem_alloc(size)
        binding = {
            "index": i,
            "name": name,
            "dtype": dtype,
            "shape": shape,
            "allocation": allocation,
        }
        print(f"binding {i}, name : {name} dtype : {dtype} shape : {shape}")
        allocations.append(allocation)
        if is_input:
            inputs.append(binding)
        else:
            outputs.append(binding)
    return inputs, outputs, allocations
b/models/cv/classification/inception_v3/ixrt/config/INCEPTION_V3_CONFIG @@ -0,0 +1,35 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +# IMGSIZE : 模型输入hw大小 +# MODEL_NAME : 生成onnx/engine的basename +# ORIGINE_MODEL : 原始onnx文件名称 +IMGSIZE=224 +MODEL_NAME=Inception_v3 +ORIGINE_MODEL=inception-v3.onnx + +# QUANT CONFIG (仅PRECISION为int8时生效) + # QUANT_OBSERVER : 量化策略,可选 [hist_percentile, percentile, minmax, entropy, ema] + # QUANT_BATCHSIZE : 量化时组dataloader的batchsize, 最好和onnx中的batchsize保持一致,有些op可能推导shape错误(比如Reshape) + # QUANT_STEP : 量化步数 + # QUANT_SEED : 随机种子 保证量化结果可复现 + # QUANT_EXIST_ONNX : 如果有其他来源的量化模型则填写 +QUANT_OBSERVER=hist_percentile +QUANT_BATCHSIZE=32 +QUANT_STEP=32 +QUANT_SEED=42 +DISABLE_QUANT_LIST= +QUANT_EXIST_ONNX= + diff --git a/models/cv/classification/inception_v3/ixrt/export.py b/models/cv/classification/inception_v3/ixrt/export.py new file mode 100644 index 0000000000000000000000000000000000000000..75b90b095405799861a6b3117729c037d6e07786 --- /dev/null +++ b/models/cv/classification/inception_v3/ixrt/export.py @@ -0,0 +1,59 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
def parse_args(argv=None):
    """Parse the exporter's command line.

    Args:
        argv: Optional list of arguments; ``None`` (the default) reads
            ``sys.argv`` as before.

    Returns:
        Namespace with ``weight`` (checkpoint path) and ``output`` (ONNX path).
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("--weight",
                    type=str,
                    required=True,
                    help="pytorch model weight.")

    parser.add_argument("--output",
                    type=str,
                    required=True,
                    help="export onnx model path.")

    return parser.parse_args(argv)


def main():
    """Load the Inception v3 weights and export the model to ONNX (opset 13)."""
    args = parse_args()

    model = torchvision.models.inception_v3()
    # map_location="cpu" lets a checkpoint that was saved from a GPU load on a
    # host without one; the export below runs on the CPU anyway.
    model.load_state_dict(torch.load(args.weight, map_location="cpu"))
    model.eval()

    input_names = ['input']
    output_names = ['output']
    # Fixed export shape: batch 32, 3 channels, 224x224.
    dummy_input = torch.randn(32, 3, 224, 224)

    torch.onnx.export(
        model,
        dummy_input,
        args.output,
        input_names=input_names,
        output_names=output_names,
        opset_version=13
    )

    print("Export onnx model successfully! ")


if __name__ == "__main__":
    main()
def main(config):
    """Run the engine in FPS (throughput) or ACC (accuracy) mode and exit.

    The process exits with status 0 when the measured value reaches the
    configured target (``fps_target`` / ``acc_target``) and 1 otherwise.

    Args:
        config: Namespace with ``engine_file``, ``datasets_dir``, ``imgsz``,
            ``bsz``, ``warm_up``, ``loop_count``, ``test_mode``, ``fps_target``
            and ``acc_target``.

    Raises:
        ValueError: For an unknown ``test_mode`` or a non-positive
            ``loop_count`` in FPS mode.
        RuntimeError: If ACC mode finds no samples to evaluate.
    """
    logger = tensorrt.Logger(tensorrt.Logger.ERROR)

    # Load Engine && I/O bindings
    engine, context = create_engine_context(config.engine_file, logger)
    inputs, outputs, allocations = get_io_bindings(engine)

    # Warm up
    if config.warm_up > 0:
        print("\nWarm Start.")
        for _ in range(config.warm_up):
            context.execute_v2(allocations)
        print("Warm Done.")

    # Inference
    if config.test_mode == "FPS":
        # Without iterations there is nothing to time and the sample count
        # below would be negative.
        if config.loop_count <= 0:
            raise ValueError("--loop_count must be positive in FPS mode")

        torch.cuda.synchronize()
        start_time = time.time()

        for _ in range(config.loop_count):
            context.execute_v2(allocations)

        torch.cuda.synchronize()
        forward_time = time.time() - start_time

        num_samples = min(50000, config.loop_count * config.bsz)
        fps = num_samples / forward_time

        print("FPS : ", fps)
        print(f"Performance Check : Test {fps} >= target {config.fps_target}")
        passed = fps >= config.fps_target

    elif config.test_mode == "ACC":
        # The dataset is only needed here, so FPS runs no longer load it.
        dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz)

        ## Prepare the output data
        output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"])
        print(f"output shape : {output.shape} output type : {output.dtype}")

        total_sample = 0
        acc_top1, acc_top5 = 0, 0

        with tqdm(total=len(dataloader)) as _tqdm:
            for batch_data, batch_label in dataloader:
                batch_data = batch_data.numpy().astype(inputs[0]["dtype"])
                batch_data = np.ascontiguousarray(batch_data)
                total_sample += batch_data.shape[0]

                cuda.memcpy_htod(inputs[0]["allocation"], batch_data)
                context.execute_v2(allocations)
                cuda.memcpy_dtoh(output, outputs[0]["allocation"])

                # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model
                scores = output.squeeze(axis=(2, 3)) if output.ndim == 4 else output

                batch_top1, batch_top5 = eval_batch(scores, batch_label)
                acc_top1 += batch_top1
                acc_top5 += batch_top5

                _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample),
                                    acc_5='{:.4f}'.format(acc_top5/total_sample))
                _tqdm.update(1)

        if total_sample == 0:
            raise RuntimeError(f"No samples found in {config.datasets_dir}")

        print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}")
        print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}")
        acc1 = acc_top1/total_sample
        print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}")
        passed = acc1 >= config.acc_target

    else:
        # Previously an unknown mode did nothing and exited with status 0.
        raise ValueError(f"Unknown test_mode {config.test_mode!r}; expected 'FPS' or 'ACC'")

    print("pass!" if passed else "failed!")
    # SystemExit gives the same exit codes as the former exit()/exit(1) without
    # depending on the site-provided builtin.
    raise SystemExit(0 if passed else 1)
def change_input_dim(model, bsz):
    """Set the first (batch) dimension of every graph input, in place.

    Args:
        model: ONNX model whose ``graph.input`` entries are tensors.
        bsz: An ``int`` or a digit-only string sets a fixed batch size, any
            other string becomes a symbolic (dynamic) dimension name, and
            anything else (e.g. ``None``) falls back to a batch size of 1.

    Inputs without any dimension are left untouched.
    """
    # A digit-only string is a fixed size. The original tested for "any string"
    # first, which made this case unreachable and stored e.g. "32" as a
    # symbolic name.
    is_fixed = isinstance(bsz, int) or (isinstance(bsz, str) and bsz.isdigit())

    for graph_input in model.graph.input:
        dims = graph_input.type.tensor_type.shape.dim
        if not dims:
            continue
        dim1 = dims[0]
        if is_fixed:
            # set given batch size
            dim1.dim_value = int(bsz)
        elif isinstance(bsz, str):
            # set dynamic batch size
            dim1.dim_param = bsz
        else:
            # set batch size of 1
            dim1.dim_value = 1


def parse_args():
    """Parse ``--batch_size``, ``--origin_model`` and ``--output_model``."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--batch_size", type=int)
    parser.add_argument("--origin_model", type=str)
    parser.add_argument("--output_model", type=str)
    args = parser.parse_args()
    return args


# Guarded so that importing this module no longer parses argv and rewrites a model.
if __name__ == "__main__":
    args = parse_args()
    model = onnx.load(args.origin_model)
    change_input_dim(model, args.batch_size)
    onnx.save(model, args.output_model)
def parse_args():
    """Parse the quantization command line, echo it, and return the namespace.

    Options: model name/path, calibration dataset directory, observer kind,
    names to exclude from quantization, output directory, calibration batch
    size and step count, random seed and image size.
    """
    observer_kinds = ["hist_percentile", "percentile", "minmax", "entropy", "ema"]
    # (flag, add_argument keyword arguments), kept in the original declaration order.
    option_table = (
        ("--model_name", {"type": str}),
        ("--model", {"type": str}),
        ("--dataset_dir", {"type": str, "default": "imagenet_val"}),
        ("--observer", {"type": str, "choices": observer_kinds, "default": "hist_percentile"}),
        ("--disable_quant_names", {"nargs": '*', "type": str}),
        ("--save_dir", {"type": str, "help": "save path", "default": None}),
        ("--bsz", {"type": int, "default": 32}),
        ("--step", {"type": int, "default": 20}),
        ("--seed", {"type": int, "default": 42}),
        ("--imgsz", {"type": int, "default": 288}),
    )

    cli = argparse.ArgumentParser()
    for flag, spec in option_table:
        cli.add_argument(flag, **spec)

    parsed = cli.parse_args()
    print("Quant config:", parsed)
    print(parsed.disable_quant_names)
    return parsed
NETWORK_INPUTSHAPE = [32, 3, 224, 224]  # input shape of your network
EXECUTING_DEVICE = 'cuda'  # 'cuda' or 'cpu'.
REQUIRE_ANALYSE = False
TRAINING_YOUR_NETWORK = False  # whether to fine-tune the network during quantization

# Apply the --seed option so calibration is reproducible (it was parsed but
# never used before).
setseed(config.seed)

# Fall back to the working directory when --save_dir is omitted; joining a
# path onto None would raise TypeError at export time.
save_dir = config.save_dir if config.save_dir is not None else WORKING_DIRECTORY
os.makedirs(save_dir, exist_ok=True)


def _images_to_device(batch):
    """Collate one dataloader item: take the image tensor (index 0) and move it to the device."""
    return batch[0].to(EXECUTING_DEVICE)


# -------------------------------------------------------------------
# Load the model file. PPQ parses an ONNX or Caffe model into its own format.
# If you work with PyTorch, TensorFlow, etc., export the model to ONNX first
# (torch.onnx.export is enough).
# -------------------------------------------------------------------
graph = None
if MODEL_TYPE == NetworkFramework.ONNX:
    graph = load_onnx_graph(onnx_import_file=config.model)
if MODEL_TYPE == NetworkFramework.CAFFE:
    graph = load_caffe_graph(
        caffemodel_path=os.path.join(WORKING_DIRECTORY, 'model.caffemodel'),
        prototxt_path=os.path.join(WORKING_DIRECTORY, 'model.prototxt'))
assert graph is not None, 'Graph Loading Error, Check your input again.'

# -------------------------------------------------------------------
# The SETTING object controls PPQ's quantization logic: graph fusion,
# dispatching scheme and detailed quantization policies. When the
# quantization error is too high, tune its attributes.
# -------------------------------------------------------------------
QS = QuantizationSettingFactory.default_setting()

# -------------------------------------------------------------------
# Optional fine-tuning to recover accuracy. PPQ offers a number of such
# algorithms, each enabled with QS.xxxx = True. Enable only what you need.
# -------------------------------------------------------------------
if TRAINING_YOUR_NETWORK:
    QS.lsq_optimization = True  # retrain the network to reduce quantization error
    QS.lsq_optimization_setting.steps = 500  # retraining steps; affects run time
    QS.lsq_optimization_setting.collecting_device = 'cuda'  # where cached data lives; use 'cpu' if GPU memory runs out

dataloader = getdataloader(config.dataset_dir, config.step, config.bsz, img_sz=config.imgsz)

# ENABLE_CUDA_KERNEL speeds quantization up considerably but needs a working
# compile environment. To run without it, drop the `with ENABLE_CUDA_KERNEL():`.
with ENABLE_CUDA_KERNEL():
    print('网络正量化中,根据你的量化配置,这将需要一段时间:')
    quantized = quantize_native_model(
        setting=QS,  # controls the standard quantization logic
        model=graph,
        calib_dataloader=dataloader,
        calib_steps=config.step,
        input_shape=NETWORK_INPUTSHAPE,  # use this for a single-input network
        inputs=None,  # for multi-input networks pass input_shape=None and a list of tensors here
        collate_fn=_images_to_device,  # same role as a torch DataLoader collate_fn
        platform=TARGET_PLATFORM,
        device=EXECUTING_DEVICE,
        do_quantize=True)

    # -------------------------------------------------------------------
    # An executor is needed to run the quantized network; it behaves much
    # like a torch module. It must be created before exporting.
    # -------------------------------------------------------------------
    executor = TorchExecutor(graph=quantized, device=EXECUTING_DEVICE)
    # output = executor.forward(input)

    # -------------------------------------------------------------------
    # PPQ reports quantization error as the inverse signal-to-noise ratio
    # (noise energy / signal energy); 0.1 means roughly 10% noise.
    # graphwise_error_analyse measures ACCUMULATED error, so the last layers
    # usually show the largest values; use layerwise_error_analyse to find
    # the layers responsible.
    # -------------------------------------------------------------------
    print('正计算网络量化误差(SNR),最后一层的误差应小于 0.1 以保证量化精度:')
    reports = graphwise_error_analyse(
        graph=quantized, running_device=EXECUTING_DEVICE, steps=32,
        dataloader=dataloader, collate_fn=_images_to_device)
    for op, snr in reports.items():
        if snr > 0.1:
            ppq_warning(f'层 {op} 的累计量化误差显著,请考虑进行优化')

    if REQUIRE_ANALYSE:
        print('正计算逐层量化误差(SNR),每一层的独立量化误差应小于 0.1 以保证量化精度:')
        # Same collate as above: the loader yields (images, labels), so the
        # image tensor must be selected before calling .to().
        layerwise_error_analyse(graph=quantized, running_device=EXECUTING_DEVICE,
                                interested_outputs=None,
                                dataloader=dataloader, collate_fn=_images_to_device)

    # -------------------------------------------------------------------
    # export_ppq_graph writes the quantized model in the format required by
    # the selected target platform.
    # -------------------------------------------------------------------
    print('网络量化结束,正在生成目标文件:')
    export_ppq_graph(
        graph=quantized, platform=TARGET_PLATFORM,
        graph_save_to=os.path.join(save_dir, f"quantized_{config.model_name}.onnx"),
        config_save_to=os.path.join(save_dir, 'quant_cfg.json'))
def get_constant_input_name_of_operator(graph: Graph, operator: Operator):
    """Return the name of the operator's constant input, or ``None``.

    An input counts as constant when the graph contains it, it is a leaf
    variable, and its stored ``value`` is not ``None``. When several inputs
    qualify, the last one in ``operator.inputs`` is returned.
    """
    constant_name = None
    for candidate in operator.inputs:
        is_known_leaf = graph.containe_var(candidate) and graph.is_leaf_variable(candidate)
        if is_known_leaf and graph.get_variable(candidate).value is not None:
            constant_name = candidate
    return constant_name
class FusedGeluPass(BasePass):
    """Graph pass that replaces a DIV -> ERF -> ADD -> MUL -> MUL chain with one GELU operator."""

    def process(self, graph: Graph) -> Graph:
        """Search ``graph`` for the operator chain and fuse every match.

        Returns the same graph object, modified in place through
        ``GraphTransform``.
        """
        self.transform = GraphTransform(graph)

        find_sequence_subgraph(
            graph, pattern=[OP.DIV, OP.ERF, OP.ADD, OP.MUL, OP.MUL], callback=self.fuse_gelu, strict=True
        )
        return graph

    def fuse_gelu(self, graph: Graph, pattern: PatternGraph):
        """Fuse one matched chain into a GELU operator, or leave it untouched.

        The match is rejected (nothing is modified) when:
          * the leading DIV has no producing operator,
          * that producer does not feed exactly two operators, namely the
            DIV and the first MUL of the chain,
          * DIV and the first MUL share no input variable.
        """
        nodes = pattern.nodes
        div_op = nodes[0].operator
        first_mul_op = nodes[3].operator
        last_op = nodes[-1].operator

        producers = self.transform.get_previous_operators(div_op)
        if not producers:
            # DIV is fed directly by a graph input/constant; no producer to inspect.
            return

        consumers = self.transform.get_next_operators(producers[0])
        if len(consumers) != 2:
            return
        if div_op not in consumers or first_mul_op not in consumers:
            return

        # The GELU input is the variable consumed by both DIV and the first MUL.
        shared_input = next(
            (name for name in first_mul_op.inputs if name in div_op.inputs), None
        )
        if shared_input is None:
            # Bail out BEFORE deleting anything, so the graph is not left
            # with a hole and a GELU wired to a missing input.
            return

        self.transform.delete_operators_between_op_op(div_op, last_op)

        gelu_op = self.transform.make_operator(
            op_type=OP.GELU,
            inputs=[shared_input],
            outputs=[last_op.outputs[0]]
        )
        self.transform.add_operator(gelu_op)
class FormatLayerNorm(BasePass):
    """Graph pass that normalizes the ``axis`` attribute of LayerNorm operators to a scalar."""

    def process(self, graph: Graph) -> Graph:
        """Visit every operator whose type contains "LayerNorm" and return the graph."""
        layer_norm_ops = (
            candidate for candidate in graph.operators.values()
            if "LayerNorm" in candidate.op_type
        )
        for layer_norm in layer_norm_ops:
            self.format_layer_norm(graph, layer_norm)
        return graph

    def format_layer_norm(self, graph, operator):
        """Replace a tuple/list ``axis`` attribute with its first element.

        Operators without an ``axis`` attribute, or whose ``axis`` is already
        a scalar, are left unchanged.
        """
        attrs = operator.attributes
        if hasattr(attrs, "axis") and isinstance(attrs.axis, (tuple, list)):
            attrs.axis = attrs.axis[0]
def parse_args():
    """Parse the refine-model command line.

    Returns a namespace with ``onnx_path``, ``dst_onnx_path`` (both default
    to ``None``), ``bsz`` (default 8) and ``imgsz`` (default 224).
    """
    cli = argparse.ArgumentParser()

    # Source and destination model paths.
    for path_flag in ("--onnx_path", "--dst_onnx_path"):
        cli.add_argument(path_flag, type=str)

    # Shape of the example input used for refinement.
    for size_flag, fallback, description in (
        ("--bsz", 8, "Batch size"),
        ("--imgsz", 224, "Image size"),
    ):
        cli.add_argument(size_flag, type=int, default=fallback, help=description)

    parsed = cli.parse_args()
    return parsed
# FP16 accuracy run: simplify the ONNX model, fix its batch size, build an
# engine and run inference.py in ACC mode.
# Reads CONFIG_DIR (a file that is sourced), CHECKPOINTS_DIR, DATASETS_DIR and
# RUN_DIR from the environment. Optional arguments: --bs <batch> --tgt <target>.

EXIT_STATUS=0
# Record a failure when the command run just before this call exited non-zero.
check_status()
{
    if ((${PIPESTATUS[0]} != 0));then
        EXIT_STATUS=1
    fi
}

# Run parameters (defaults; --bs / --tgt override BSZ / TGT below)
BSZ=32
TGT=-1
WARM_UP=0
LOOP_COUNT=-1
RUN_MODE=ACC
PRECISION=float16

# Update arguments
# index is incremented before the case, so arguments[index] is the word that
# follows the flag currently being examined.
index=0
options=$@
arguments=($options)
for argument in $options
do
    index=`expr $index + 1`
    case $argument in
      --bs) BSZ=${arguments[index]};;
      --tgt) TGT=${arguments[index]};;
    esac
done

# The sourced config supplies the model variables used below
# (e.g. MODEL_NAME, ORIGINE_MODEL, IMGSIZE).
source ${CONFIG_DIR}
ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}

echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
echo DATASETS_DIR : ${DATASETS_DIR}
echo RUN_DIR : ${RUN_DIR}
echo CONFIG_DIR : ${CONFIG_DIR}
echo ====================== Model Info ======================
echo Model Name : ${MODEL_NAME}
echo Model Input Name : ${MODEL_INPUT_NAME}
echo Model Output Name : ${MODEL_OUTPUT_NAME}
echo Onnx Path : ${ORIGINE_MODEL}

step=0
SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx

# Simplify Model (skipped when the simplified file already exists)
let step++
echo;
echo [STEP ${step}] : Simplify Model
if [ -f ${SIM_MODEL} ];then
    echo " "Simplify Model, ${SIM_MODEL} has been existed
else
    python3 ${RUN_DIR}/simplify_model.py \
        --origin_model $ORIGINE_MODEL \
        --output_model ${SIM_MODEL}
    echo " "Generate ${SIM_MODEL}
fi

# Quant Model (only runs when PRECISION is int8; this script sets float16)
if [ $PRECISION == "int8" ];then
    let step++
    echo;
    echo [STEP ${step}] : Quant Model
    if [[ -z ${QUANT_EXIST_ONNX} ]];then
        QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx
    fi
    if [[ -f ${QUANT_EXIST_ONNX} ]];then
        SIM_MODEL=${QUANT_EXIST_ONNX}
        echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed
    else
        python3 ${RUN_DIR}/quant.py \
            --model ${SIM_MODEL} \
            --model_name ${MODEL_NAME} \
            --dataset_dir ${DATASETS_DIR} \
            --observer ${QUANT_OBSERVER} \
            --disable_quant_names ${DISABLE_QUANT_LIST[@]} \
            --save_dir $CHECKPOINTS_DIR \
            --bsz ${QUANT_BATCHSIZE} \
            --step ${QUANT_STEP} \
            --seed ${QUANT_SEED} \
            --imgsz ${IMGSIZE}
        SIM_MODEL=${QUANT_EXIST_ONNX}
        echo " "Generate ${SIM_MODEL}
    fi
fi

# Change Batchsize (output is cached per batch size)
let step++
echo;
echo [STEP ${step}] : Change Batchsize
FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx
if [ -f $FINAL_MODEL ];then
    echo " "Change Batchsize Skip, $FINAL_MODEL has been existed
else
    python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
        --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL}
    echo " "Generate ${FINAL_MODEL}
fi

# Build Engine (output is cached per precision and batch size)
let step++
echo;
echo [STEP ${step}] : Build Engine
ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
if [ -f $ENGINE_FILE ];then
    echo " "Build Engine Skip, $ENGINE_FILE has been existed
else
    python3 ${RUN_DIR}/build_engine.py \
        --precision ${PRECISION} \
        --model ${FINAL_MODEL} \
        --engine ${ENGINE_FILE}
    echo " "Generate Engine ${ENGINE_FILE}
fi

# Inference (the only step whose exit status is recorded in EXIT_STATUS)
let step++
echo;
echo [STEP ${step}] : Inference
python3 ${RUN_DIR}/inference.py \
    --engine_file=${ENGINE_FILE} \
    --datasets_dir=${DATASETS_DIR} \
    --imgsz=${IMGSIZE} \
    --warm_up=${WARM_UP} \
    --loop_count ${LOOP_COUNT} \
    --test_mode ${RUN_MODE} \
    --acc_target ${TGT} \
    --bsz ${BSZ}; check_status

exit ${EXIT_STATUS}
# FP16 performance run: simplify the ONNX model, fix its batch size, build an
# engine and run inference.py in FPS mode.
# Reads CONFIG_DIR (a file that is sourced), CHECKPOINTS_DIR, DATASETS_DIR and
# RUN_DIR from the environment. Optional arguments: --bs <batch> --tgt <target>.

EXIT_STATUS=0
# Record a failure when the command run just before this call exited non-zero.
check_status()
{
    if ((${PIPESTATUS[0]} != 0));then
        EXIT_STATUS=1
    fi
}

# Run parameters (defaults; --bs / --tgt override BSZ / TGT below)
BSZ=32
TGT=-1
WARM_UP=3
LOOP_COUNT=20
RUN_MODE=FPS
PRECISION=float16

# Update arguments
# index is incremented before the case, so arguments[index] is the word that
# follows the flag currently being examined.
index=0
options=$@
arguments=($options)
for argument in $options
do
    index=`expr $index + 1`
    case $argument in
      --bs) BSZ=${arguments[index]};;
      --tgt) TGT=${arguments[index]};;
    esac
done

# The sourced config supplies the model variables used below
# (e.g. MODEL_NAME, ORIGINE_MODEL, IMGSIZE).
source ${CONFIG_DIR}
ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}

echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
echo DATASETS_DIR : ${DATASETS_DIR}
echo RUN_DIR : ${RUN_DIR}
echo CONFIG_DIR : ${CONFIG_DIR}
echo ====================== Model Info ======================
echo Model Name : ${MODEL_NAME}
echo Model Input Name : ${MODEL_INPUT_NAME}
echo Model Output Name : ${MODEL_OUTPUT_NAME}
echo Onnx Path : ${ORIGINE_MODEL}

step=0
SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx

# Simplify Model (skipped when the simplified file already exists)
let step++
echo;
echo [STEP ${step}] : Simplify Model
if [ -f ${SIM_MODEL} ];then
    echo " "Simplify Model, ${SIM_MODEL} has been existed
else
    python3 ${RUN_DIR}/simplify_model.py \
        --origin_model $ORIGINE_MODEL \
        --output_model ${SIM_MODEL}
    echo " "Generate ${SIM_MODEL}
fi

# Quant Model (only runs when PRECISION is int8; this script sets float16)
if [ $PRECISION == "int8" ];then
    let step++
    echo;
    echo [STEP ${step}] : Quant Model
    if [[ -z ${QUANT_EXIST_ONNX} ]];then
        QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx
    fi
    if [[ -f ${QUANT_EXIST_ONNX} ]];then
        SIM_MODEL=${QUANT_EXIST_ONNX}
        echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed
    else
        python3 ${RUN_DIR}/quant.py \
            --model ${SIM_MODEL} \
            --model_name ${MODEL_NAME} \
            --dataset_dir ${DATASETS_DIR} \
            --observer ${QUANT_OBSERVER} \
            --disable_quant_names ${DISABLE_QUANT_LIST[@]} \
            --save_dir $CHECKPOINTS_DIR \
            --bsz ${QUANT_BATCHSIZE} \
            --step ${QUANT_STEP} \
            --seed ${QUANT_SEED} \
            --imgsz ${IMGSIZE}
        SIM_MODEL=${QUANT_EXIST_ONNX}
        echo " "Generate ${SIM_MODEL}
    fi
fi

# Change Batchsize (output is cached per batch size)
let step++
echo;
echo [STEP ${step}] : Change Batchsize
FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx
if [ -f $FINAL_MODEL ];then
    echo " "Change Batchsize Skip, $FINAL_MODEL has been existed
else
    python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
        --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL}
    echo " "Generate ${FINAL_MODEL}
fi

# Build Engine (output is cached per precision and batch size)
let step++
echo;
echo [STEP ${step}] : Build Engine
ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
if [ -f $ENGINE_FILE ];then
    echo " "Build Engine Skip, $ENGINE_FILE has been existed
else
    python3 ${RUN_DIR}/build_engine.py \
        --precision ${PRECISION} \
        --model ${FINAL_MODEL} \
        --engine ${ENGINE_FILE}
    echo " "Generate Engine ${ENGINE_FILE}
fi

# Inference (the only step whose exit status is recorded in EXIT_STATUS)
let step++
echo;
echo [STEP ${step}] : Inference
python3 ${RUN_DIR}/inference.py \
    --engine_file=${ENGINE_FILE} \
    --datasets_dir=${DATASETS_DIR} \
    --imgsz=${IMGSIZE} \
    --warm_up=${WARM_UP} \
    --loop_count ${LOOP_COUNT} \
    --test_mode ${RUN_MODE} \
    --fps_target ${TGT} \
    --bsz ${BSZ}; check_status

exit ${EXIT_STATUS}
a/models/cv/classification/inception_v3/ixrt/scripts/infer_inception_v3_int8_accuracy.sh b/models/cv/classification/inception_v3/ixrt/scripts/infer_inception_v3_int8_accuracy.sh new file mode 100755 index 0000000000000000000000000000000000000000..cf11c5a69e18295bcf049ef55b4806d9264b899c --- /dev/null +++ b/models/cv/classification/inception_v3/ixrt/scripts/infer_inception_v3_int8_accuracy.sh @@ -0,0 +1,145 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
# INT8 accuracy run: simplify the ONNX model, quantize it with quant.py, fix
# its batch size, build an INT8 engine and run inference.py in ACC mode.
# Reads CONFIG_DIR (a file that is sourced), CHECKPOINTS_DIR, DATASETS_DIR and
# RUN_DIR from the environment. Optional arguments: --bs <batch> --tgt <target>.

EXIT_STATUS=0
# Record a failure when the command run just before this call exited non-zero.
check_status()
{
    if ((${PIPESTATUS[0]} != 0));then
        EXIT_STATUS=1
    fi
}

# Run parameters (defaults; --bs / --tgt override BSZ / TGT below)
BSZ=32
TGT=-1
WARM_UP=0
LOOP_COUNT=-1
RUN_MODE=ACC
PRECISION=int8

# Update arguments
# index is incremented before the case, so arguments[index] is the word that
# follows the flag currently being examined.
index=0
options=$@
arguments=($options)
for argument in $options
do
    index=`expr $index + 1`
    case $argument in
      --bs) BSZ=${arguments[index]};;
      --tgt) TGT=${arguments[index]};;
    esac
done

# The sourced config supplies the model and quantization variables used below
# (e.g. MODEL_NAME, ORIGINE_MODEL, IMGSIZE, QUANT_*).
source ${CONFIG_DIR}
ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}

echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
echo DATASETS_DIR : ${DATASETS_DIR}
echo RUN_DIR : ${RUN_DIR}
echo CONFIG_DIR : ${CONFIG_DIR}
echo ====================== Model Info ======================
echo Model Name : ${MODEL_NAME}
echo Model Input Name : ${MODEL_INPUT_NAME}
echo Model Output Name : ${MODEL_OUTPUT_NAME}
echo Onnx Path : ${ORIGINE_MODEL}

step=0
SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx

# Simplify Model (skipped when the simplified file already exists)
let step++
echo;
echo [STEP ${step}] : Simplify Model
if [ -f ${SIM_MODEL} ];then
    echo " "Simplify Model, ${SIM_MODEL} has been existed
else
    python3 ${RUN_DIR}/simplify_model.py \
        --origin_model $ORIGINE_MODEL \
        --output_model ${SIM_MODEL}
    echo " "Generate ${SIM_MODEL}
fi

# Quant Model (runs here because PRECISION is int8; reuses an existing
# quantized model when one is found, and SIM_MODEL then points at it)
if [ $PRECISION == "int8" ];then
    let step++
    echo;
    echo [STEP ${step}] : Quant Model
    if [[ -z ${QUANT_EXIST_ONNX} ]];then
        QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx
    fi
    if [[ -f ${QUANT_EXIST_ONNX} ]];then
        SIM_MODEL=${QUANT_EXIST_ONNX}
        echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed
    else
        python3 ${RUN_DIR}/quant.py \
            --model ${SIM_MODEL} \
            --model_name ${MODEL_NAME} \
            --dataset_dir ${DATASETS_DIR} \
            --observer ${QUANT_OBSERVER} \
            --disable_quant_names ${DISABLE_QUANT_LIST[@]} \
            --save_dir $CHECKPOINTS_DIR \
            --bsz ${QUANT_BATCHSIZE} \
            --step ${QUANT_STEP} \
            --seed ${QUANT_SEED} \
            --imgsz ${IMGSIZE}
        SIM_MODEL=${QUANT_EXIST_ONNX}
        echo " "Generate ${SIM_MODEL}
    fi
fi

# Change Batchsize (output is cached per batch size)
let step++
echo;
echo [STEP ${step}] : Change Batchsize
FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx
if [ -f $FINAL_MODEL ];then
    echo " "Change Batchsize Skip, $FINAL_MODEL has been existed
else
    python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
        --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL}
    echo " "Generate ${FINAL_MODEL}
fi

 # Build Engine (uses build_i8_engine.py with the quant_cfg.json from the
 # checkpoints directory; output is cached per precision and batch size)
 let step++
 echo;
 echo [STEP ${step}] : Build Engine
 ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
 if [ -f $ENGINE_FILE ];then
     echo " "Build Engine Skip, $ENGINE_FILE has been existed
 else
     python3 ${RUN_DIR}/build_i8_engine.py \
         --onnx ${FINAL_MODEL} \
         --qparam_json ${CHECKPOINTS_DIR}/quant_cfg.json \
         --engine ${ENGINE_FILE}
     echo " "Generate Engine ${ENGINE_FILE}
 fi

# Inference (the only step whose exit status is recorded in EXIT_STATUS)
let step++
echo;
echo [STEP ${step}] : Inference
python3 ${RUN_DIR}/inference.py \
    --engine_file=${ENGINE_FILE} \
    --datasets_dir=${DATASETS_DIR} \
    --imgsz=${IMGSIZE} \
    --warm_up=${WARM_UP} \
    --loop_count ${LOOP_COUNT} \
    --test_mode ${RUN_MODE} \
    --acc_target ${TGT} \
    --bsz ${BSZ}; check_status

exit ${EXIT_STATUS}
# INT8 performance run: simplify the ONNX model, quantize it with quant.py,
# fix its batch size, build an INT8 engine and run inference.py in FPS mode.
# Reads CONFIG_DIR (a file that is sourced), CHECKPOINTS_DIR, DATASETS_DIR and
# RUN_DIR from the environment. Optional arguments: --bs <batch> --tgt <target>.

EXIT_STATUS=0
# Record a failure when the command run just before this call exited non-zero.
check_status()
{
    if ((${PIPESTATUS[0]} != 0));then
        EXIT_STATUS=1
    fi
}

# Run parameters (defaults; --bs / --tgt override BSZ / TGT below)
BSZ=32
TGT=-1
WARM_UP=3
LOOP_COUNT=20
RUN_MODE=FPS
PRECISION=int8

# Update arguments
# index is incremented before the case, so arguments[index] is the word that
# follows the flag currently being examined.
index=0
options=$@
arguments=($options)
for argument in $options
do
    index=`expr $index + 1`
    case $argument in
      --bs) BSZ=${arguments[index]};;
      --tgt) TGT=${arguments[index]};;
    esac
done

# The sourced config supplies the model and quantization variables used below
# (e.g. MODEL_NAME, ORIGINE_MODEL, IMGSIZE, QUANT_*).
source ${CONFIG_DIR}
ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}

echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
echo DATASETS_DIR : ${DATASETS_DIR}
echo RUN_DIR : ${RUN_DIR}
echo CONFIG_DIR : ${CONFIG_DIR}
echo ====================== Model Info ======================
echo Model Name : ${MODEL_NAME}
echo Model Input Name : ${MODEL_INPUT_NAME}
echo Model Output Name : ${MODEL_OUTPUT_NAME}
echo Onnx Path : ${ORIGINE_MODEL}

step=0
SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx

# Simplify Model (skipped when the simplified file already exists)
let step++
echo;
echo [STEP ${step}] : Simplify Model
if [ -f ${SIM_MODEL} ];then
    echo " "Simplify Model, ${SIM_MODEL} has been existed
else
    python3 ${RUN_DIR}/simplify_model.py \
        --origin_model $ORIGINE_MODEL \
        --output_model ${SIM_MODEL}
    echo " "Generate ${SIM_MODEL}
fi

# Quant Model (runs here because PRECISION is int8; reuses an existing
# quantized model when one is found, and SIM_MODEL then points at it)
if [ $PRECISION == "int8" ];then
    let step++
    echo;
    echo [STEP ${step}] : Quant Model
    if [[ -z ${QUANT_EXIST_ONNX} ]];then
        QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx
    fi
    if [[ -f ${QUANT_EXIST_ONNX} ]];then
        SIM_MODEL=${QUANT_EXIST_ONNX}
        echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed
    else
        python3 ${RUN_DIR}/quant.py \
            --model ${SIM_MODEL} \
            --model_name ${MODEL_NAME} \
            --dataset_dir ${DATASETS_DIR} \
            --observer ${QUANT_OBSERVER} \
            --disable_quant_names ${DISABLE_QUANT_LIST[@]} \
            --save_dir $CHECKPOINTS_DIR \
            --bsz ${QUANT_BATCHSIZE} \
            --step ${QUANT_STEP} \
            --seed ${QUANT_SEED} \
            --imgsz ${IMGSIZE}
        SIM_MODEL=${QUANT_EXIST_ONNX}
        echo " "Generate ${SIM_MODEL}
    fi
fi

# Change Batchsize (output is cached per batch size)
let step++
echo;
echo [STEP ${step}] : Change Batchsize
FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx
if [ -f $FINAL_MODEL ];then
    echo " "Change Batchsize Skip, $FINAL_MODEL has been existed
else
    python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
        --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL}
    echo " "Generate ${FINAL_MODEL}
fi

# Build Engine (uses build_i8_engine.py with the quant_cfg.json from the
# checkpoints directory; output is cached per precision and batch size)
let step++
echo;
echo [STEP ${step}] : Build Engine
ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
if [ -f $ENGINE_FILE ];then
    echo " "Build Engine Skip, $ENGINE_FILE has been existed
else
    python3 ${RUN_DIR}/build_i8_engine.py \
        --onnx ${FINAL_MODEL} \
        --qparam_json ${CHECKPOINTS_DIR}/quant_cfg.json \
        --engine ${ENGINE_FILE}
    echo " "Generate Engine ${ENGINE_FILE}
fi

# Inference (the only step whose exit status is recorded in EXIT_STATUS).
# The step counter is advanced here like in the sibling scripts, and the
# target is passed as --fps_target because this script runs in FPS mode.
let step++
echo;
echo [STEP ${step}] : Inference
python3 ${RUN_DIR}/inference.py \
    --engine_file=${ENGINE_FILE} \
    --datasets_dir=${DATASETS_DIR} \
    --imgsz=${IMGSIZE} \
    --warm_up=${WARM_UP} \
    --loop_count ${LOOP_COUNT} \
    --test_mode ${RUN_MODE} \
    --fps_target ${TGT} \
    --bsz ${BSZ}; check_status

exit ${EXIT_STATUS}
def simplify_model(args):
    """Simplify an ONNX model, run shape inference on it and save the result.

    Args:
        args: Namespace with ``origin_model`` (path of the model to load) and
            ``output_model`` (path the simplified model is written to).

    Raises:
        RuntimeError: If ``simplify`` reports that the simplified model could
            not be validated. Nothing is written in that case, so a broken
            model is never picked up by later pipeline steps.
    """
    onnx_model = onnx.load(args.origin_model)
    model_simp, check = simplify(onnx_model)
    if not check:
        # `check` is the validation flag returned by onnxsim; it used to be
        # dropped, letting an unvalidated model be saved silently.
        raise RuntimeError(
            f"Simplified ONNX model could not be validated: {args.origin_model}"
        )
    model_simp = onnx.shape_inference.infer_shapes(model_simp)
    onnx.save(model_simp, args.output_model)
    print(" Simplify onnx Done.")
+ +## Setup + +### Install +```bash +yum install mesa-libGL + +pip3 install tqdm +pip3 install onnx +pip3 install onnxsim +pip3 install tabulate +pip3 install ppq +pip3 install mmpretrain +pip3 install mmcv-lite +``` + +### Download + +Dataset: to download the validation dataset. + +### Model Conversion +```bash +mkdir checkpoints +python3 export_onnx.py --output_model checkpoints/resnet_v1_d50.onnx +``` + +## Inference +```bash +export DATASETS_DIR=/path/to/imagenet_val/ +export CHECKPOINTS_DIR=./checkpoints +export RUN_DIR=./ +export CONFIG_DIR=config/RESNET_V1_D50_CONFIG +``` +### FP16 + +```bash +# Accuracy +bash scripts/infer_resnet_v1_d50_fp16_accuracy.sh +# Performance +bash scripts/infer_resnet_v1_d50_fp16_performance.sh +``` + +### INT8 +```bash +# Accuracy +bash scripts/infer_resnet_v1_d50_int8_accuracy.sh +# Performance +bash scripts/infer_resnet_v1_d50_int8_performance.sh +``` + +## Results + +Model |BatchSize |Precision |FPS |Top-1(%) |Top-5(%) +---------|-----------|----------|----------|----------|-------- +ResNet50 | | FP16 | 3887.55 | 0.77544 | 0.93568 +ResNet50 | | INT8 | 7148.58 | 0.7711 | 0.93514 + + diff --git a/models/cv/classification/resnet_v1_d50/ixrt/build_engine.py b/models/cv/classification/resnet_v1_d50/ixrt/build_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..4fbcc0154bf55991e4f8a983377f5bc90e760bdc --- /dev/null +++ b/models/cv/classification/resnet_v1_d50/ixrt/build_engine.py @@ -0,0 +1,109 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
def assertSuccess(err):
    """Assert that a CUDA runtime call returned ``cudaSuccess``."""
    assert err == cudart.cudaError_t.cudaSuccess, "CUDA runtime call failed: {}".format(err)


class EngineCalibrator(tensorrt.IInt8EntropyCalibrator2):
    """INT8 entropy calibrator that feeds image batches from a dataloader.

    Each batch is copied into a single device buffer that is allocated once
    in ``__init__`` and released in ``__del__``.
    """

    def __init__(self, cache_file, datasets_dir, loop_count=10, bsz=1, img_sz=224):
        """Create the dataloader and allocate the device staging buffer.

        Args:
            cache_file: Path used to read/write the calibration cache.
            datasets_dir: Dataset directory handed to ``getdataloader``.
            loop_count: Number of calibration steps handed to ``getdataloader``.
            bsz: Calibration batch size.
            img_sz: Square input resolution.
        """
        super().__init__()
        self.cache_file = cache_file
        # Set first so __del__ is safe even if construction fails below.
        self.batch_allocation = None
        self.image_batcher = getdataloader(datasets_dir, loop_count, batch_size=bsz, img_sz=img_sz)
        self.batch_generator = iter(self.image_batcher)
        # cudaMalloc takes a size in BYTES: the batches copied in get_batch are
        # float32, so the element count must be scaled by the item size.
        size = bsz * 3 * img_sz * img_sz * np.dtype(np.float32).itemsize
        err, allocation = cudart.cudaMalloc(size)
        assertSuccess(err)
        self.batch_allocation = allocation

    def __del__(self):
        """Free the device staging buffer, if one was allocated."""
        allocation = getattr(self, "batch_allocation", None)
        if allocation is None:
            return
        err, = cudart.cudaFree(allocation)
        assertSuccess(err)

    def get_batch_size(self):
        """Return the batch size of the underlying dataloader."""
        return self.image_batcher.batch_size

    def get_batch(self, names):
        """Copy the next batch to the device and return its address.

        Returns:
            A one-element list with the device pointer, or ``None`` once the
            dataloader is exhausted.
        """
        try:
            batch, _ = next(self.batch_generator)
        except StopIteration:
            return None
        # Force float32 + contiguous so nbytes matches the allocation above.
        batch = np.ascontiguousarray(batch.numpy(), dtype=np.float32)
        err, = cudart.cudaMemcpy(self.batch_allocation,
                                 batch,
                                 batch.nbytes,
                                 cudart.cudaMemcpyKind.cudaMemcpyHostToDevice)
        assertSuccess(err)
        return [int(self.batch_allocation)]

    def read_calibration_cache(self):
        """Return the cached calibration bytes, or ``None`` if no cache exists."""
        if not os.path.exists(self.cache_file):
            return None
        with open(self.cache_file, "rb") as f:
            return f.read()

    def write_calibration_cache(self, cache):
        """Persist the calibration cache to ``self.cache_file``."""
        with open(self.cache_file, "wb") as f:
            f.write(cache)


def main(config):
    """Build an engine from ``config.model`` and write it to ``config.engine``.

    Raises:
        RuntimeError: If the ONNX file cannot be parsed or the builder fails
            to produce a serialized engine.
    """
    IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING)
    builder = tensorrt.Builder(IXRT_LOGGER)
    EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    network = builder.create_network(EXPLICIT_BATCH)
    build_config = builder.create_builder_config()
    parser = tensorrt.OnnxParser(network, IXRT_LOGGER)
    if not parser.parse_from_file(config.model):
        errors = [str(parser.get_error(i)) for i in range(parser.num_errors)]
        raise RuntimeError(
            "Failed to parse ONNX file {}: {}".format(config.model, "; ".join(errors))
        )

    # NOTE(review): "float32" is an accepted choice but still selects the FP16
    # flag here; behavior kept as-is, confirm whether that is intended.
    precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16
    print("precision : ", precision)
    build_config.set_flag(precision)
    if config.precision == "int8":
        build_config.int8_calibrator = EngineCalibrator("int8_cache", config.datasets_dir)

    plan = builder.build_serialized_network(network, build_config)
    if plan is None:
        raise RuntimeError("Engine build failed for {}".format(config.model))
    engine_file_path = config.engine
    with open(engine_file_path, "wb") as f:
        f.write(plan)


def parse_args(argv=None):
    """Parse command-line options.

    Args:
        argv: Optional list of argument strings; ``None`` reads ``sys.argv``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", type=str)
    parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8",
            help="The precision of datatype")
    parser.add_argument("--engine", type=str, default=None)
    parser.add_argument(
        "--datasets_dir",
        type=str,
        default="",
        help="ImageNet dir",
    )
    return parser.parse_args(argv)


if __name__ == "__main__":
    args = parse_args()
    main(args)
def GiB(val):
    """Return ``val`` gibibytes expressed in bytes."""
    return val * (1 << 30)


def json_load(filename):
    """Load and return the JSON document stored in ``filename``."""
    with open(filename) as json_file:
        return json.load(json_file)


def _symmetric_range(value):
    """Return the symmetric ``(-|value|, |value|)`` range for a tensor."""
    bound = abs(value)
    return (-bound, bound)


def setDynamicRange(network, json_file):
    """Set the dynamic range of network tensors from a quantization JSON.

    The JSON must contain an ``act_quant_info`` mapping of tensor name to a
    scalar; each listed network input and layer output gets the range
    ``(-|v|, |v|)``. Names of layer outputs without an entry are printed
    (in green) so missing quantization parameters are visible.
    """
    quant_param_json = json_load(json_file)
    act_quant = quant_param_json["act_quant_info"]

    for i in range(network.num_inputs):
        input_tensor = network.get_input(i)
        if input_tensor.name in act_quant:
            print(input_tensor.name)
            input_tensor.dynamic_range = _symmetric_range(act_quant[input_tensor.name])

    for i in range(network.num_layers):
        layer = network.get_layer(i)

        for output_index in range(layer.num_outputs):
            tensor = layer.get_output(output_index)

            if tensor.name in act_quant:
                tensor.dynamic_range = _symmetric_range(act_quant[tensor.name])
            else:
                print("\033[1;32m%s\033[0m" % tensor.name)


def build_engine(onnx_file, json_file, engine_file):
    """Build an INT8 engine from an ONNX model and its quantization JSON.

    Args:
        onnx_file: Path of the ONNX model to parse.
        json_file: Path of the JSON file read by ``setDynamicRange``.
        engine_file: Path the serialized engine is written to.

    Returns:
        ``engine_file`` on success, ``None`` if the ONNX model could not be
        parsed (the parser errors are printed).

    Raises:
        SystemExit: If ``onnx_file`` does not exist.
        RuntimeError: If the builder does not return an engine.
    """
    # Created here rather than at import time so that importing this module
    # has no TensorRT side effects.
    logger = trt.Logger(trt.Logger.VERBOSE)
    explicit_batch = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)

    builder = trt.Builder(logger)
    network = builder.create_network(explicit_batch)

    config = builder.create_builder_config()

    # If it is a dynamic onnx model , you need to add the following.
    # profile = builder.create_optimization_profile()
    # profile.set_shape("input_name", (batch, channels, min_h, min_w), (batch, channels, opt_h, opt_w), (batch, channels, max_h, max_w))
    # config.add_optimization_profile(profile)

    parser = trt.OnnxParser(network, logger)
    if not os.path.exists(onnx_file):
        # Same effect as the former quit(...), without relying on `site`.
        raise SystemExit("ONNX file {} not found".format(onnx_file))

    with open(onnx_file, "rb") as model:
        if not parser.parse(model.read()):
            print("ERROR: Failed to parse the ONNX file.")
            for error in range(parser.num_errors):
                print(parser.get_error(error))
            return None

    config.set_flag(trt.BuilderFlag.INT8)

    setDynamicRange(network, json_file)

    engine = builder.build_engine(network, config)
    if engine is None:
        raise RuntimeError("Failed to build engine from {}".format(onnx_file))

    with open(engine_file, "wb") as f:
        f.write(engine.serialize())
    return engine_file


if __name__ == "__main__":
    # Add plugins if needed
    # import ctypes
    # ctypes.CDLL("libmmdeploy_tensorrt_ops.so")
    parser = argparse.ArgumentParser(
        description="Writing qparams to onnx to convert tensorrt engine."
    )
    parser.add_argument("--onnx", type=str, default=None)
    parser.add_argument("--qparam_json", type=str, default=None)
    parser.add_argument("--engine", type=str, default=None)
    arg = parser.parse_args()

    # Do not report success (or exit 0) when no engine was written.
    if build_engine(arg.onnx, arg.qparam_json, arg.engine) is None:
        raise SystemExit(1)
    print("\033[1;32mgenerate %s\033[0m" % arg.engine)
class CalibrationImageNet(torchvision.datasets.ImageFolder):
    """ImageFolder whose labels come from a ``val_map.txt`` file.

    ``val_map.txt`` must sit in the dataset root and contain one
    ``<image file name>\\t<integer label>`` pair per line. The label found
    there replaces the folder-derived target of ``ImageFolder``.
    """

    def __init__(self, *args, **kwargs):
        """Initialise the ImageFolder and load the image-to-label map.

        Raises:
            FileNotFoundError: If ``val_map.txt`` is missing from the root.
        """
        super(CalibrationImageNet, self).__init__(*args, **kwargs)
        img2label_path = os.path.join(self.root, "val_map.txt")
        if not os.path.exists(img2label_path):
            raise FileNotFoundError(f"Not found label file `{img2label_path}`.")

        self.img2label_map = self.make_img2label_map(img2label_path)

    def make_img2label_map(self, path):
        """Parse ``path`` into a dict of image file name -> integer label.

        Lines that do not split into exactly two tab-separated fields, or
        whose image name is empty, are skipped.
        """
        img2label_map = {}
        with open(path) as f:
            for line in f:
                fields = line.strip().split("\t")
                if len(fields) != 2:
                    continue
                img_name, label = fields
                img_name = img_name.strip()
                if not img_name:
                    continue
                img2label_map[img_name] = int(label.strip())
        return img2label_map

    def __getitem__(self, index):
        """Return ``(transformed image, label from val_map.txt)``."""
        path, _ = self.samples[index]
        sample = self.loader(path)
        if self.transform is not None:
            sample = self.transform(sample)
        # The folder-derived target is ignored; the label is looked up by the
        # image's base file name.
        target = self.img2label_map[os.path.basename(path)]

        return sample, target


def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0):
    """Create the calibration and verification dataloaders.

    Args:
        data_path: Dataset root (must contain ``val_map.txt``).
        num_samples: Number of leading samples used for calibration, or
            ``None`` to use the whole dataset. Values larger than the dataset
            are clamped to its length.
        img_sz: Side length of the centre crop.
        batch_size: Batch size of both loaders.
        workers: Number of dataloader worker processes.

    Returns:
        ``(calibration_dataloader, verify_dataloader)``; the first is shuffled
        and restricted to ``num_samples``, the second iterates the full
        dataset in order.
    """
    dataset = CalibrationImageNet(
        data_path,
        transform=T.Compose(
            [
                T.Resize(256),
                T.CenterCrop(img_sz),
                T.ToTensor(),
                T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ]
        ),
    )

    calibration_dataset = dataset
    if num_samples is not None:
        # Clamp: indexing past the end of the dataset would raise IndexError
        # while iterating when fewer images than requested are available.
        num_samples = min(num_samples, len(dataset))
        calibration_dataset = torch.utils.data.Subset(
            dataset, indices=range(num_samples)
        )

    calibration_dataloader = DataLoader(
        calibration_dataset,
        shuffle=True,
        batch_size=batch_size,
        drop_last=False,
        num_workers=workers,
    )

    verify_dataloader = DataLoader(
        dataset,
        shuffle=False,
        batch_size=batch_size,
        drop_last=False,
        num_workers=workers,
    )

    return calibration_dataloader, verify_dataloader


def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000):
    """Return a calibration dataloader covering ``step`` batches.

    The loader is limited to ``min(total_sample, step * batch_size)`` samples;
    a negative ``step`` selects the whole dataset.
    """
    if step < 0:
        num_samples = None
    else:
        num_samples = min(total_sample, step * batch_size)
    calibration_dataloader, _ = create_dataloaders(
        dataset_dir,
        img_sz=img_sz,
        batch_size=batch_size,
        workers=workers,
        num_samples=num_samples,
    )
    return calibration_dataloader
def eval_batch(batch_score, batch_label):
    """Count top-1 and top-5 hits for one batch.

    Args:
        batch_score: NumPy array of per-class scores, one row per sample.
        batch_label: Iterable of integer labels (Python ints or scalar
            tensors). It may be shorter than ``batch_score``; only the first
            ``len(batch_label)`` rows are scored.

    Returns:
        ``(top1, top5)`` hit counts as Python ints.
    """
    # from_numpy + .to() converts the dtype without the copy-construct
    # UserWarning that torch.tensor(<tensor>) emits.
    scores = torch.from_numpy(batch_score).to(torch.float32)
    top5_rows = scores.topk(5).indices.tolist()
    top1, top5 = 0, 0
    for idx, label in enumerate(batch_label):
        label = int(label)
        row = top5_rows[idx]
        if label == row[0]:
            top1 += 1
        if label in row:
            top5 += 1
    return top1, top5


def create_engine_context(engine_path, logger):
    """Deserialize an engine file and create an execution context for it.

    Returns:
        ``(engine, context)``.
    """
    with open(engine_path, "rb") as f:
        serialized_engine = f.read()

    runtime = tensorrt.Runtime(logger)
    assert runtime, "failed to create runtime"
    engine = runtime.deserialize_cuda_engine(serialized_engine)
    assert engine, "failed to deserialize engine {}".format(engine_path)
    context = engine.create_execution_context()
    assert context, "failed to create execution context"

    return engine, context


def get_io_bindings(engine):
    """Allocate a device buffer for every engine binding.

    Returns:
        ``(inputs, outputs, allocations)`` where ``inputs`` / ``outputs`` are
        lists of dicts with keys ``index``, ``name``, ``dtype``, ``shape`` and
        ``allocation``, and ``allocations`` lists every device buffer in
        binding order.
    """
    inputs = []
    outputs = []
    allocations = []

    for i in range(engine.num_bindings):
        name = engine.get_binding_name(i)
        np_dtype = np.dtype(tensorrt.nptype(engine.get_binding_dtype(i)))
        shape = list(engine.get_binding_shape(i))

        # Buffer size in bytes: item size times the product of all dims.
        size = np_dtype.itemsize
        for s in shape:
            size *= s
        allocation = cuda.mem_alloc(size)

        binding = {
            "index": i,
            "name": name,
            "dtype": np_dtype,
            "shape": shape,
            "allocation": allocation,
        }
        print(f"binding {i}, name : {name} dtype : {np_dtype} shape : {shape}")
        allocations.append(allocation)
        if engine.binding_is_input(i):
            inputs.append(binding)
        else:
            outputs.append(binding)
    return inputs, outputs, allocations
# IMGSIZE       : height/width of the model input
# MODEL_NAME    : basename used for the generated onnx/engine files
# ORIGINE_MODEL : file name of the original onnx model
IMGSIZE=224
MODEL_NAME=Resnet_V1_D50
ORIGINE_MODEL=resnet_v1_d50.onnx

# QUANT CONFIG (only takes effect when PRECISION is int8)
    # QUANT_OBSERVER   : quantization strategy, one of [hist_percentile, percentile, minmax, entropy, ema]
    # QUANT_BATCHSIZE  : batch size of the dataloader used during quantization; best kept equal to the
    #                    batch size in the onnx model, since some ops (e.g. Reshape) may infer a wrong shape otherwise
    # QUANT_STEP       : number of quantization steps
    # QUANT_SEED       : random seed, so that quantization results are reproducible
    # QUANT_EXIST_ONNX : fill in if a quantized model from another source is available
QUANT_OBSERVER=minmax
QUANT_BATCHSIZE=32
QUANT_STEP=32
QUANT_SEED=42
DISABLE_QUANT_LIST=
QUANT_EXIST_ONNX=
def parse_args(argv=None):
    """Parse the command-line options of the export script.

    Args:
        argv: Optional list of argument strings; ``None`` reads ``sys.argv``.

    Returns:
        argparse.Namespace with ``output_model`` (destination ONNX path).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--output_model", type=str)
    return parser.parse_args(argv)


def main(argv=None):
    """Export the pretrained ``resnetv1d50_8xb32_in1k`` model to ONNX.

    The model is moved to CUDA, put in eval mode and traced with a random
    ``(32, 3, 224, 224)`` input using opset 11.
    """
    args = parse_args(argv)
    model = get_model('resnetv1d50_8xb32_in1k', pretrained=True)
    model.cuda()
    model.eval()
    # Named dummy_input so the builtin input() is not shadowed.
    dummy_input = torch.randn(32, 3, 224, 224, device='cuda')

    torch.onnx.export(model,
                      dummy_input,
                      args.output_model,
                      export_params=True,
                      opset_version=11,
                      do_constant_folding=True,
                      input_names=['input'],
                      output_names=['output'])
    print(" ")
    print('Model has been converted to ONNX')
    print("exit")


if __name__ == "__main__":
    # Guarded so importing the module does not trigger a model download and
    # export; the process ends normally when main() returns.
    main()
def _benchmark_fps(context, allocations, config):
    """Run ``config.loop_count`` executions and return the measured FPS."""
    torch.cuda.synchronize()
    start_time = time.perf_counter()

    for _ in range(config.loop_count):
        context.execute_v2(allocations)

    torch.cuda.synchronize()
    forward_time = time.perf_counter() - start_time

    # NOTE(review): the sample count is capped at 50000 although the loop
    # above always runs loop_count iterations; kept as-is, confirm intent.
    num_samples = min(50000, config.loop_count * config.bsz)
    return num_samples / forward_time


def _evaluate_accuracy(context, inputs, outputs, allocations, dataloader):
    """Run the dataloader through the engine; return (top1, top5, total)."""
    ## Prepare the output data
    output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"])
    print(f"output shape : {output.shape} output type : {output.dtype}")

    total_sample = 0
    acc_top1, acc_top5 = 0, 0

    with tqdm(total=len(dataloader)) as _tqdm:
        for batch_data, batch_label in dataloader:
            batch_data = batch_data.numpy().astype(inputs[0]["dtype"])
            batch_data = np.ascontiguousarray(batch_data)
            total_sample += batch_data.shape[0]

            cuda.memcpy_htod(inputs[0]["allocation"], batch_data)
            context.execute_v2(allocations)
            cuda.memcpy_dtoh(output, outputs[0]["allocation"])

            # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model
            # (kept in a separate name so the host buffer itself is not rebound)
            scores = output.squeeze(axis=(2, 3)) if len(output.shape) == 4 else output

            batch_top1, batch_top5 = eval_batch(scores, batch_label)
            acc_top1 += batch_top1
            acc_top5 += batch_top5

            _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample),
                                acc_5='{:.4f}'.format(acc_top5/total_sample))
            _tqdm.update(1)

    return acc_top1, acc_top5, total_sample


def main(config):
    """Run the FPS benchmark or the accuracy evaluation and exit.

    The process exits with status 0 when the measured value reaches the
    configured target (``fps_target`` / ``acc_target``) and 1 otherwise.

    Raises:
        ValueError: If ``config.test_mode`` is neither ``"FPS"`` nor
            ``"ACC"``, or FPS mode is requested with a non-positive
            ``loop_count``.
        RuntimeError: If ACC mode yields no samples.
    """
    import sys  # local import: used only for the exit status below

    # Fail fast instead of silently doing nothing for an unknown mode.
    if config.test_mode not in ("FPS", "ACC"):
        raise ValueError(f"Unsupported test_mode: {config.test_mode!r} (expected 'FPS' or 'ACC')")
    if config.test_mode == "FPS" and config.loop_count <= 0:
        raise ValueError("FPS mode requires a positive --loop_count")

    logger = tensorrt.Logger(tensorrt.Logger.ERROR)

    # Load Engine && I/O bindings
    engine, context = create_engine_context(config.engine_file, logger)
    inputs, outputs, allocations = get_io_bindings(engine)

    # Warm up
    if config.warm_up > 0:
        print("\nWarm Start.")
        for _ in range(config.warm_up):
            context.execute_v2(allocations)
        print("Warm Done.")

    # Inference
    if config.test_mode == "FPS":
        fps = _benchmark_fps(context, allocations, config)

        print("FPS : ", fps)
        print(f"Performance Check : Test {fps} >= target {config.fps_target}")
        passed = fps >= config.fps_target
    else:
        # The dataset is only needed (and only loaded) for accuracy runs.
        dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz)
        acc_top1, acc_top5, total_sample = _evaluate_accuracy(
            context, inputs, outputs, allocations, dataloader
        )
        if total_sample == 0:
            raise RuntimeError("No samples were evaluated; check --datasets_dir")

        print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}")
        print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}")
        acc1 = acc_top1/total_sample
        print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}")
        passed = acc1 >= config.acc_target

    if passed:
        print("pass!")
        sys.exit()
    print("failed!")
    sys.exit(1)


def parse_config(argv=None):
    """Parse the command-line options of the inference script.

    Args:
        argv: Optional list of argument strings; ``None`` reads ``sys.argv``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--test_mode", type=str, default="FPS", help="FPS or ACC")
    parser.add_argument(
        "--engine_file",
        type=str,
        help="engine file path"
    )
    parser.add_argument(
        "--datasets_dir",
        type=str,
        default="",
        help="ImageNet dir",
    )
    parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times")
    parser.add_argument("--bsz", type=int, default=32, help="test batch size")
    parser.add_argument(
        "--imgsz",
        "--img",
        "--img-size",
        type=int,
        default=224,
        help="inference size h,w",
    )
    parser.add_argument("--use_async", action="store_true")
    parser.add_argument(
        "--device", type=int, default=0, help="cuda device, i.e. 0 or 0,1,2,3,4"
    )
    parser.add_argument("--fps_target", type=float, default=-1.0)
    parser.add_argument("--acc_target", type=float, default=-1.0)
    parser.add_argument("--loop_count", type=int, default=-1)

    return parser.parse_args(argv)


if __name__ == "__main__":
    config = parse_config()
    main(config)
def change_input_dim(model, bsz):
    """Overwrite the first dimension of every graph input with ``bsz``.

    Args:
        model: ONNX model whose ``graph.input`` entries are modified in place.
            Every input is assumed to be a tensor with at least one dimension.
        bsz: New batch size. An ``int`` or a digit-only string sets a fixed
            size; any other string sets a symbolic (dynamic) dimension name;
            anything else falls back to a batch size of 1.
    """
    batch_size = bsz

    # Note that this requires all inputs to have the same batch_size.
    for graph_input in model.graph.input:
        dim1 = graph_input.type.tensor_type.shape.dim[0]
        # The numeric check must come first: a plain isinstance(str) test
        # would also match digit strings such as "16" and turn them into a
        # symbolic dimension called "16".
        if isinstance(batch_size, int) or (isinstance(batch_size, str) and batch_size.isdigit()):
            # set given batch size
            dim1.dim_value = int(batch_size)
        elif isinstance(batch_size, str):
            # set dynamic batch size
            dim1.dim_param = batch_size
        else:
            # set batch size of 1
            dim1.dim_value = 1


def parse_args(argv=None):
    """Parse the command-line options of the batch-size script.

    Args:
        argv: Optional list of argument strings; ``None`` reads ``sys.argv``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--batch_size", type=int)
    parser.add_argument("--origin_model", type=str)
    parser.add_argument("--output_model", type=str)
    return parser.parse_args(argv)


if __name__ == "__main__":
    # Guarded so that importing this module does not parse sys.argv or touch
    # any model file.
    args = parse_args()
    model = onnx.load(args.origin_model)
    change_input_dim(model, args.batch_size)
    onnx.save(model, args.output_model)
def setseed(seed=42):
    """Seed the Python, NumPy and PyTorch random number generators."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)


def parse_args():
    """Parse and echo the quantization command-line options."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_name", type=str)
    parser.add_argument("--model", type=str)
    parser.add_argument("--dataset_dir", type=str, default="imagenet_val")
    # NOTE(review): --observer is parsed but never applied to the
    # quantization setting below — confirm whether it should be.
    parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"],
                        default="hist_percentile")
    parser.add_argument("--disable_quant_names", nargs='*', type=str)
    parser.add_argument("--save_dir", type=str, help="save path", default=None)
    parser.add_argument("--bsz", type=int, default=32)
    parser.add_argument("--step", type=int, default=20)
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--imgsz", type=int, default=224)
    args = parser.parse_args()
    print("Quant config:", args)
    print(args.disable_quant_names)
    return args


config = parse_args()

# Apply --seed before any dataloader is built: the calibration loader
# shuffles, so without seeding the run is not reproducible.
setseed(config.seed)

# modify configuration below:
WORKING_DIRECTORY = 'checkpoints'                          # choose your working directory
TARGET_PLATFORM = TargetPlatform.TRT_INT8                  # choose your target platform
MODEL_TYPE = NetworkFramework.ONNX                         # or NetworkFramework.CAFFE
INPUT_LAYOUT = 'chw'                                       # input data layout, chw or hwc
# Derived from the CLI so it always matches the calibration batches
# (equals the former hard-coded [32, 3, 224, 224] under the defaults).
NETWORK_INPUTSHAPE = [config.bsz, 3, config.imgsz, config.imgsz]  # input shape of your network
EXECUTING_DEVICE = 'cuda'                                  # 'cuda' or 'cpu'.
REQUIRE_ANALYSE = False
TRAINING_YOUR_NETWORK = False                              # whether to fine-tune the network


def _take_images(batch):
    """Collate function: keep the image tensor of an (images, labels) batch."""
    return batch[0].to(EXECUTING_DEVICE)


# -------------------------------------------------------------------
# Load the model file; PPQ parses the onnx or caffe model into its own
# graph format. Models from pytorch, tensorflow, etc. should first be
# exported to onnx (e.g. with torch.onnx.export).
# -------------------------------------------------------------------
graph = None
if MODEL_TYPE == NetworkFramework.ONNX:
    graph = load_onnx_graph(onnx_import_file=config.model)
if MODEL_TYPE == NetworkFramework.CAFFE:
    graph = load_caffe_graph(
        caffemodel_path=os.path.join(WORKING_DIRECTORY, 'model.caffemodel'),
        prototxt_path=os.path.join(WORKING_DIRECTORY, 'model.prototxt'))
assert graph is not None, 'Graph Loading Error, Check your input again.'

# -------------------------------------------------------------------
# The SETTING object controls PPQ's quantization logic: graph fusion,
# dispatching and detailed quantization strategy. Adjust its attributes
# when the quantization error of the network is too high.
# -------------------------------------------------------------------
QS = QuantizationSettingFactory.default_setting()

# -------------------------------------------------------------------
# Optional fine-tuning pass to improve quantization accuracy. PPQ offers
# several such algorithms, each enabled with QS.xxxx = True; enable only
# what is needed rather than all of them at once.
# -------------------------------------------------------------------
if TRAINING_YOUR_NETWORK:
    QS.lsq_optimization = True                                      # retrain the network to reduce quantization error
    QS.lsq_optimization_setting.steps = 500                         # number of retraining steps; affects training time
    QS.lsq_optimization_setting.collecting_device = 'cuda'          # where cached data lives; switch to 'cpu' if GPU memory runs out


dataloader = getdataloader(config.dataset_dir, config.step, batch_size=config.bsz, img_sz=config.imgsz)
# ENABLE CUDA KERNEL speeds quantization up considerably but needs a working
# compilation environment. To quantize without it, remove the
# `with ENABLE_CUDA_KERNEL():` wrapper.
with ENABLE_CUDA_KERNEL():
    print('网络正量化中,根据你的量化配置,这将需要一段时间:')
    quantized = quantize_native_model(
        setting=QS,                     # setting object that controls the standard quantization logic
        model=graph,
        calib_dataloader=dataloader,
        calib_steps=config.step,
        input_shape=NETWORK_INPUTSHAPE, # use this argument when the network has a single input
        inputs=None,
        # for a multi-input network pass input_shape=None and
        # inputs=[torch.zeros(1,3,224,224), torch.zeros(1,3,224,224)] instead
        collate_fn=_take_images,        # same role as a torch dataloader collate_fn: per-batch preprocessing
        platform=TARGET_PLATFORM,
        device=EXECUTING_DEVICE,
        do_quantize=True)

    # -------------------------------------------------------------------
    # An executor is needed to run the quantized network and obtain results.
    # It behaves similarly to a torch.Module. It must be created before the
    # export step.
    # -------------------------------------------------------------------
    executor = TorchExecutor(graph=quantized, device=EXECUTING_DEVICE)
    # output = executor.forward(input)

    # -------------------------------------------------------------------
    # PPQ measures quantization error as the inverse signal-to-noise ratio,
    # i.e. noise energy / signal energy; 0.1 means quantization noise is
    # about 10% of the signal. graphwise_error_analyse reports CUMULATIVE
    # error, so the last layers usually show the largest values, caused by
    # all layers before them. Use layerwise_error_analyse to locate the
    # source of the error layer by layer.
    # -------------------------------------------------------------------
    print('正计算网络量化误差(SNR),最后一层的误差应小于 0.1 以保证量化精度:')
    reports = graphwise_error_analyse(
        graph=quantized, running_device=EXECUTING_DEVICE, steps=32,
        dataloader=dataloader, collate_fn=_take_images)
    for op, snr in reports.items():
        if snr > 0.1: ppq_warning(f'层 {op} 的累计量化误差显著,请考虑进行优化')

    if REQUIRE_ANALYSE:
        print('正计算逐层量化误差(SNR),每一层的独立量化误差应小于 0.1 以保证量化精度:')
        # The dataloader yields (images, labels) batches, so the images must
        # be selected here exactly as in the calls above.
        layerwise_error_analyse(graph=quantized, running_device=EXECUTING_DEVICE,
                                interested_outputs=None,
                                dataloader=dataloader, collate_fn=_take_images)

    # -------------------------------------------------------------------
    # export_ppq_graph writes the quantized model; PPQ adapts the model
    # format to the selected export platform.
    # -------------------------------------------------------------------
    print('网络量化结束,正在生成目标文件:')
    export_ppq_graph(
        graph=quantized, platform=TARGET_PLATFORM,
        graph_save_to=os.path.join(config.save_dir, f"quantized_{config.model_name}.onnx"),
        config_save_to=os.path.join(config.save_dir, 'quant_cfg.json'))
import os
import argparse
import dataclasses

import torch
import onnx

from refine_utils.matmul_to_gemm_pass import FusedGemmPass
from refine_utils.linear_pass import FusedLinearPass

from refine_utils.common import *


def get_constant_input_name_of_operator(graph: Graph, operator: Operator):
    """Return the name of a constant input of ``operator``.

    An input counts as constant when the graph contains it, it is a leaf
    variable and it carries a value. When several inputs qualify the last
    one wins; ``None`` is returned when none does.
    """
    const = None
    for input_name in operator.inputs:
        if not graph.containe_var(input_name):
            continue

        if not graph.is_leaf_variable(input_name):
            continue

        if graph.get_variable(input_name).value is not None:
            const = input_name
    return const


class FuseLayerNormPass(BasePass):
    """Fuse the decomposed layer-norm operator chain into one LayerNormalization."""

    def process(self, graph: Graph) -> Graph:
        """Search ``graph`` for the layer-norm chain and fuse every match."""
        self.transform = GraphTransform(graph)
        find_sequence_subgraph(
            graph,
            [OP.REDUCE_MEAN, OP.SUB, OP.POW, OP.REDUCE_MEAN, OP.ADD, OP.SQRT, OP.DIV, OP.MUL, OP.ADD],
            self.fuse_layer_norm,
            strict=False
        )
        return graph

    def fuse_layer_norm(self, graph: Graph, pattern: PatternGraph):
        """Replace one matched chain with a LayerNormalization operator.

        The match is left untouched when it is not a real layer norm or when
        epsilon, scale or bias cannot be resolved to constants.
        """
        nodes = pattern.nodes

        # The first REDUCE_MEAN and the SUB must read the same tensor.
        if nodes[0].operator.inputs[0] != nodes[1].operator.inputs[0]:
            return

        # The POW and the DIV must read the same tensor.
        if nodes[2].operator.inputs[0] != nodes[6].operator.inputs[0]:
            return

        # Intermediate results must not be consumed by more than one operator,
        # otherwise removing the chain would break another consumer.
        for node in [nodes[0]] + nodes[2:-1]:
            next_ops = graph.get_next_operators(node.operator)
            if len(next_ops) > 1:
                return

        # Epsilon is the constant operand of the ADD that precedes SQRT.
        eps = None
        for input_name in nodes[4].operator.inputs:
            input_var = graph.get_variable(input_name)
            if input_var.value is not None and graph.is_leaf_variable(input_name):
                eps = to_py_type(input_var.value)

        scale = get_constant_input_name_of_operator(graph, nodes[-2].operator)
        bias = get_constant_input_name_of_operator(graph, nodes[-1].operator)

        # Validate everything BEFORE mutating the graph: once the chain is
        # deleted there is no way back to the original operators.
        if eps is None or scale is None or bias is None:
            return

        self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator)

        attributes = {
            "axis": nodes[0].operator.attributes.axes,
            "epsilon": eps,
        }

        layer_norm_op = self.transform.make_operator(
            op_type="LayerNormalization",
            inputs=[nodes[0].operator.inputs[0], scale, bias],
            outputs=[nodes[-1].operator.outputs[0]],
            **attributes
        )

        self.transform.add_operator(layer_norm_op)


class FusedGeluPass(BasePass):
    """Fuse the DIV -> ERF -> ADD -> MUL -> MUL chain into a single GELU operator."""

    def process(self, graph: Graph) -> Graph:
        """Search ``graph`` for the erf-based GELU chain and fuse every match."""
        self.transform = GraphTransform(graph)

        find_sequence_subgraph(
            graph, pattern=[OP.DIV, OP.ERF, OP.ADD, OP.MUL, OP.MUL], callback=self.fuse_gelu, strict=True
        )
        return graph

    def fuse_gelu(self, graph: Graph, pattern: PatternGraph):
        """Replace one matched chain with a GELU operator when it really is a GELU."""
        nodes = pattern.nodes

        # The DIV may be fed directly by a graph input, in which case there is
        # no producing operator to inspect.
        prev_ops = self.transform.get_previous_operators(nodes[0].operator)
        if not prev_ops:
            return
        prev_op = prev_ops[0]

        # The producer must feed exactly the DIV and the first MUL.
        next_ops = self.transform.get_next_operators(prev_op)
        if len(next_ops) != 2:
            return

        if nodes[0].operator not in next_ops or nodes[3].operator not in next_ops:
            return

        # The GELU input is the tensor shared by the DIV and the first MUL.
        gelu_op_input = None
        for input_name in nodes[3].operator.inputs:
            if input_name in nodes[0].operator.inputs:
                gelu_op_input = input_name
                break

        # Bail out before touching the graph if no shared tensor was found.
        if gelu_op_input is None:
            return

        self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator)

        gelu_op = self.transform.make_operator(
            op_type=OP.GELU,
            inputs=[gelu_op_input],
            outputs=[nodes[-1].operator.outputs[0]]
        )
        self.transform.add_operator(gelu_op)


@dataclasses.dataclass
class NormalizeAttr(BaseOperatorAttr):
    """Attribute container used as the ``attr`` annotation of ``GeluOperator.call``."""
    p: float = 2.0
    epsilon: float = 1e-12
    axis: int = 1


@registe_operator(OP.GELU)
class GeluOperator(BaseOperator):
    """Operator implementation registered for ``OP.GELU``."""

    def call(
        self,
        executor,
        operator: Operator,
        inputs: List,
        attr: NormalizeAttr,
    ):
        """Apply GELU to the first input; the remaining arguments are unused."""
        return F.gelu(inputs[0])

    def convert_onnx_operator(
        self, ir_graph: Graph, onnx_graph: onnx.GraphProto, node: onnx.NodeProto
    ) -> Operator:
        """Convert the ONNX node with the default converter and no attributes."""
        return default_converter(ir_graph, onnx_graph, node, attr_cls=attr.EmptyAttr)

    def quantize(
        self,
        graph: Graph,
        op: Operator,
        operator_observer_config: QuantOperatorObserverConfig,
        quant_outputs: bool = False,
    ):
        """Quantize as a single-input operator."""
        return quant_single_input_operator(graph, op, operator_observer_config, quant_outputs=quant_outputs)


class ClearUnsedVariables(BasePass):
    """Drop leaf variables nobody consumes and their stale quant parameters."""

    def process(self, graph: Graph) -> Graph:
        """Delete unused leaf variables, then quant parameters of missing variables."""
        # Snapshot first: the graph is mutated while we walk it.
        for var in list(graph.variables):
            if len(graph.get_dst_operators(var)) == 0 and graph.is_leaf_variable(var):
                graph.delete_variable(var)

        for var in list(graph.quant_parameters.keys()):
            if not graph.containe_var(var):
                graph.quant_parameters.pop(var)

        return graph


class FormatLayerNorm(BasePass):
    """Normalise the ``axis`` attribute of LayerNorm operators to a scalar."""

    def process(self, graph: Graph) -> Graph:
        """Format every operator whose type contains ``LayerNorm``."""
        for op in graph.operators.values():
            if "LayerNorm" in op.op_type:
                self.format_layer_norm(graph, op)
        return graph

    def format_layer_norm(self, graph, operator):
        """Replace a tuple/list ``axis`` attribute with its first element."""
        if not hasattr(operator.attributes, "axis"):
            return
        if isinstance(operator.attributes.axis, (tuple, list)):
            operator.attributes.axis = operator.attributes.axis[0]


class FormatReshape(BasePass):
    """Store the constant shape operand of Reshape operators as an int64 tensor."""

    def process(self, graph: Graph) -> Graph:
        """Format every ``Reshape`` operator in ``graph``."""
        for op in graph.operators.values():
            if op.op_type == "Reshape":
                self.format_reshape(graph, op)

        return graph

    def format_reshape(self, graph, operator):
        """Convert the shape operand to ``torch.int64`` when it is a constant."""
        # A Reshape without a second input has no shape operand to format.
        if len(operator.inputs) < 2:
            return

        shape = graph.get_variable(operator.inputs[1])
        # A shape computed at runtime carries no value; leave it alone.
        if shape.value is None:
            return

        if isinstance(shape.value, torch.Tensor):
            # Avoids the copy-construct warning torch.tensor() emits for tensors.
            shape.value = shape.value.to(torch.int64)
        else:
            shape.value = torch.tensor(shape.value, dtype=torch.int64)


class FormatScalar(BasePass):
    """Reshape 0-d constants feeding MUL/ADD/GATHER into 1-element tensors."""

    def process(self, graph: Graph):
        """Reshape qualifying scalar constants in place and report each one."""
        for var in graph.variables.values():
            use_ops = graph.get_dst_operators(var)

            if len(use_ops) == 0:
                continue

            # Only the first consumer decides whether the variable is formatted.
            if use_ops[0].op_type not in [OP.MUL, OP.ADD, OP.GATHER]:
                continue

            if var.value is not None and var.value.ndim == 0:
                var.value = var.value.reshape(1)
                print(f"Reshape scalar to tensor for {var.name}.")

        return graph


class RenamePass(BasePass):
    """Rename operators and variables so their names contain no ``/``."""

    def process(self, graph: Graph):
        """Rename every operator (``/`` -> ``#``) and variable (``/`` -> ``.``)."""
        # Snapshot the names: renaming mutates the underlying mappings.
        for old_name in list(graph.operators.keys()):
            new_name = old_name.replace("/", "#")

            graph.rename_operator(old_name, new_name)

        for name in list(graph.variables.keys()):
            new_name = name.replace("/", ".").replace("Output", "out").replace("output", "out")

            graph.rename_vaiable(name, new_name,
                                 with_variables=True,
                                 with_operator_outputs=True)

        return graph


def create_pipeline(example_inputs):
    """Build the pass sequence applied to the model.

    ``example_inputs`` is accepted for interface compatibility but unused.
    Only the GELU fusion is enabled; the other passes are kept for reference.
    """
    return PassSequence(
        # FuseLayerNormPass(),
        FusedGeluPass(),

        # ClearUnsedVariables(),
        # FormatLayerNorm(),
        # FormatReshape(),
        # FormatScalar(),
        # RenamePass()
    )


def parse_args():
    """Parse the command line: source/destination ONNX paths and input shape."""
    parser = argparse.ArgumentParser()
    # Both paths are mandatory; without them the pipeline would silently be
    # handed the literal string "None" as a file name.
    parser.add_argument("--onnx_path", type=str, required=True)
    parser.add_argument("--dst_onnx_path", type=str, required=True)

    parser.add_argument("--bsz", type=int, default=8,
                        help="Batch size")
    parser.add_argument("--imgsz", type=int, default=224,
                        help="Image size")

    return parser.parse_args()


if __name__ == "__main__":
    args = parse_args()

    example_inputs = torch.randn(args.bsz, 3, args.imgsz, args.imgsz)

    refine_pipline = Pipeline(
        create_source(f"{args.onnx_path}", example_inputs=example_inputs),
        create_pipeline(example_inputs),
        create_target(
            f"{args.dst_onnx_path}",
            example_inputs=example_inputs,
        )
    )
    refine_pipline.run()

    print(f"refine the model, input shape={example_inputs.shape}")
from typing import Union, Callable, List

from tensorrt.deploy.api import *
from tensorrt.deploy.backend.onnx.converter import default_converter
from tensorrt.deploy.backend.torch.executor.operators._operators import to_py_type
from tensorrt.deploy.ir.operator_attr import BaseOperatorAttr, EmptyAttr
from tensorrt.deploy.ir.operator_type import OperatorType as OP
from tensorrt.deploy.ir import operator_attr as attr, Operator, generate_operator_name
from tensorrt.deploy.fusion import BasePass, PatternGraph, build_sequence_graph, GraphMatcher, PassSequence
from tensorrt.deploy.ir import Graph
from tensorrt.deploy.quantizer.quant_operator.base import quant_single_input_operator
from tensorrt.deploy.backend.onnx.converter import convert_onnx_operator


def find_sequence_subgraph(graph,
                           pattern: Union[List[str], PatternGraph],
                           callback: Callable[[Graph, PatternGraph], None],
                           strict=True):
    """Run ``callback`` on every occurrence of ``pattern`` found in ``graph``.

    ``pattern`` is either a ready-made ``PatternGraph`` or a list of operator
    types, which is first turned into a sequence pattern. Returns whatever
    ``GraphMatcher.findall`` returns.
    """
    # A plain list of operator types is shorthand for a linear chain.
    pattern_graph = build_sequence_graph(pattern) if isinstance(pattern, List) else pattern
    return GraphMatcher(pattern_graph, strict=strict).findall(graph, callback)
import dataclasses

from refine_utils.common import *

# AXB=C, Only for B is initializer

class FusedLinearPass(BasePass):
    """Rewrite MatMul (+ Add) with a constant 2-D weight into ``LinearFP16``."""

    def process(self, graph: Graph) -> Graph:
        """Fuse MatMul+Add pairs first, then the remaining stand-alone MatMuls."""
        self.transform = GraphTransform(graph)

        find_sequence_subgraph(
            graph, pattern=[OP.MATMUL, OP.ADD], callback=self.to_linear_with_bias, strict=True
        )
        find_sequence_subgraph(
            graph, pattern=[OP.MATMUL], callback=self.to_linear, strict=True
        )
        return graph

    def to_linear_with_bias(self, graph, pattern: PatternGraph):
        """Replace a MatMul followed by an Add with one biased ``LinearFP16``."""
        matmul = pattern.nodes[0]
        add = pattern.nodes[1]
        if len(add.operator.inputs) != 2:
            return

        # Only a constant, 2-D right-hand operand can become a linear weight.
        b_var = graph.get_variable(matmul.operator.inputs[1])
        if not graph.is_leaf_variable(b_var) or b_var.value is None:
            return

        if b_var.value.ndim != 2:
            return

        # The bias is the Add operand that does not come from the MatMul.
        bias_var = None
        for input_name in add.operator.inputs:
            if input_name not in matmul.operator.outputs:
                bias_var = input_name

        # Both Add operands come from the MatMul (x + x): there is no bias.
        if bias_var is None:
            return

        # Build a new list so the MatMul's own input list is not mutated.
        inputs = list(matmul.operator.inputs) + [bias_var]
        outputs = add.operator.outputs

        # The weight is stored transposed; keep the recorded shape in sync.
        b_var.value = b_var.value.transpose(1, 0)
        b_var.shape[0], b_var.shape[1] = b_var.shape[1], b_var.shape[0]

        hidden_size = b_var.shape[1]
        linear_dim = b_var.shape[0]

        attributes = {
            "hidden_size": hidden_size,
            "linear_dim": linear_dim,
            "has_bias": 1,
            "act_type": "none"
        }

        op = self.transform.make_operator(
            "LinearFP16",
            inputs=inputs,
            outputs=outputs,
            **attributes
        )

        # Register the new operator, as to_linear does; the original code
        # built it and then dropped it, leaving the graph without a producer.
        self.transform.add_operator(op)

        self.transform.delete_operator(add.operator)
        self.transform.delete_operator(matmul.operator)

    def to_linear(self, graph, pattern: PatternGraph):
        """Replace a stand-alone MatMul with a bias-free ``LinearFP16``."""
        matmul = pattern.nodes[0]
        if len(matmul.operator.inputs) != 2:
            return

        # Only a constant, 2-D right-hand operand can become a linear weight.
        b_var = graph.get_variable(matmul.operator.inputs[1])
        if not graph.is_leaf_variable(b_var) or b_var.value is None:
            return

        if b_var.value.ndim != 2:
            return

        # The weight is stored transposed; keep the recorded shape in sync.
        b_var.value = b_var.value.transpose(1, 0)
        b_var.shape[0], b_var.shape[1] = b_var.shape[1], b_var.shape[0]

        # Sizes must be read AFTER the transpose and BEFORE they are used in
        # the attribute dict (the original referenced them before assignment).
        hidden_size = b_var.shape[1]
        linear_dim = b_var.shape[0]

        attributes = {
            "hidden_size": hidden_size,
            "linear_dim": linear_dim,
            "has_bias": 0,
            "act_type": "none"
        }

        op = self.transform.make_operator(
            op_type="LinearFP16",
            inputs=pattern.nodes[0].operator.inputs,
            outputs=[pattern.nodes[-1].operator.outputs[0]],
            **attributes
        )

        self.transform.add_operator(op)

        self.transform.delete_operator(matmul.operator)
from refine_utils.common import *

#
# Common pattern Matmul to Gemm
#
class FusedGemmPass(BasePass):
    """Rewrite MatMul operators whose operands are both non-constant into Gemm."""

    def process(self, graph: Graph) -> Graph:
        """Search ``graph`` for MatMul operators and convert every eligible one."""
        self.transform = GraphTransform(graph)

        find_sequence_subgraph(
            graph, pattern=[OP.MATMUL], callback=self.to_gemm, strict=True
        )
        return graph

    def to_gemm(self, graph, pattern: PatternGraph):
        """Replace one MatMul with a Gemm (alpha=1, beta=1, transB=1).

        The operator is left untouched unless it has exactly two inputs and
        one output, and none of its inputs is a leaf variable.
        """
        matmul_op = pattern.nodes[0]
        inputs = matmul_op.operator.inputs
        outputs = matmul_op.operator.outputs

        # Reject when EITHER arity is wrong. The original used ``and``, which
        # let an operator with a wrong input count through whenever it had
        # one output, and then indexed inputs[1] below.
        if len(inputs) != 2 or len(outputs) != 1:
            return

        for input_name in inputs:
            if self.transform.is_leaf_variable(input_name):
                return

        print(f"{self.transform.get_variable(inputs[0]).shape} {self.transform.get_variable(inputs[1]).shape}")
        self.transform.delete_operator(matmul_op.operator)

        # NOTE(review): transB=1 makes Gemm compute A @ B^T while MatMul
        # computes A @ B; confirm the target runtime expects this layout.
        op = self.transform.make_operator(
            op_type="Gemm",
            inputs=inputs,
            outputs=outputs,
            alpha=1,
            beta=1,
            transB=1
        )

        self.transform.add_operator(op)
# FP16 accuracy run: simplify the ONNX model, fix its batch size, build the
# engine and evaluate accuracy. Expects CONFIG_DIR, CHECKPOINTS_DIR,
# DATASETS_DIR and RUN_DIR in the environment.

# Becomes 1 as soon as any stage fails; used as the script's exit status.
EXIT_STATUS=0
check_status()
{
    # PIPESTATUS[0] still holds the status of the command run just before this call.
    if ((${PIPESTATUS[0]} != 0));then
        EXIT_STATUS=1
    fi
}

# Run paraments
BSZ=32
TGT=-1
WARM_UP=0
LOOP_COUNT=-1
RUN_MODE=ACC
PRECISION=float16

# Update arguments: "--bs N" overrides the batch size, "--tgt X" the target.
arguments=("$@")
index=0
for argument in "${arguments[@]}"
do
    index=$((index + 1))
    case "${argument}" in
      --bs) BSZ=${arguments[index]};;
      --tgt) TGT=${arguments[index]};;
    esac
done

source "${CONFIG_DIR}"
ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}

echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
echo DATASETS_DIR : ${DATASETS_DIR}
echo RUN_DIR : ${RUN_DIR}
echo CONFIG_DIR : ${CONFIG_DIR}
echo ====================== Model Info ======================
echo Model Name : ${MODEL_NAME}
echo Onnx Path : ${ORIGINE_MODEL}

step=0
SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx

# Simplify Model
step=$((step + 1))
echo;
echo "[STEP ${step}] : Simplify Model"
if [ -f "${SIM_MODEL}" ];then
    echo " "Simplify Model, ${SIM_MODEL} has been existed
else
    python3 ${RUN_DIR}/simplify_model.py \
        --origin_model "${ORIGINE_MODEL}" \
        --output_model "${SIM_MODEL}"; check_status
    echo " "Generate ${SIM_MODEL}
fi

# Change Batchsize
step=$((step + 1))
echo;
echo "[STEP ${step}] : Change Batchsize"
FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx
if [ -f "${FINAL_MODEL}" ];then
    echo " "Change Batchsize Skip, $FINAL_MODEL has been existed
else
    python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
        --origin_model "${SIM_MODEL}" --output_model "${FINAL_MODEL}"; check_status
    echo " "Generate ${FINAL_MODEL}
fi

# Build Engine
step=$((step + 1))
echo;
echo "[STEP ${step}] : Build Engine"
ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
if [ -f "${ENGINE_FILE}" ];then
    echo " "Build Engine Skip, $ENGINE_FILE has been existed
else
    python3 ${RUN_DIR}/build_engine.py \
        --precision ${PRECISION} \
        --model "${FINAL_MODEL}" \
        --engine "${ENGINE_FILE}"; check_status
    echo " "Generate Engine ${ENGINE_FILE}
fi

# Inference
step=$((step + 1))
echo;
echo "[STEP ${step}] : Inference"
python3 ${RUN_DIR}/inference.py \
    --engine_file=${ENGINE_FILE} \
    --datasets_dir=${DATASETS_DIR} \
    --imgsz=${IMGSIZE} \
    --warm_up=${WARM_UP} \
    --loop_count ${LOOP_COUNT} \
    --test_mode ${RUN_MODE} \
    --acc_target ${TGT} \
    --bsz ${BSZ}; check_status

exit ${EXIT_STATUS}
# FP16 performance run: simplify the ONNX model, fix its batch size, build
# the engine and measure FPS. Expects CONFIG_DIR, CHECKPOINTS_DIR,
# DATASETS_DIR and RUN_DIR in the environment.

# Becomes 1 as soon as any stage fails; used as the script's exit status.
EXIT_STATUS=0
check_status()
{
    # PIPESTATUS[0] still holds the status of the command run just before this call.
    if ((${PIPESTATUS[0]} != 0));then
        EXIT_STATUS=1
    fi
}

# Run paraments
BSZ=32
TGT=-1
WARM_UP=3
LOOP_COUNT=20
RUN_MODE=FPS
PRECISION=float16

# Update arguments: "--bs N" overrides the batch size, "--tgt X" the target.
arguments=("$@")
index=0
for argument in "${arguments[@]}"
do
    index=$((index + 1))
    case "${argument}" in
      --bs) BSZ=${arguments[index]};;
      --tgt) TGT=${arguments[index]};;
    esac
done

source "${CONFIG_DIR}"
ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}

echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
echo DATASETS_DIR : ${DATASETS_DIR}
echo RUN_DIR : ${RUN_DIR}
echo CONFIG_DIR : ${CONFIG_DIR}
echo ====================== Model Info ======================
echo Model Name : ${MODEL_NAME}
echo Onnx Path : ${ORIGINE_MODEL}

step=0
SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx

# Simplify Model
step=$((step + 1))
echo;
echo "[STEP ${step}] : Simplify Model"
if [ -f "${SIM_MODEL}" ];then
    echo " "Simplify Model, ${SIM_MODEL} has been existed
else
    python3 ${RUN_DIR}/simplify_model.py \
        --origin_model "${ORIGINE_MODEL}" \
        --output_model "${SIM_MODEL}"; check_status
    echo " "Generate ${SIM_MODEL}
fi

# Change Batchsize
step=$((step + 1))
echo;
echo "[STEP ${step}] : Change Batchsize"
FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx
if [ -f "${FINAL_MODEL}" ];then
    echo " "Change Batchsize Skip, $FINAL_MODEL has been existed
else
    python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
        --origin_model "${SIM_MODEL}" --output_model "${FINAL_MODEL}"; check_status
    echo " "Generate ${FINAL_MODEL}
fi

# Build Engine
step=$((step + 1))
echo;
echo "[STEP ${step}] : Build Engine"
ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
if [ -f "${ENGINE_FILE}" ];then
    echo " "Build Engine Skip, $ENGINE_FILE has been existed
else
    python3 ${RUN_DIR}/build_engine.py \
        --precision ${PRECISION} \
        --model "${FINAL_MODEL}" \
        --engine "${ENGINE_FILE}"; check_status
    echo " "Generate Engine ${ENGINE_FILE}
fi

# Inference
step=$((step + 1))
echo;
echo "[STEP ${step}] : Inference"
python3 ${RUN_DIR}/inference.py \
    --engine_file=${ENGINE_FILE} \
    --datasets_dir=${DATASETS_DIR} \
    --imgsz=${IMGSIZE} \
    --warm_up=${WARM_UP} \
    --loop_count ${LOOP_COUNT} \
    --test_mode ${RUN_MODE} \
    --fps_target ${TGT} \
    --bsz ${BSZ}; check_status

exit ${EXIT_STATUS}
# INT8 accuracy run: simplify the ONNX model, quantize it, fix its batch
# size, build the INT8 engine and evaluate accuracy. Expects CONFIG_DIR,
# CHECKPOINTS_DIR, DATASETS_DIR and RUN_DIR in the environment.
set -x

# Becomes 1 as soon as any stage fails; used as the script's exit status.
EXIT_STATUS=0
check_status()
{
    # PIPESTATUS[0] still holds the status of the command run just before this call.
    if ((${PIPESTATUS[0]} != 0));then
        EXIT_STATUS=1
    fi
}

# Run paraments
BSZ=32
TGT=-1
WARM_UP=0
LOOP_COUNT=-1
RUN_MODE=ACC
PRECISION=int8
export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python

# Update arguments: "--bs N" overrides the batch size, "--tgt X" the target.
arguments=("$@")
index=0
for argument in "${arguments[@]}"
do
    index=$((index + 1))
    case "${argument}" in
      --bs) BSZ=${arguments[index]};;
      --tgt) TGT=${arguments[index]};;
    esac
done

source "${CONFIG_DIR}"
echo ${QUANT_OBSERVER}
ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}

echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
echo DATASETS_DIR : ${DATASETS_DIR}
echo RUN_DIR : ${RUN_DIR}
echo CONFIG_DIR : ${CONFIG_DIR}
echo ====================== Model Info ======================
echo Model Name : ${MODEL_NAME}
echo Model Input Name : ${MODEL_INPUT_NAME}
echo Model Output Name : ${MODEL_OUTPUT_NAME}
echo Onnx Path : ${ORIGINE_MODEL}

step=0
SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx

# Simplify Model
step=$((step + 1))
echo "[STEP ${step}] : Simplify Model"
if [ -f "${SIM_MODEL}" ];then
    echo " "Simplify Model, ${SIM_MODEL} has been existed
else
    python3 ${RUN_DIR}/simplify_model.py \
        --origin_model "${ORIGINE_MODEL}" \
        --output_model "${SIM_MODEL}"; check_status
    echo " "Generate ${SIM_MODEL}
fi

# Quant Model
if [ "${PRECISION}" == "int8" ];then
    step=$((step + 1))
    echo;
    echo "[STEP ${step}] : Quant Model"
    # QUANT_EXIST_ONNX may come from the config; otherwise use the default name.
    if [[ -z ${QUANT_EXIST_ONNX} ]];then
        QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx
    fi
    if [[ -f ${QUANT_EXIST_ONNX} ]];then
        SIM_MODEL=${QUANT_EXIST_ONNX}
        echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed
    else
        python3 ${RUN_DIR}/quant.py \
            --model ${SIM_MODEL} \
            --model_name ${MODEL_NAME} \
            --dataset_dir ${DATASETS_DIR} \
            --observer ${QUANT_OBSERVER} \
            --disable_quant_names ${DISABLE_QUANT_LIST[@]} \
            --save_dir $CHECKPOINTS_DIR \
            --bsz ${QUANT_BATCHSIZE} \
            --step ${QUANT_STEP} \
            --seed ${QUANT_SEED} \
            --imgsz ${IMGSIZE}; check_status
        SIM_MODEL=${QUANT_EXIST_ONNX}
        echo " "Generate ${SIM_MODEL}
    fi
fi

# Change Batchsize
step=$((step + 1))
echo;
echo "[STEP ${step}] : Change Batchsize"
FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx
if [ -f "${FINAL_MODEL}" ];then
    echo " "Change Batchsize Skip, $FINAL_MODEL has been existed
else
    python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
        --origin_model "${SIM_MODEL}" --output_model "${FINAL_MODEL}"; check_status
    echo " "Generate ${FINAL_MODEL}
fi

# Build Engine
step=$((step + 1))
echo;
echo "[STEP ${step}] : Build Engine"
ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
if [ -f "${ENGINE_FILE}" ];then
    echo " "Build Engine Skip, $ENGINE_FILE has been existed
else
    python3 ${RUN_DIR}/build_i8_engine.py \
        --onnx "${FINAL_MODEL}" \
        --qparam_json ${CHECKPOINTS_DIR}/quant_cfg.json \
        --engine "${ENGINE_FILE}"; check_status
    echo " "Generate Engine ${ENGINE_FILE}
fi

# Inference
# The counter is advanced here too, so this stage no longer reuses the
# Build Engine step number.
step=$((step + 1))
echo;
echo "[STEP ${step}] : Inference"
python3 ${RUN_DIR}/inference.py \
    --engine_file=${ENGINE_FILE} \
    --datasets_dir=${DATASETS_DIR} \
    --imgsz=${IMGSIZE} \
    --warm_up=${WARM_UP} \
    --loop_count ${LOOP_COUNT} \
    --test_mode ${RUN_MODE} \
    --acc_target ${TGT} \
    --bsz ${BSZ}; check_status

exit ${EXIT_STATUS}
# INT8 performance run: simplify the ONNX model, quantize it, fix its batch
# size, build the INT8 engine and measure FPS. Expects CONFIG_DIR,
# CHECKPOINTS_DIR, DATASETS_DIR and RUN_DIR in the environment.

# Becomes 1 as soon as any stage fails; used as the script's exit status.
EXIT_STATUS=0
check_status()
{
    # PIPESTATUS[0] still holds the status of the command run just before this call.
    if ((${PIPESTATUS[0]} != 0));then
        EXIT_STATUS=1
    fi
}

# Run paraments
BSZ=32
TGT=-1
WARM_UP=3
LOOP_COUNT=20
RUN_MODE=FPS
PRECISION=int8
export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python

# Update arguments: "--bs N" overrides the batch size, "--tgt X" the target.
arguments=("$@")
index=0
for argument in "${arguments[@]}"
do
    index=$((index + 1))
    case "${argument}" in
      --bs) BSZ=${arguments[index]};;
      --tgt) TGT=${arguments[index]};;
    esac
done

source "${CONFIG_DIR}"
echo ${QUANT_OBSERVER}
ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}

echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
echo DATASETS_DIR : ${DATASETS_DIR}
echo RUN_DIR : ${RUN_DIR}
echo CONFIG_DIR : ${CONFIG_DIR}
echo ====================== Model Info ======================
echo Model Name : ${MODEL_NAME}
echo Model Input Name : ${MODEL_INPUT_NAME}
echo Model Output Name : ${MODEL_OUTPUT_NAME}
echo Onnx Path : ${ORIGINE_MODEL}

step=0
SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx

# Simplify Model
step=$((step + 1))
echo "[STEP ${step}] : Simplify Model"
if [ -f "${SIM_MODEL}" ];then
    echo " "Simplify Model, ${SIM_MODEL} has been existed
else
    python3 ${RUN_DIR}/simplify_model.py \
        --origin_model "${ORIGINE_MODEL}" \
        --output_model "${SIM_MODEL}"; check_status
    echo " "Generate ${SIM_MODEL}
fi

# Quant Model
if [ "${PRECISION}" == "int8" ];then
    step=$((step + 1))
    echo;
    echo "[STEP ${step}] : Quant Model"
    # QUANT_EXIST_ONNX may come from the config; otherwise use the default name.
    if [[ -z ${QUANT_EXIST_ONNX} ]];then
        QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx
    fi
    if [[ -f ${QUANT_EXIST_ONNX} ]];then
        SIM_MODEL=${QUANT_EXIST_ONNX}
        echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed
    else
        python3 ${RUN_DIR}/quant.py \
            --model ${SIM_MODEL} \
            --model_name ${MODEL_NAME} \
            --dataset_dir ${DATASETS_DIR} \
            --observer ${QUANT_OBSERVER} \
            --disable_quant_names ${DISABLE_QUANT_LIST[@]} \
            --save_dir $CHECKPOINTS_DIR \
            --bsz ${QUANT_BATCHSIZE} \
            --step ${QUANT_STEP} \
            --seed ${QUANT_SEED} \
            --imgsz ${IMGSIZE}; check_status
        SIM_MODEL=${QUANT_EXIST_ONNX}
        echo " "Generate ${SIM_MODEL}
    fi
fi

# Change Batchsize
step=$((step + 1))
echo;
echo "[STEP ${step}] : Change Batchsize"
FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx
if [ -f "${FINAL_MODEL}" ];then
    echo " "Change Batchsize Skip, $FINAL_MODEL has been existed
else
    python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
        --origin_model "${SIM_MODEL}" --output_model "${FINAL_MODEL}"; check_status
    echo " "Generate ${FINAL_MODEL}
fi

# Build Engine
step=$((step + 1))
echo;
echo "[STEP ${step}] : Build Engine"
ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
if [ -f "${ENGINE_FILE}" ];then
    echo " "Build Engine Skip, $ENGINE_FILE has been existed
else
    python3 ${RUN_DIR}/build_i8_engine.py \
        --onnx "${FINAL_MODEL}" \
        --qparam_json ${CHECKPOINTS_DIR}/quant_cfg.json \
        --engine "${ENGINE_FILE}"; check_status
    echo " "Generate Engine ${ENGINE_FILE}
fi

# Inference
# The counter is advanced here too, so this stage no longer reuses the
# Build Engine step number.
step=$((step + 1))
echo;
echo "[STEP ${step}] : Inference"
# In FPS mode the target is a throughput target, so it is passed as
# --fps_target, matching the fp16 performance script.
python3 ${RUN_DIR}/inference.py \
    --engine_file=${ENGINE_FILE} \
    --datasets_dir=${DATASETS_DIR} \
    --imgsz=${IMGSIZE} \
    --warm_up=${WARM_UP} \
    --loop_count ${LOOP_COUNT} \
    --test_mode ${RUN_MODE} \
    --fps_target ${TGT} \
    --bsz ${BSZ}; check_status

exit ${EXIT_STATUS}
# --- patch metadata carried over verbatim from the original line ---
# b/models/cv/classification/resnet_v1_d50/ixrt/simplify_model.py @@ -0,0 +1,40 @@
# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
# All Rights Reserved.
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.
import onnx
import argparse
from onnxsim import simplify


# Simplify
def simplify_model(args):
    """Simplify an ONNX model, re-run shape inference and save the result.

    Args:
        args: Parsed command-line namespace. ``origin_model`` is the ONNX file
            that is loaded and ``output_model`` the path that is written.
    """
    onnx_model = onnx.load(args.origin_model)
    model_simp, check = simplify(onnx_model)
    if not check:
        # The validation flag used to be dropped silently. The model is still
        # saved (previous behaviour), but the problem is now visible.
        print(" Warning: simplified ONNX model could not be validated.")
    model_simp = onnx.shape_inference.infer_shapes(model_simp)
    onnx.save(model_simp, args.output_model)
    print(" Simplify onnx Done.")


def parse_args():
    """Parse the command line.

    Returns:
        argparse.Namespace with ``origin_model``, ``output_model`` and the
        boolean ``reshape`` (accepted, but not read by ``simplify_model``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--origin_model", type=str)
    parser.add_argument("--output_model", type=str)
    parser.add_argument("--reshape", action="store_true")
    args = parser.parse_args()
    return args


if __name__ == "__main__":
    # Guarded so that importing this module does not parse sys.argv.
    simplify_model(parse_args())


# --- patch metadata and README text carried over verbatim from the original line ---
# diff --git a/models/cv/classification/squeezenet_1.1/ixrt/README.md b/models/cv/classification/squeezenet_1.1/ixrt/README.md new file mode 100644 index 0000000000000000000000000000000000000000..93377d88fb647398965fecf0948b7d5f03f34857 --- /dev/null +++ b/models/cv/classification/squeezenet_1.1/ixrt/README.md @@ -0,0 +1,63 @@
# # SqueezeNet 1.1
#
# ## Description
# SqueezeNet 1.1 is a deep learning model for image classification, designed to be lightweight and efficient for deployment on resource-constrained devices.
#
# It was developed by researchers at DeepScale and released in 2016.
+## Setup + +### Install +``` +yum install mesa-libGL +pip3 install tqdm +pip3 install onnx +pip3 install onnxsim +pip3 install tabulate +pip3 install ppq +pip3 install pycuda +pip3 install opencv-python==4.6.0.66 +``` +### Download +Pretrained model: https://download.pytorch.org/models/squeezenet1_1-b8a52dc0.pth + +Dataset: https://www.image-net.org/download.php to download the validation dataset. + +### Model Conversion +``` +mkdir checkpoints +python3 export_onnx.py --origin_model /path/to/squeezenet1_1-b8a52dc0.pth --output_model checkpoints/squeezenetv11.onnx +``` + +## Inference +``` +export PROJ_DIR=./ +export DATASETS_DIR=/path/to/imagenet_val/ +export CHECKPOINTS_DIR=./checkpoints +export RUN_DIR=./ +export CONFIG_DIR=config/SQUEEZENET_V11_CONFIG + +``` +### FP16 + +```bash +# Accuracy +bash scripts/infer_squeezenet_v11_fp16_accuracy.sh +# Performance +bash scripts/infer_squeezenet_v11_fp16_performance.sh +``` + +### INT8 + +```bash +# Accuracy +bash scripts/infer_squeezenet_v11_int8_accuracy.sh +# Performance +bash scripts/infer_squeezenet_v11_int8_performance.sh +``` + +## Results +Model |BatchSize |Precision |FPS |Top-1(%) |Top-5(%) +---------------|-----------|----------|---------|----------|-------- +SqueezeNet 1.1 | | FP16 | 13701 | 0.58182 | 0.80622 +SqueezeNet 1.1 | | INT8 | 20128 | 0.50966 | 0.77552 + diff --git a/models/cv/classification/squeezenet_1.1/ixrt/build_engine.py b/models/cv/classification/squeezenet_1.1/ixrt/build_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..32f549d8f2a0342be9103deab04071cebe913064 --- /dev/null +++ b/models/cv/classification/squeezenet_1.1/ixrt/build_engine.py @@ -0,0 +1,53 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
#    You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.

import os
import cv2
import argparse
import numpy as np

import torch
import tensorrt


def main(config):
    """Build a serialized engine from an ONNX file and write it to disk.

    Args:
        config: Namespace with ``model`` (ONNX path), ``precision``
            ("int8", "float16" or "float32") and ``engine`` (output path).

    Raises:
        RuntimeError: If the parser reports failure or no engine is produced.
    """
    logger = tensorrt.Logger(tensorrt.Logger.WARNING)
    builder = tensorrt.Builder(logger)
    explicit_batch = 1 << int(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    network = builder.create_network(explicit_batch)
    build_config = builder.create_builder_config()
    parser = tensorrt.OnnxParser(network, logger)
    # Only an explicit False is treated as failure, so a runtime whose
    # parse_from_file returns nothing keeps working as before.
    if parser.parse_from_file(config.model) is False:
        raise RuntimeError(f"Failed to parse ONNX file: {config.model}")

    # NOTE(review): every value other than "int8" (including "float32")
    # selects the FP16 flag - confirm that this is intended.
    if config.precision == "int8":
        precision = tensorrt.BuilderFlag.INT8
    else:
        precision = tensorrt.BuilderFlag.FP16
    # print("precision : ", precision)
    build_config.set_flag(precision)

    plan = builder.build_serialized_network(network, build_config)
    if plan is None:
        # Fail before opening the output: an empty engine file would make
        # later "engine already exists" checks skip the rebuild.
        raise RuntimeError(f"Failed to build an engine from: {config.model}")

    with open(config.engine, "wb") as f:
        f.write(plan)


def parse_args():
    """Parse ``--model``, ``--precision`` and ``--engine`` from the command line."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", type=str)
    parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8",
            help="The precision of datatype")
    parser.add_argument("--engine", type=str, default=None)
    args = parser.parse_args()
    return args


if __name__ == "__main__":
    args = parse_args()
    main(args)

# --- patch metadata carried over verbatim from the original line ---
# \ No newline at end of file diff --git a/models/cv/classification/squeezenet_1.1/ixrt/build_i8_engine.py b/models/cv/classification/squeezenet_1.1/ixrt/build_i8_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..6038b33f50cff7a14efcefa6673ae9d2fd19870b --- /dev/null +++
# --- patch metadata carried over verbatim from the original line ---
# b/models/cv/classification/squeezenet_1.1/ixrt/build_i8_engine.py @@ -0,0 +1,112 @@
# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
# All Rights Reserved.
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.
import argparse
import json
import os
import sys

import tensorrt
import tensorrt as trt

TRT_LOGGER = trt.Logger(tensorrt.Logger.VERBOSE)

EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)


def GiB(val):
    """Return ``val`` gibibytes expressed in bytes."""
    return val * (1 << 30)


def json_load(filename):
    """Load and return the JSON document stored in ``filename``."""
    with open(filename) as json_file:
        return json.load(json_file)


def _set_symmetric_range(tensor, act_quant):
    """Give ``tensor`` the range (-|v|, |v|) from ``act_quant``; return True if it was listed."""
    if tensor.name not in act_quant:
        return False
    bound = abs(act_quant[tensor.name])
    tensor.dynamic_range = (-bound, bound)
    return True


def setDynamicRange(network, json_file):
    """Sets ranges for network layers.

    Reads the ``act_quant_info`` mapping (tensor name -> value) from
    ``json_file`` and applies a symmetric dynamic range to every network input
    and layer output that is listed there.  Names of listed inputs are
    printed; names of layer outputs that are NOT listed are printed in green.
    """
    act_quant = json_load(json_file)["act_quant_info"]

    for i in range(network.num_inputs):
        input_tensor = network.get_input(i)
        if _set_symmetric_range(input_tensor, act_quant):
            print(input_tensor.name)

    for i in range(network.num_layers):
        layer = network.get_layer(i)
        for output_index in range(layer.num_outputs):
            tensor = layer.get_output(output_index)
            if not _set_symmetric_range(tensor, act_quant):
                print("\033[1;32m%s\033[0m" % tensor.name)


def build_engine(onnx_file, json_file, engine_file):
    """Build an INT8 engine from ``onnx_file`` and write it to ``engine_file``.

    Args:
        onnx_file: Path of the ONNX model to parse.
        json_file: Quantization parameter file consumed by ``setDynamicRange``.
        engine_file: Destination of the serialized engine.

    Raises:
        SystemExit: If the ONNX file is missing, cannot be parsed, or the
            builder returns no engine.  Parse failures previously returned
            None, which let the caller report success.
    """
    if not os.path.exists(onnx_file):
        sys.exit("ONNX file {} not found".format(onnx_file))

    builder = trt.Builder(TRT_LOGGER)
    network = builder.create_network(EXPLICIT_BATCH)

    config = builder.create_builder_config()

    # If it is a dynamic onnx model , you need to add the following.
    # profile = builder.create_optimization_profile()
    # profile.set_shape("input_name", (batch, channels, min_h, min_w), (batch, channels, opt_h, opt_w), (batch, channels, max_h, max_w))
    # config.add_optimization_profile(profile)

    parser = trt.OnnxParser(network, TRT_LOGGER)
    # config.max_workspace_size = GiB(1)

    with open(onnx_file, "rb") as model:
        if not parser.parse(model.read()):
            print("ERROR: Failed to parse the ONNX file.")
            for error in range(parser.num_errors):
                print(parser.get_error(error))
            sys.exit(1)

    config.set_flag(trt.BuilderFlag.INT8)

    setDynamicRange(network, json_file)

    engine = builder.build_engine(network, config)
    if engine is None:
        sys.exit("ERROR: Failed to build an engine from {}".format(onnx_file))

    with open(engine_file, "wb") as f:
        f.write(engine.serialize())


if __name__ == "__main__":
    # Add plugins if needed
    # import ctypes
    # ctypes.CDLL("libmmdeploy_tensorrt_ops.so")
    parser = argparse.ArgumentParser(
        description="Writing qparams to onnx to convert tensorrt engine."
    )
    parser.add_argument("--onnx", type=str, default=None)
    parser.add_argument("--qparam_json", type=str, default=None)
    parser.add_argument("--engine", type=str, default=None)
    arg = parser.parse_args()

    build_engine(arg.onnx, arg.qparam_json, arg.engine)
    print("\033[1;32mgenerate %s\033[0m" % arg.engine)

# --- patch metadata carried over verbatim from the original line ---
# \ No newline at end of file diff --git a/models/cv/classification/squeezenet_1.1/ixrt/calibration_dataset.py b/models/cv/classification/squeezenet_1.1/ixrt/calibration_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..b394c76b517cc72d5a1e2ee054b3e9fcc629dfef --- /dev/null +++ b/models/cv/classification/squeezenet_1.1/ixrt/calibration_dataset.py @@ -0,0 +1,112 @@
# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
# All Rights Reserved.
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.

import os
import torch
import torchvision.datasets
from torch.utils.data import DataLoader
from torchvision import models
from torchvision import transforms as T


class CalibrationImageNet(torchvision.datasets.ImageFolder):
    """ImageFolder whose labels come from a ``val_map.txt`` file in the root.

    Each usable line of ``val_map.txt`` is ``<image file name>\\t<integer label>``.
    """

    def __init__(self, *args, **kwargs):
        """Build the folder dataset and load the file-name -> label map.

        Raises:
            FileNotFoundError: If ``<root>/val_map.txt`` does not exist.
        """
        super(CalibrationImageNet, self).__init__(*args, **kwargs)
        img2label_path = os.path.join(self.root, "val_map.txt")
        if not os.path.exists(img2label_path):
            raise FileNotFoundError(f"Not found label file `{img2label_path}`.")

        self.img2label_map = self.make_img2label_map(img2label_path)

    def make_img2label_map(self, path):
        """Parse ``path`` into a dict mapping image file name to int label.

        Lines that do not consist of exactly two tab-separated fields, or
        whose image name is empty, are ignored.
        """
        img2label_map = dict()
        with open(path) as f:
            for line in f:
                fields = line.strip().split("\t")
                if len(fields) != 2:
                    continue
                img_name, label = fields
                img_name = img_name.strip()
                if not img_name:
                    continue
                img2label_map[img_name] = int(label.strip())
        return img2label_map

    def __getitem__(self, index):
        """Return ``(transformed image, label)`` for sample ``index``.

        The label is looked up by the image's base file name; the class index
        derived from the folder layout is not used.
        """
        path, _ = self.samples[index]
        sample = self.loader(path)
        if self.transform is not None:
            sample = self.transform(sample)
        # if self.target_transform is not None:
        #     target = self.target_transform(target)
        target = self.img2label_map[os.path.basename(path)]

        return sample, target


def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0):
    """Create the calibration and verification dataloaders.

    Args:
        data_path: Dataset root passed to ``CalibrationImageNet``.
        num_samples: Number of leading samples used for calibration;
            ``None`` uses the whole dataset.
        img_sz: Side length of the centre crop.
        batch_size: Batch size of both loaders.
        workers: Number of DataLoader worker processes.

    Returns:
        ``(calibration_dataloader, verify_dataloader)``; the second one
        always iterates the complete dataset.
    """
    dataset = CalibrationImageNet(
        data_path,
        transform=T.Compose(
            [
                T.Resize(256),
                T.CenterCrop(img_sz),
                T.ToTensor(),
                T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ]
        ),
    )

    calibration_dataset = dataset
    if num_samples is not None:
        # Clamp to the dataset size: indices past the end would only fail
        # later, in the middle of iteration.
        num_samples = min(num_samples, len(dataset))
        calibration_dataset = torch.utils.data.Subset(
            dataset, indices=range(num_samples)
        )

    calibration_dataloader = DataLoader(
        calibration_dataset,
        shuffle=False,
        batch_size=batch_size,
        drop_last=False,
        num_workers=workers,
    )

    verify_dataloader = DataLoader(
        dataset,
        shuffle=False,
        batch_size=batch_size,
        drop_last=False,
        num_workers=workers,
    )

    return calibration_dataloader, verify_dataloader


def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000):
    """Return a dataloader over the first ``step * batch_size`` samples.

    The sample count is capped at ``total_sample``; a negative ``step``
    selects the whole dataset.
    """
    if step < 0:
        num_samples = None
    else:
        num_samples = min(total_sample, step * batch_size)
    calibration_dataloader, _ = create_dataloaders(
        dataset_dir,
        img_sz=img_sz,
        batch_size=batch_size,
        workers=workers,
        num_samples=num_samples,
    )
    return calibration_dataloader

# --- patch metadata carried over verbatim from the original line ---
# \ No newline at end of file diff --git a/models/cv/classification/squeezenet_1.1/ixrt/common.py b/models/cv/classification/squeezenet_1.1/ixrt/common.py new file mode 100644 index 0000000000000000000000000000000000000000..abdc147cb913da3736ab2bc72628dc9cebf78d36 --- /dev/null +++ b/models/cv/classification/squeezenet_1.1/ixrt/common.py @@ -0,0 +1,79 @@
# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
# All Rights Reserved.
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.

import os
import cv2
import glob
import torch
import tensorrt
import numpy as np
import pycuda.driver as cuda


def eval_batch(batch_score, batch_label):
    """Count top-1 and top-5 hits for one batch.

    Args:
        batch_score: NumPy array of per-class scores, one row per sample.
        batch_label: Sequence (or tensor) of integer labels.  It may be
            shorter than ``batch_score``; only the first ``len(batch_label)``
            rows are evaluated.

    Returns:
        ``(top1, top5)`` as plain ints.
    """
    # from_numpy already yields a tensor; converting it directly avoids the
    # copy-construct warning raised by torch.tensor(<tensor>).
    scores = torch.from_numpy(batch_score).to(torch.float32)
    _, indices = scores.topk(5)

    labels = torch.as_tensor(batch_label).reshape(-1, 1)
    hits = indices[: labels.shape[0]].eq(labels)
    top1 = int(hits[:, 0].sum())
    top5 = int(hits.any(dim=1).sum())
    return top1, top5


def create_engine_context(engine_path, logger):
    """Deserialize the engine stored at ``engine_path`` and create a context.

    Returns:
        ``(engine, context)``.
    """
    with open(engine_path, "rb") as f:
        runtime = tensorrt.Runtime(logger)
        assert runtime
        engine = runtime.deserialize_cuda_engine(f.read())
        assert engine
        context = engine.create_execution_context()
        assert context

    return engine, context


def get_io_bindings(engine):
    """Allocate one device buffer per engine binding.

    Returns:
        ``(inputs, outputs, allocations)``.  ``inputs`` and ``outputs`` are
        lists of dicts with the keys ``index``, ``name``, ``dtype`` (NumPy
        dtype), ``shape`` (list) and ``allocation``; ``allocations`` holds
        every allocation in binding order.
    """
    # Setup I/O bindings
    inputs = []
    outputs = []
    allocations = []

    for i in range(engine.num_bindings):
        name = engine.get_binding_name(i)
        np_dtype = np.dtype(tensorrt.nptype(engine.get_binding_dtype(i)))
        shape = engine.get_binding_shape(i)

        # Buffer size in bytes: element size times every dimension.
        size = np_dtype.itemsize
        for s in shape:
            size *= s
        allocation = cuda.mem_alloc(size)

        binding = {
            "index": i,
            "name": name,
            "dtype": np_dtype,
            "shape": list(shape),
            "allocation": allocation,
        }
        print(f"binding {i}, name : {name} dtype : {np_dtype} shape : {list(shape)}")
        allocations.append(allocation)
        if engine.binding_is_input(i):
            inputs.append(binding)
        else:
            outputs.append(binding)
    return inputs, outputs, allocations

# --- patch metadata carried over verbatim from the original line ---
# diff --git a/models/cv/classification/squeezenet_1.1/ixrt/config/SQUEEZENET_V11_CONFIG b/models/cv/classification/squeezenet_1.1/ixrt/config/SQUEEZENET_V11_CONFIG new file mode 100644 index 0000000000000000000000000000000000000000..efc2a0870434a41575d490a17821a8ad816c81b9 --- /dev/null +++
# --- patch metadata carried over verbatim from the original line ---
# b/models/cv/classification/squeezenet_1.1/ixrt/config/SQUEEZENET_V11_CONFIG @@ -0,0 +1,19 @@
# IMGSIZE : height/width of the model input
# MODEL_NAME : basename used for the generated onnx/engine files
# ORIGINE_MODEL : file name of the original onnx model
IMGSIZE=224
MODEL_NAME=SqueezeNet_v11
ORIGINE_MODEL=squeezenetv11.onnx

# QUANT CONFIG (only takes effect when PRECISION is int8)
    # QUANT_OBSERVER : quantization strategy, one of [hist_percentile, percentile, minmax, entropy, ema]
    # QUANT_BATCHSIZE : batch size of the dataloader used during quantization; best kept equal to the batch size in the onnx model, since some ops (e.g. Reshape) may otherwise infer a wrong shape
    # QUANT_STEP : number of quantization steps
    # QUANT_SEED : random seed, so that quantization results are reproducible
    # QUANT_EXIST_ONNX : fill in when a quantized model from another source already exists
QUANT_OBSERVER=hist_percentile
QUANT_BATCHSIZE=1
QUANT_STEP=32
QUANT_SEED=42
DISABLE_QUANT_LIST=
QUANT_EXIST_ONNX=

# --- patch metadata carried over verbatim from the original line ---
# diff --git a/models/cv/classification/squeezenet_1.1/ixrt/export_onnx.py b/models/cv/classification/squeezenet_1.1/ixrt/export_onnx.py new file mode 100644 index 0000000000000000000000000000000000000000..2ff4fe090edafbdab7e904a9409a287aa0735864 --- /dev/null +++ b/models/cv/classification/squeezenet_1.1/ixrt/export_onnx.py @@ -0,0 +1,45 @@
# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
# All Rights Reserved.
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.
import torch
import torchvision.models as models
import argparse


def parse_args():
    """Parse ``--origin_model`` (weights file) and ``--output_model`` (ONNX path)."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--origin_model", type=str)
    parser.add_argument("--output_model", type=str)
    args = parser.parse_args()
    return args


def main():
    """Load SqueezeNet 1.1 weights and export the model to ONNX on the GPU."""
    args = parse_args()
    model = models.squeezenet1_1()
    model.load_state_dict(torch.load(args.origin_model))
    model.cuda()
    model.eval()
    # Named dummy_input so the builtin ``input`` is no longer shadowed.
    dummy_input = torch.randn(1, 3, 224, 224, device='cuda')

    torch.onnx.export(model,
                      dummy_input,
                      args.output_model,
                      export_params=True,
                      opset_version=11,
                      do_constant_folding=True,
                      input_names=['input'],
                      output_names=['output'],)
    print(" ")
    print('Model has been converted to ONNX')
    print("exit")


if __name__ == "__main__":
    main()

# --- patch metadata carried over verbatim from the original line ---
# \ No newline at end of file diff --git a/models/cv/classification/squeezenet_1.1/ixrt/inference.py b/models/cv/classification/squeezenet_1.1/ixrt/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..1ec56b4a1f09ee4bd7516461f758ac121a5346a0 --- /dev/null +++ b/models/cv/classification/squeezenet_1.1/ixrt/inference.py @@ -0,0 +1,157 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
# All Rights Reserved.
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.

import argparse
import json
import os
import re
import sys
import time
from tqdm import tqdm

import cv2
import numpy as np
import pycuda.autoinit
import pycuda.driver as cuda
import torch
import tensorrt

from calibration_dataset import getdataloader
from common import eval_batch, create_engine_context, get_io_bindings


def _benchmark_fps(config, context, allocations):
    """Time ``loop_count`` executions, print the FPS and exit 0 (pass) or 1 (fail)."""
    if config.loop_count <= 0:
        raise ValueError("--loop_count must be positive in FPS mode")

    torch.cuda.synchronize()
    start_time = time.time()

    for _ in range(config.loop_count):
        context.execute_v2(allocations)

    torch.cuda.synchronize()
    forward_time = time.time() - start_time

    # Every loop processes one batch, so this is the number of samples that
    # were really executed (it used to be capped at 50000).
    num_samples = config.loop_count * config.bsz
    fps = num_samples / forward_time

    print("FPS : ", fps)
    print(f"Performance Check : Test {fps} >= target {config.fps_target}")
    if fps >= config.fps_target:
        print("pass!")
        sys.exit()
    print("failed!")
    sys.exit(1)


def _evaluate_accuracy(config, context, inputs, outputs, allocations):
    """Run the dataset through the engine, print Acc@1/Acc@5 and exit 0 or 1."""
    dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz)

    ## Prepare the output data
    output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"])
    print(f"output shape : {output.shape} output type : {output.dtype}")

    total_sample = 0
    acc_top1, acc_top5 = 0, 0

    with tqdm(total=len(dataloader)) as _tqdm:
        for batch_data, batch_label in dataloader:
            batch_data = batch_data.numpy().astype(inputs[0]["dtype"])
            batch_data = np.ascontiguousarray(batch_data)
            total_sample += batch_data.shape[0]

            cuda.memcpy_htod(inputs[0]["allocation"], batch_data)
            context.execute_v2(allocations)
            cuda.memcpy_dtoh(output, outputs[0]["allocation"])

            # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model
            # (kept in a separate name so the host buffer is not rebound)
            scores = output.squeeze(axis=(2, 3)) if output.ndim == 4 else output

            batch_top1, batch_top5 = eval_batch(scores, batch_label)
            acc_top1 += batch_top1
            acc_top5 += batch_top5

            _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample),
                                acc_5='{:.4f}'.format(acc_top5/total_sample))
            _tqdm.update(1)

    print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}")
    print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}")
    acc1 = acc_top1/total_sample
    print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}")
    if acc1 >= config.acc_target:
        print("pass!")
        sys.exit()
    print("failed!")
    sys.exit(1)


def main(config):
    """Load the engine, warm up, then run the FPS benchmark or accuracy test.

    Args:
        config: Namespace produced by ``parse_config``.

    Raises:
        ValueError: If ``config.test_mode`` is neither "FPS" nor "ACC"
            (this case used to do nothing, silently).
    """
    logger = tensorrt.Logger(tensorrt.Logger.ERROR)

    # Load Engine && I/O bindings
    engine, context = create_engine_context(config.engine_file, logger)
    inputs, outputs, allocations = get_io_bindings(engine)

    # Warm up
    if config.warm_up > 0:
        print("\nWarm Start.")
        for _ in range(config.warm_up):
            context.execute_v2(allocations)
        print("Warm Done.")

    # Inference
    if config.test_mode == "FPS":
        _benchmark_fps(config, context, allocations)
    elif config.test_mode == "ACC":
        # The dataset is only needed, and therefore only opened, in this mode.
        _evaluate_accuracy(config, context, inputs, outputs, allocations)
    else:
        raise ValueError(f"Unknown --test_mode {config.test_mode!r}; expected FPS or ACC")


def parse_config():
    """Parse the inference command line and return the argparse namespace."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--test_mode", type=str, default="FPS", help="FPS or ACC")
    parser.add_argument(
        "--engine_file",
        type=str,
        help="engine file path"
    )
    parser.add_argument(
        "--datasets_dir",
        type=str,
        default="",
        help="ImageNet dir",
    )
    parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times")
    parser.add_argument("--bsz", type=int, default=32, help="test batch size")
    parser.add_argument(
        "--imgsz",
        "--img",
        "--img-size",
        type=int,
        default=224,
        help="inference size h,w",
    )
    parser.add_argument("--use_async", action="store_true")
    parser.add_argument(
        "--device", type=int, default=0, help="cuda device, i.e. 0 or 0,1,2,3,4"
    )
    parser.add_argument("--fps_target", type=float, default=-1.0)
    parser.add_argument("--acc_target", type=float, default=-1.0)
    parser.add_argument("--loop_count", type=int, default=-1)

    config = parser.parse_args()
    return config


if __name__ == "__main__":
    config = parse_config()
    main(config)

# --- patch metadata carried over verbatim from the original line; the code below belongs to modify_batchsize.py ---
# diff --git a/models/cv/classification/squeezenet_1.1/ixrt/modify_batchsize.py b/models/cv/classification/squeezenet_1.1/ixrt/modify_batchsize.py new file mode 100644 index 0000000000000000000000000000000000000000..4ac42a3084920c449bb80494518c5fedc8c64316 --- /dev/null +++ b/models/cv/classification/squeezenet_1.1/ixrt/modify_batchsize.py @@ -0,0 +1,57 @@
# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
# All Rights Reserved.
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.

import onnx
import argparse


def change_input_dim(model, bsz):
    """Set the first dimension of every graph input of ``model`` in place.

    Args:
        model: Loaded ONNX model; ``model.graph.input`` is modified.
        bsz: A non-numeric string gives a symbolic (dynamic) dimension; an
            int or a digit string gives that fixed size; anything else gives
            a batch size of 1.
    """
    batch_size = bsz

    # The following code changes the first dimension of every input to be batch_size
    # Modify as appropriate ... note that this requires all inputs to
    # have the same batch_size
    for graph_input in model.graph.input:
        # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim.
        # Add checks as needed.
        dim1 = graph_input.type.tensor_type.shape.dim[0]
        if isinstance(batch_size, str) and not batch_size.isdigit():
            # set dynamic batch size
            dim1.dim_param = batch_size
        elif isinstance(batch_size, (str, int)):
            # set given batch size; digit strings land here now instead of
            # being written as a symbolic name
            dim1.dim_value = int(batch_size)
        else:
            # set batch size of 1
            dim1.dim_value = 1


def parse_args():
    """Parse ``--batch_size``, ``--origin_model`` and ``--output_model``."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--batch_size", type=int)
    parser.add_argument("--origin_model", type=str)
    parser.add_argument("--output_model", type=str)
    args = parser.parse_args()
    return args


if __name__ == "__main__":
    args = parse_args()
    model = onnx.load(args.origin_model)
    change_input_dim(model, args.batch_size)
    onnx.save(model, args.output_model)

# --- patch metadata carried over verbatim from the original line ---
# diff --git a/models/cv/classification/squeezenet_1.1/ixrt/quant.py b/models/cv/classification/squeezenet_1.1/ixrt/quant.py new file mode 100644 index 0000000000000000000000000000000000000000..5d71c828629bb0370aa40c5bcdcf117812bbaedc --- /dev/null +++ b/models/cv/classification/squeezenet_1.1/ixrt/quant.py @@ -0,0 +1,166 @@
# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
# All Rights Reserved.
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.
"""Highly automated entry script for PPQ quantization.

The automated API receives its quantization parameters through a
QuantizationSetting object.

This file will show you how to quantize your network with PPQ
    You should prepare your model and calibration dataset as follow:

    ~/working/model.onnx                          <--  your model
    ~/working/data/*.npy or ~/working/data/*.bin  <--  your dataset

if you are using caffe model:
    ~/working/model.caffemdoel  <--  your model
    ~/working/model.prototext   <--  your model

### MAKE SURE YOUR INPUT LAYOUT IS [N, C, H, W] or [C, H, W] ###

quantized model will be generated at: ~/working/quantized.onnx
"""
from ppq import *
from ppq.api import *
import os
from calibration_dataset import getdataloader
import argparse
import random
import numpy as np
import torch


def setseed(seed=42):
    """Seed the Python, NumPy and torch random number generators."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)


def parse_args():
    """Parse the quantization command line, print it and return the namespace."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_name", type=str)
    parser.add_argument("--model", type=str)
    parser.add_argument("--dataset_dir", type=str, default="imagenet_val")
    parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"],
                        default="hist_percentile")
    parser.add_argument("--disable_quant_names", nargs='*', type=str)
    parser.add_argument("--save_dir", type=str, help="save path", default=None)
    parser.add_argument("--bsz", type=int, default=32)
    parser.add_argument("--step", type=int, default=20)
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--imgsz", type=int, default=224)
    args = parser.parse_args()
    print("Quant config:", args)
    print(args.disable_quant_names)
    return args


config = parse_args()
# --seed was parsed but never applied; seed all generators before any work.
setseed(config.seed)

# modify configuration below:
WORKING_DIRECTORY = 'checkpoints'  # choose your working directory
TARGET_PLATFORM = TargetPlatform.TRT_INT8  # choose your target platform
MODEL_TYPE = NetworkFramework.ONNX  # or NetworkFramework.CAFFE
INPUT_LAYOUT = 'chw'  # input data layout, chw or hwc
NETWORK_INPUTSHAPE = [1, 3, 224, 224]  # input shape of your network
EXECUTING_DEVICE = 'cuda'  # 'cuda' or 'cpu'.
REQUIRE_ANALYSE = False
TRAINING_YOUR_NETWORK = False  # whether to fine-tune the network
# -------------------------------------------------------------------
# Load the model file; PPQ parses the onnx or caffe model into its own format.
# If you are using pytorch, tensorflow or a similar framework, export the
# model to onnx first (torch.onnx.export is sufficient).
# -------------------------------------------------------------------
graph = None
if MODEL_TYPE == NetworkFramework.ONNX:
    graph = load_onnx_graph(onnx_import_file=config.model)
if MODEL_TYPE == NetworkFramework.CAFFE:
    graph = load_caffe_graph(
        caffemodel_path=os.path.join(WORKING_DIRECTORY, 'model.caffemodel'),
        prototxt_path=os.path.join(WORKING_DIRECTORY, 'model.prototxt'))
assert graph is not None, 'Graph Loading Error, Check your input again.'

# -------------------------------------------------------------------
# The SETTING object controls PPQ's quantization logic: graph fusion,
# dispatching scheme and detailed quantization strategy.
# When the quantization error of the network is too high, change attributes
# of the SETTING object to apply specific optimizations.
# -------------------------------------------------------------------
QS = QuantizationSettingFactory.default_setting()

# -------------------------------------------------------------------
# The following shows how to use the finetuning process to improve
# quantization accuracy.  PPQ offers more than ten algorithms that help
# recover accuracy; each is enabled with QS.xxxx = True.
# Enable them as needed, not all at once.
# -------------------------------------------------------------------
if TRAINING_YOUR_NETWORK:
    QS.lsq_optimization = True  # start retraining the network to lower the quantization error
    QS.lsq_optimization_setting.steps = 500  # retraining steps; affects training time, 500 steps take a few minutes
    QS.lsq_optimization_setting.collecting_device = 'cuda'  # where cached data lives; 'cuda' means GPU, switch to 'cpu' if GPU memory runs out


dataloader = getdataloader(config.dataset_dir, config.step, batch_size=config.bsz, img_sz=config.imgsz)
# ENABLE CUDA KERNEL speeds quantization up by 3x ~ 10x, but it cannot be
# compiled without the matching build environment.  Either install that
# environment or quantize without the CUDA kernel by removing
# "with ENABLE_CUDA_KERNEL():".
with ENABLE_CUDA_KERNEL():
    print('网络正量化中,根据你的量化配置,这将需要一段时间:')
    quantized = quantize_native_model(
        setting=QS,  # the setting object controls the standard quantization logic
        model=graph,
        calib_dataloader=dataloader,
        calib_steps=config.step,
        input_shape=NETWORK_INPUTSHAPE,  # use this argument when the network has a single input
        inputs=None,
        # use this argument when the network has several inputs, i.e. input_shape=None, inputs=[torch.zeros(1,3,224,224), torch.zeros(1,3,224,224)]
        collate_fn=lambda x: x[0].to(EXECUTING_DEVICE),  # same role as the collate fn of a torch dataloader: data preprocessing;
        # you may also use the one of the torch dataloader and set this to None
        platform=TARGET_PLATFORM,
        device=EXECUTING_DEVICE,
        do_quantize=True)

    # -------------------------------------------------------------------
    # To execute the quantized network and obtain results, create an executor.
    # The executor behaves similarly to torch.Module and can be used to get
    # execution results.
    # Note that this must be done before exporting.
    # -------------------------------------------------------------------
    executor = TorchExecutor(graph=quantized, device=EXECUTING_DEVICE)
    # output = executor.forward(input)

    # -------------------------------------------------------------------
    # PPQ measures quantization error as the inverse signal-to-noise ratio,
    # i.e. noise energy / signal energy.
    # An error of 0.1 means the quantization noise carries about 10% of the
    # energy of the whole signal.
    # graphwise_error_analyse measures the ACCUMULATED error: the last layer
    # usually shows a large accumulated error caused by all preceding layers.
    # Use layerwise_error_analyse to find the source of the error layer by layer.
    # -------------------------------------------------------------------
    print('正计算网络量化误差(SNR),最后一层的误差应小于 0.1 以保证量化精度:')
    reports = graphwise_error_analyse(
        graph=quantized, running_device=EXECUTING_DEVICE, steps=32,
        dataloader=dataloader, collate_fn=lambda x: x[0].to(EXECUTING_DEVICE))
    for op, snr in reports.items():
        if snr > 0.1: ppq_warning(f'层 {op} 的累计量化误差显著,请考虑进行优化')

    if REQUIRE_ANALYSE:
        print('正计算逐层量化误差(SNR),每一层的独立量化误差应小于 0.1 以保证量化精度:')
        # The dataloader yields (images, labels); pick the images like the
        # other collate functions in this script do.
        layerwise_error_analyse(graph=quantized, running_device=EXECUTING_DEVICE,
                                interested_outputs=None,
                                dataloader=dataloader, collate_fn=lambda x: x[0].to(EXECUTING_DEVICE))

    # -------------------------------------------------------------------
    # export_ppq_graph exports the quantized model.
    # PPQ adapts the model format to the selected export platform.
    # -------------------------------------------------------------------
    print('网络量化结束,正在生成目标文件:')
    export_ppq_graph(
        graph=quantized, platform=TARGET_PLATFORM,
        graph_save_to=os.path.join(config.save_dir, f"quantized_{config.model_name}.onnx"),
        config_save_to=os.path.join(config.save_dir, 'quant_cfg.json'))

# --- patch metadata carried over verbatim from the original line ---
# diff --git a/models/cv/classification/squeezenet_1.1/ixrt/refine_model.py b/models/cv/classification/squeezenet_1.1/ixrt/refine_model.py new file mode 100644 index 0000000000000000000000000000000000000000..000ee4dcbf3df294a34cd83c97527bba00024ac7 --- /dev/null +++ b/models/cv/classification/squeezenet_1.1/ixrt/refine_model.py @@ -0,0 +1,291 @@
# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
# All Rights Reserved.
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.
import os
import argparse
import dataclasses

import torch
import onnx

from refine_utils.matmul_to_gemm_pass import FusedGemmPass
from refine_utils.linear_pass import FusedLinearPass

from refine_utils.common import *


def get_constant_input_name_of_operator(graph: Graph, operator: Operator):
    """Return the name of a constant leaf input of ``operator``, or None.

    An input qualifies when the graph contains it, it is a leaf variable and
    its ``value`` is set. When several inputs qualify the last one wins.
    """
    const = None
    for input_name in operator.inputs:
        if not graph.containe_var(input_name):
            continue

        if not graph.is_leaf_variable(input_name):
            continue

        if graph.get_variable(input_name).value is not None:
            const = input_name
    return const


class FuseLayerNormPass(BasePass):
    """Fuse the decomposed layer-norm operator chain into one LayerNormalization."""

    def process(self, graph: Graph) -> Graph:
        """Search ``graph`` for the layer-norm sequence and fuse every match."""
        self.transform = GraphTransform(graph)
        find_sequence_subgraph(
            graph,
            [OP.REDUCE_MEAN, OP.SUB, OP.POW, OP.REDUCE_MEAN, OP.ADD, OP.SQRT, OP.DIV, OP.MUL, OP.ADD],
            self.fuse_layer_norm,
            strict=False
        )
        return graph

    def fuse_layer_norm(self, graph: Graph, pattern: PatternGraph):
        """Replace one matched sequence with a LayerNormalization operator.

        The match is left untouched when it is not a real layer norm: the
        data-flow checks fail, or no constant epsilon / scale / bias is found.
        """
        nodes = pattern.nodes

        # The first REDUCE_MEAN and the SUB must read the same tensor.
        if nodes[0].operator.inputs[0] != nodes[1].operator.inputs[0]:
            return

        # The POW and the DIV must read the same tensor.
        if nodes[2].operator.inputs[0] != nodes[6].operator.inputs[0]:
            return

        # Intermediate results must not be consumed by more than one operator.
        for node in [nodes[0]] + nodes[2:-1]:
            next_ops = graph.get_next_operators(node.operator)
            if len(next_ops) > 1:
                return

        # Epsilon is the constant leaf input of the ADD that precedes SQRT.
        eps = None
        for input_name in nodes[4].operator.inputs:
            input_var = graph.get_variable(input_name)
            if input_var.value is not None and graph.is_leaf_variable(input_name):
                eps = to_py_type(input_var.value)

        scale = get_constant_input_name_of_operator(graph, nodes[-2].operator)
        bias = get_constant_input_name_of_operator(graph, nodes[-1].operator)

        # Bail out *before* mutating the graph: without all three constants
        # the fused operator would be built with None inputs/attributes.
        if eps is None or scale is None or bias is None:
            return

        self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator)

        attributes = {
            "axis": nodes[0].operator.attributes.axes,
            "epsilon": eps,
        }

        layer_norm_op = self.transform.make_operator(
            op_type="LayerNormalization",
            inputs=[nodes[0].operator.inputs[0], scale, bias],
            outputs=[nodes[-1].operator.outputs[0]],
            **attributes
        )

        self.transform.add_operator(layer_norm_op)


class FusedGeluPass(BasePass):
    """Fuse the DIV -> ERF -> ADD -> MUL -> MUL chain into a single GELU."""

    def process(self, graph: Graph) -> Graph:
        """Search ``graph`` for the GELU sequence and fuse every match."""
        self.transform = GraphTransform(graph)

        find_sequence_subgraph(
            graph, pattern=[OP.DIV, OP.ERF, OP.ADD, OP.MUL, OP.MUL], callback=self.fuse_gelu, strict=True
        )
        return graph

    def fuse_gelu(self, graph: Graph, pattern: PatternGraph):
        """Replace one matched sequence with a GELU operator.

        The producer feeding the DIV must have exactly two consumers: the DIV
        and the first MUL of the pattern. Anything else is left untouched.
        """
        nodes = pattern.nodes

        # A DIV fed directly by a graph input has no producer to inspect.
        prev_ops = self.transform.get_previous_operators(nodes[0].operator)
        if len(prev_ops) == 0:
            return
        prev_op = prev_ops[0]

        next_ops = self.transform.get_next_operators(prev_op)
        if len(next_ops) != 2:
            return

        if nodes[0].operator not in next_ops or nodes[3].operator not in next_ops:
            return

        # The GELU input is the tensor shared by the DIV and the first MUL.
        gelu_op_input = None
        for input_name in nodes[3].operator.inputs:
            if input_name in nodes[0].operator.inputs:
                gelu_op_input = input_name
                break

        # Do not delete anything when no shared input exists.
        if gelu_op_input is None:
            return

        self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator)

        gelu_op = self.transform.make_operator(
            op_type=OP.GELU,
            inputs=[gelu_op_input],
            outputs=[nodes[-1].operator.outputs[0]]
        )
        self.transform.add_operator(gelu_op)


@dataclasses.dataclass
class NormalizeAttr(BaseOperatorAttr):
    """Attributes of a normalize operator (norm order, epsilon, axis)."""
    p: float = 2.0
    epsilon: float = 1e-12
    axis: int = 1


@registe_operator(OP.GELU)
class GeluOperator(BaseOperator):
    """Operator implementation registered for ``OP.GELU``."""

    def call(
        self,
        executor,
        operator: Operator,
        inputs: List,
        attr: NormalizeAttr,
    ):
        """Apply GELU to the first input; ``attr`` is not used."""
        return F.gelu(inputs[0])

    def convert_onnx_operator(
        self, ir_graph: Graph, onnx_graph: onnx.GraphProto, node: onnx.NodeProto
    ) -> Operator:
        """Convert an ONNX node with the default converter and empty attributes."""
        return default_converter(ir_graph, onnx_graph, node, attr_cls=attr.EmptyAttr)

    def quantize(
        self,
        graph: Graph,
        op: Operator,
        operator_observer_config: QuantOperatorObserverConfig,
        quant_outputs: bool = False,
    ):
        """Quantize the operator as a single-input operator."""
        return quant_single_input_operator(graph, op, operator_observer_config, quant_outputs=quant_outputs)


class ClearUnsedVariables(BasePass):
    """Drop leaf variables nobody consumes and their stale quant parameters."""

    def process(self, graph: Graph) -> Graph:
        """Remove unused leaf variables, then orphaned quantization parameters."""
        # Iterate over snapshots because the graph is mutated while looping.
        for var in list(graph.variables):
            if len(graph.get_dst_operators(var)) == 0 and graph.is_leaf_variable(var):
                graph.delete_variable(var)

        for var in list(graph.quant_parameters.keys()):
            if not graph.containe_var(var):
                graph.quant_parameters.pop(var)

        return graph


class FormatLayerNorm(BasePass):
    """Normalize the ``axis`` attribute of LayerNorm operators to a scalar."""

    def process(self, graph: Graph) -> Graph:
        """Apply :meth:`format_layer_norm` to every operator whose type contains "LayerNorm"."""
        for op in graph.operators.values():
            if "LayerNorm" in op.op_type:
                self.format_layer_norm(graph, op)
        return graph

    def format_layer_norm(self, graph, operator):
        """Replace a tuple/list ``axis`` attribute by its first element."""
        if not hasattr(operator.attributes, "axis"):
            return
        if isinstance(operator.attributes.axis, (tuple, list)):
            operator.attributes.axis = operator.attributes.axis[0]


class FormatReshape(BasePass):
    """Store the shape input of every Reshape operator as an int64 tensor."""

    def process(self, graph: Graph) -> Graph:
        """Apply :meth:`format_reshape` to every Reshape operator."""
        for op in graph.operators.values():
            if op.op_type == "Reshape":
                self.format_reshape(graph, op)

        return graph

    def format_reshape(self, graph, operator):
        """Convert the value of the operator's second input to ``torch.int64``."""
        shape = graph.get_variable(operator.inputs[1])
        shape.value = torch.tensor(shape.value, dtype=torch.int64)


class FormatScalar(BasePass):
    """Reshape 0-d constants consumed by MUL / ADD / GATHER into 1-element tensors."""

    def process(self, graph: Graph):
        """Reshape qualifying scalar variables in place and return the graph."""
        for var in graph.variables.values():
            var: Variable
            use_ops = graph.get_dst_operators(var)

            if len(use_ops) == 0:
                continue

            # Only the first consumer decides whether the variable is handled.
            if use_ops[0].op_type not in [OP.MUL, OP.ADD, OP.GATHER]:
                continue

            if var.value is not None and var.value.ndim == 0:
                var.value = var.value.reshape(1)
                print(f"Reshape scalar to tensor for {var.name}.")

        return graph


class RenamePass(BasePass):
    """Rewrite operator and variable names that contain "/" or "output"."""

    def process(self, graph: Graph):
        """Rename all operators, then all variables, and return the graph."""
        # Snapshot the keys: renaming mutates the underlying mappings.
        for old_name in list(graph.operators.keys()):
            new_name = old_name.replace("/", "#")

            graph.rename_operator(old_name, new_name)

        for name in list(graph.variables.keys()):
            new_name = name.replace("/", ".").replace("Output", "out").replace("output", "out")

            graph.rename_vaiable(name, new_name,
                                 with_variables=True,
                                 with_operator_outputs=True)

        return graph


def create_pipeline(example_inputs):
    """Build the refinement pass sequence; only GELU fusion is enabled.

    ``example_inputs`` is accepted for interface compatibility and not used.
    """
    return PassSequence(
        # FuseLayerNormPass(),
        FusedGeluPass(),

        # ClearUnsedVariables(),
        # FormatLayerNorm(),
        # FormatReshape(),
        # FormatScalar(),
        # RenamePass()
    )


def parse_args():
    """Parse the command line of the refinement script."""
    parser = argparse.ArgumentParser()
    # Both paths are mandatory: without them the pipeline would try to
    # read/write a file literally named "None".
    parser.add_argument("--onnx_path", type=str, required=True)
    parser.add_argument("--dst_onnx_path", type=str, required=True)

    parser.add_argument("--bsz", type=int, default=8,
                        help="Batch size")
    parser.add_argument("--imgsz", type=int, default=224,
                        help="Image size")

    return parser.parse_args()


if __name__ == "__main__":
    args = parse_args()

    example_inputs = torch.randn(args.bsz, 3, args.imgsz, args.imgsz)

    refine_pipeline = Pipeline(
        create_source(f"{args.onnx_path}", example_inputs=example_inputs),
        create_pipeline(example_inputs),
        create_target(
            f"{args.dst_onnx_path}",
            example_inputs=example_inputs,
        )
    )
    refine_pipeline.run()

    print(f"refine the model, input shape={example_inputs.shape}")
# Exit code of the whole script; switched to 1 by check_status.
EXIT_STATUS=0

# Mark the run as failed when the first command of the most recent
# pipeline exited with a non-zero status.
check_status() {
    if ((${PIPESTATUS[0]} != 0)); then
        EXIT_STATUS=1
    fi
}

# Default run parameters (accuracy run, FP16)
BSZ=32
TGT=-1
WARM_UP=0
LOOP_COUNT=-1
RUN_MODE=ACC
PRECISION=float16

# Command-line overrides: "--bs <batch size>" and "--tgt <target>".
# "index" always points at the word following the current one.
index=0
options=$@
arguments=($options)
for argument in $options; do
    index=$((index + 1))
    case $argument in
        --bs) BSZ=${arguments[index]} ;;
        --tgt) TGT=${arguments[index]} ;;
    esac
done

# Load the model configuration file named by CONFIG_DIR. Variables used
# below but never assigned here (MODEL_NAME, IMGSIZE, QUANT_*, ...) are
# expected to come from it or from the environment.
source ${CONFIG_DIR}
ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}

echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
echo DATASETS_DIR : ${DATASETS_DIR}
echo RUN_DIR : ${RUN_DIR}
echo CONFIG_DIR : ${CONFIG_DIR}
echo ====================== Model Info ======================
echo Model Name : ${MODEL_NAME}
echo Onnx Path : ${ORIGINE_MODEL}

step=0
SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx

# Step: simplify the ONNX model (skipped when the output already exists)
step=$((step + 1))
echo;
echo [STEP ${step}] : Simplify Model
if [ -f ${SIM_MODEL} ]; then
    echo " "Simplify Model, ${SIM_MODEL} has been existed
else
    python3 ${RUN_DIR}/simplify_model.py \
        --origin_model $ORIGINE_MODEL \
        --output_model ${SIM_MODEL}
    echo " "Generate ${SIM_MODEL}
fi

# Step: quantize the model (only reached when PRECISION is int8)
if [ $PRECISION == "int8" ]; then
    step=$((step + 1))
    echo;
    echo [STEP ${step}] : Quant Model
    if [[ -z ${QUANT_EXIST_ONNX} ]]; then
        QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx
    fi
    if [[ -f ${QUANT_EXIST_ONNX} ]]; then
        SIM_MODEL=${QUANT_EXIST_ONNX}
        echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed
    else
        python3 ${RUN_DIR}/quant.py \
            --model ${SIM_MODEL} \
            --model_name ${MODEL_NAME} \
            --dataset_dir ${DATASETS_DIR} \
            --observer ${QUANT_OBSERVER} \
            --disable_quant_names ${DISABLE_QUANT_LIST[@]} \
            --save_dir $CHECKPOINTS_DIR \
            --bsz ${QUANT_BATCHSIZE} \
            --step ${QUANT_STEP} \
            --seed ${QUANT_SEED} \
            --imgsz ${IMGSIZE}
        SIM_MODEL=${QUANT_EXIST_ONNX}
        echo " "Generate ${SIM_MODEL}
    fi
fi

# Step: rewrite the model for the requested batch size
step=$((step + 1))
echo;
echo [STEP ${step}] : Change Batchsize
FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx
if [ -f $FINAL_MODEL ]; then
    echo " "Change Batchsize Skip, $FINAL_MODEL has been existed
else
    python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
        --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL}
    echo " "Generate ${FINAL_MODEL}
fi

# Step: build the inference engine (skipped when the engine file exists)
step=$((step + 1))
echo;
echo [STEP ${step}] : Build Engine
ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
if [ -f $ENGINE_FILE ]; then
    echo " "Build Engine Skip, $ENGINE_FILE has been existed
else
    python3 ${RUN_DIR}/build_engine.py \
        --precision ${PRECISION} \
        --model ${FINAL_MODEL} \
        --engine ${ENGINE_FILE}
    echo " "Generate Engine ${ENGINE_FILE}
fi

# Step: run inference; this is the only command whose status is checked
step=$((step + 1))
echo;
echo [STEP ${step}] : Inference
python3 ${RUN_DIR}/inference.py \
    --engine_file=${ENGINE_FILE} \
    --datasets_dir=${DATASETS_DIR} \
    --imgsz=${IMGSIZE} \
    --warm_up=${WARM_UP} \
    --loop_count ${LOOP_COUNT} \
    --test_mode ${RUN_MODE} \
    --acc_target ${TGT} \
    --bsz ${BSZ}; check_status

exit ${EXIT_STATUS}
# Exit code of the whole script; switched to 1 by check_status.
EXIT_STATUS=0

# Mark the run as failed when the first command of the most recent
# pipeline exited with a non-zero status.
check_status() {
    if ((${PIPESTATUS[0]} != 0)); then
        EXIT_STATUS=1
    fi
}

# Default run parameters (throughput run, FP16)
BSZ=32
TGT=-1
WARM_UP=3
LOOP_COUNT=20
RUN_MODE=FPS
PRECISION=float16

# Command-line overrides: "--bs <batch size>" and "--tgt <target>".
# "index" always points at the word following the current one.
index=0
options=$@
arguments=($options)
for argument in $options; do
    index=$((index + 1))
    case $argument in
        --bs) BSZ=${arguments[index]} ;;
        --tgt) TGT=${arguments[index]} ;;
    esac
done

# Load the model configuration file named by CONFIG_DIR. Variables used
# below but never assigned here (MODEL_NAME, IMGSIZE, QUANT_*, ...) are
# expected to come from it or from the environment.
source ${CONFIG_DIR}
ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}

echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
echo DATASETS_DIR : ${DATASETS_DIR}
echo RUN_DIR : ${RUN_DIR}
echo CONFIG_DIR : ${CONFIG_DIR}
echo ====================== Model Info ======================
echo Model Name : ${MODEL_NAME}
echo Onnx Path : ${ORIGINE_MODEL}

step=0
SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx

# Step: simplify the ONNX model (skipped when the output already exists)
step=$((step + 1))
echo;
echo [STEP ${step}] : Simplify Model
if [ -f ${SIM_MODEL} ]; then
    echo " "Simplify Model, ${SIM_MODEL} has been existed
else
    python3 ${RUN_DIR}/simplify_model.py \
        --origin_model $ORIGINE_MODEL \
        --output_model ${SIM_MODEL}
    echo " "Generate ${SIM_MODEL}
fi

# Step: quantize the model (only reached when PRECISION is int8)
if [ $PRECISION == "int8" ]; then
    step=$((step + 1))
    echo;
    echo [STEP ${step}] : Quant Model
    if [[ -z ${QUANT_EXIST_ONNX} ]]; then
        QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx
    fi
    if [[ -f ${QUANT_EXIST_ONNX} ]]; then
        SIM_MODEL=${QUANT_EXIST_ONNX}
        echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed
    else
        python3 ${RUN_DIR}/quant.py \
            --model ${SIM_MODEL} \
            --model_name ${MODEL_NAME} \
            --dataset_dir ${DATASETS_DIR} \
            --observer ${QUANT_OBSERVER} \
            --disable_quant_names ${DISABLE_QUANT_LIST[@]} \
            --save_dir $CHECKPOINTS_DIR \
            --bsz ${QUANT_BATCHSIZE} \
            --step ${QUANT_STEP} \
            --seed ${QUANT_SEED} \
            --imgsz ${IMGSIZE}
        SIM_MODEL=${QUANT_EXIST_ONNX}
        echo " "Generate ${SIM_MODEL}
    fi
fi

# Step: rewrite the model for the requested batch size
step=$((step + 1))
echo;
echo [STEP ${step}] : Change Batchsize
FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx
if [ -f $FINAL_MODEL ]; then
    echo " "Change Batchsize Skip, $FINAL_MODEL has been existed
else
    python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
        --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL}
    echo " "Generate ${FINAL_MODEL}
fi

# Step: build the inference engine (skipped when the engine file exists)
step=$((step + 1))
echo;
echo [STEP ${step}] : Build Engine
ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
if [ -f $ENGINE_FILE ]; then
    echo " "Build Engine Skip, $ENGINE_FILE has been existed
else
    python3 ${RUN_DIR}/build_engine.py \
        --precision ${PRECISION} \
        --model ${FINAL_MODEL} \
        --engine ${ENGINE_FILE}
    echo " "Generate Engine ${ENGINE_FILE}
fi

# Step: run inference; this is the only command whose status is checked
step=$((step + 1))
echo;
echo [STEP ${step}] : Inference
python3 ${RUN_DIR}/inference.py \
    --engine_file=${ENGINE_FILE} \
    --datasets_dir=${DATASETS_DIR} \
    --imgsz=${IMGSIZE} \
    --warm_up=${WARM_UP} \
    --loop_count ${LOOP_COUNT} \
    --test_mode ${RUN_MODE} \
    --fps_target ${TGT} \
    --bsz ${BSZ}; check_status

exit ${EXIT_STATUS}
# Trace every command (kept from the original script).
set -x

# Exit code of the whole script; switched to 1 by check_status.
EXIT_STATUS=0

# Mark the run as failed when the first command of the most recent
# pipeline exited with a non-zero status.
check_status()
{
    if ((${PIPESTATUS[0]} != 0));then
        EXIT_STATUS=1
    fi
}

# Default run parameters (accuracy run, INT8)
BSZ=32
TGT=-1
WARM_UP=0
LOOP_COUNT=-1
RUN_MODE=ACC
PRECISION=int8
export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python

# Command-line overrides: "--bs <batch size>" and "--tgt <target>".
# "index" always points at the word following the current one.
index=0
options=$@
arguments=($options)
for argument in $options
do
    index=`expr $index + 1`
    case $argument in
      --bs) BSZ=${arguments[index]};;
      --tgt) TGT=${arguments[index]};;
    esac
done

# Load the model configuration file named by CONFIG_DIR. Variables used
# below but never assigned here (MODEL_NAME, IMGSIZE, QUANT_*, ...) are
# expected to come from it or from the environment.
source ${CONFIG_DIR}
echo ${QUANT_OBSERVER}
ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}

echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
echo DATASETS_DIR : ${DATASETS_DIR}
echo RUN_DIR : ${RUN_DIR}
echo CONFIG_DIR : ${CONFIG_DIR}
echo ====================== Model Info ======================
echo Model Name : ${MODEL_NAME}
echo Model Input Name : ${MODEL_INPUT_NAME}
echo Model Output Name : ${MODEL_OUTPUT_NAME}
echo Onnx Path : ${ORIGINE_MODEL}

step=0
SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx

# Step: simplify the ONNX model (skipped when the output already exists)
let step++
echo [STEP ${step}] : Simplify Model
if [ -f ${SIM_MODEL} ];then
    echo " "Simplify Model, ${SIM_MODEL} has been existed
else
    python3 ${RUN_DIR}/simplify_model.py \
        --origin_model $ORIGINE_MODEL \
        --output_model ${SIM_MODEL}
    echo " "Generate ${SIM_MODEL}
fi

# Step: quantize the model (reuses an existing quantized ONNX if present)
if [ $PRECISION == "int8" ];then
    let step++
    echo;
    echo [STEP ${step}] : Quant Model
    if [[ -z ${QUANT_EXIST_ONNX} ]];then
        QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx
    fi
    if [[ -f ${QUANT_EXIST_ONNX} ]];then
        SIM_MODEL=${QUANT_EXIST_ONNX}
        echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed
    else
        python3 ${RUN_DIR}/quant.py \
            --model ${SIM_MODEL} \
            --model_name ${MODEL_NAME} \
            --dataset_dir ${DATASETS_DIR} \
            --observer ${QUANT_OBSERVER} \
            --disable_quant_names ${DISABLE_QUANT_LIST[@]} \
            --save_dir $CHECKPOINTS_DIR \
            --bsz ${QUANT_BATCHSIZE} \
            --step ${QUANT_STEP} \
            --seed ${QUANT_SEED} \
            --imgsz ${IMGSIZE}
        SIM_MODEL=${QUANT_EXIST_ONNX}
        echo " "Generate ${SIM_MODEL}
    fi
fi

# Step: rewrite the quantized model for the requested batch size
let step++
echo;
echo [STEP ${step}] : Change Batchsize
FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx
if [ -f $FINAL_MODEL ];then
    echo " "Change Batchsize Skip, $FINAL_MODEL has been existed
else
    python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
        --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL}
    echo " "Generate ${FINAL_MODEL}
fi

# Step: build the INT8 engine from the ONNX model and quant_cfg.json
let step++
echo;
echo [STEP ${step}] : Build Engine
ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
if [ -f $ENGINE_FILE ];then
    echo " "Build Engine Skip, $ENGINE_FILE has been existed
else
    python3 ${RUN_DIR}/build_i8_engine.py \
        --onnx ${FINAL_MODEL} \
        --qparam_json ${CHECKPOINTS_DIR}/quant_cfg.json \
        --engine ${ENGINE_FILE}
    echo " "Generate Engine ${ENGINE_FILE}
fi

# Step: run inference; this is the only command whose status is checked.
# The increment was commented out, which made this step print the same
# number as "Build Engine".
let step++
echo;
echo [STEP ${step}] : Inference
python3 ${RUN_DIR}/inference.py \
    --engine_file=${ENGINE_FILE} \
    --datasets_dir=${DATASETS_DIR} \
    --imgsz=${IMGSIZE} \
    --warm_up=${WARM_UP} \
    --loop_count ${LOOP_COUNT} \
    --test_mode ${RUN_MODE} \
    --acc_target ${TGT} \
    --bsz ${BSZ}; check_status

exit ${EXIT_STATUS}
# Exit code of the whole script; switched to 1 by check_status.
EXIT_STATUS=0

# Mark the run as failed when the first command of the most recent
# pipeline exited with a non-zero status.
check_status()
{
    if ((${PIPESTATUS[0]} != 0));then
        EXIT_STATUS=1
    fi
}

# Default run parameters (throughput run, INT8)
BSZ=32
TGT=-1
WARM_UP=3
LOOP_COUNT=20
RUN_MODE=FPS
PRECISION=int8
export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python

# Command-line overrides: "--bs <batch size>" and "--tgt <target>".
# "index" always points at the word following the current one.
index=0
options=$@
arguments=($options)
for argument in $options
do
    index=`expr $index + 1`
    case $argument in
      --bs) BSZ=${arguments[index]};;
      --tgt) TGT=${arguments[index]};;
    esac
done

# Load the model configuration file named by CONFIG_DIR. Variables used
# below but never assigned here (MODEL_NAME, IMGSIZE, QUANT_*, ...) are
# expected to come from it or from the environment.
source ${CONFIG_DIR}
echo ${QUANT_OBSERVER}
ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}

echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
echo DATASETS_DIR : ${DATASETS_DIR}
echo RUN_DIR : ${RUN_DIR}
echo CONFIG_DIR : ${CONFIG_DIR}
echo ====================== Model Info ======================
echo Model Name : ${MODEL_NAME}
echo Model Input Name : ${MODEL_INPUT_NAME}
echo Model Output Name : ${MODEL_OUTPUT_NAME}
echo Onnx Path : ${ORIGINE_MODEL}

step=0
SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx

# Step: simplify the ONNX model (skipped when the output already exists)
let step++
echo [STEP ${step}] : Simplify Model
if [ -f ${SIM_MODEL} ];then
    echo " "Simplify Model, ${SIM_MODEL} has been existed
else
    python3 ${RUN_DIR}/simplify_model.py \
        --origin_model $ORIGINE_MODEL \
        --output_model ${SIM_MODEL}
    echo " "Generate ${SIM_MODEL}
fi

# Step: quantize the model (reuses an existing quantized ONNX if present)
if [ $PRECISION == "int8" ];then
    let step++
    echo;
    echo [STEP ${step}] : Quant Model
    if [[ -z ${QUANT_EXIST_ONNX} ]];then
        QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx
    fi
    if [[ -f ${QUANT_EXIST_ONNX} ]];then
        SIM_MODEL=${QUANT_EXIST_ONNX}
        echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed
    else
        python3 ${RUN_DIR}/quant.py \
            --model ${SIM_MODEL} \
            --model_name ${MODEL_NAME} \
            --dataset_dir ${DATASETS_DIR} \
            --observer ${QUANT_OBSERVER} \
            --disable_quant_names ${DISABLE_QUANT_LIST[@]} \
            --save_dir $CHECKPOINTS_DIR \
            --bsz ${QUANT_BATCHSIZE} \
            --step ${QUANT_STEP} \
            --seed ${QUANT_SEED} \
            --imgsz ${IMGSIZE}
        SIM_MODEL=${QUANT_EXIST_ONNX}
        echo " "Generate ${SIM_MODEL}
    fi
fi

# Step: rewrite the quantized model for the requested batch size
let step++
echo;
echo [STEP ${step}] : Change Batchsize
FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx
if [ -f $FINAL_MODEL ];then
    echo " "Change Batchsize Skip, $FINAL_MODEL has been existed
else
    python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
        --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL}
    echo " "Generate ${FINAL_MODEL}
fi

# Step: build the INT8 engine from the ONNX model and quant_cfg.json
let step++
echo;
echo [STEP ${step}] : Build Engine
ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
if [ -f $ENGINE_FILE ];then
    echo " "Build Engine Skip, $ENGINE_FILE has been existed
else
    python3 ${RUN_DIR}/build_i8_engine.py \
        --onnx ${FINAL_MODEL} \
        --qparam_json ${CHECKPOINTS_DIR}/quant_cfg.json \
        --engine ${ENGINE_FILE}
    echo " "Generate Engine ${ENGINE_FILE}
fi

# Step: run inference; this is the only command whose status is checked.
# The increment was commented out, which made this step print the same
# number as "Build Engine".
let step++
echo;
echo [STEP ${step}] : Inference
# RUN_MODE is FPS, so the --tgt value is a throughput target and is passed
# as --fps_target, as the FP16 performance script does (was --acc_target).
python3 ${RUN_DIR}/inference.py \
    --engine_file=${ENGINE_FILE} \
    --datasets_dir=${DATASETS_DIR} \
    --imgsz=${IMGSIZE} \
    --warm_up=${WARM_UP} \
    --loop_count ${LOOP_COUNT} \
    --test_mode ${RUN_MODE} \
    --fps_target ${TGT} \
    --bsz ${BSZ}; check_status

exit ${EXIT_STATUS}
import onnx
import argparse
from onnxsim import simplify


def simplify_model(args):
    """Simplify an ONNX model and save it with inferred shapes.

    Args:
        args: Namespace providing ``origin_model`` (input ONNX path) and
            ``output_model`` (destination ONNX path).

    Raises:
        RuntimeError: If onnxsim reports that the simplified model could not
            be validated; nothing is written in that case.
    """
    onnx_model = onnx.load(args.origin_model)
    model_simp, check = simplify(onnx_model)
    # Do not silently save a model that onnxsim failed to validate.
    if not check:
        raise RuntimeError(
            f"Simplified ONNX model could not be validated: {args.origin_model}"
        )
    model_simp = onnx.shape_inference.infer_shapes(model_simp)
    onnx.save(model_simp, args.output_model)
    print(" Simplify onnx Done.")


def parse_args():
    """Parse the command line; ``--reshape`` is accepted but not used here."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--origin_model", type=str)
    parser.add_argument("--output_model", type=str)
    parser.add_argument("--reshape", action="store_true")
    args = parser.parse_args()
    return args


# Guard the entry point so importing this module has no side effects.
if __name__ == "__main__":
    args = parse_args()
    simplify_model(args)
+ +## Setup + +### Install +```bash +yum install mesa-libGL +pip3 install tqdm +pip3 install pycuda +pip3 install onnx +pip3 install onnxsim +pip3 install tabulate +pip3 install pycocotools +pip3 install opencv-python==4.6.0.66 +``` + +### Download +Pretrained model: `detr_r50_8xb2-150e_coco_20221023_153551-436d03e8.pth` + +Dataset: download the COCO 2017 validation dataset (the `val2017` images and `annotations/instances_val2017.json`). + +### Model Conversion +```bash +mkdir checkpoints +python3 export_model.py --torch_file /path/to/detr_r50_8xb2-150e_coco_20221023_153551-436d03e8.pth --onnx_file checkpoints/detr_res50.onnx --bsz 1 +``` + +## Inference +```bash +export PROJ_DIR=./ +export DATASETS_DIR=/path/to/coco2017/ +export CHECKPOINTS_DIR=./checkpoints +export COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json +export EVAL_DIR=${DATASETS_DIR}/val2017 +export RUN_DIR=./ +export CONFIG_DIR=config/DETR_CONFIG +``` +### FP16 + +```bash +# Accuracy +bash scripts/infer_detr_fp16_accuracy.sh +# Performance +bash scripts/infer_detr_fp16_performance.sh +``` + +## Results + +Model |BatchSize |Precision |FPS |MAP@0.5 |MAP@0.5:0.95 +--------|-----------|----------|----------|----------|------------ +Detr | 1 | FP16 | 65.84 | 0.370 | 0.198 \ No newline at end of file diff --git a/models/cv/detection/detr/ixrt/build_engine.py b/models/cv/detection/detr/ixrt/build_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..3a00b513e32868a6f0abd5a23a9293bb4a907c08 --- /dev/null +++ b/models/cv/detection/detr/ixrt/build_engine.py @@ -0,0 +1,59 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License.
import os
import cv2
import argparse
import numpy as np

import torch
import tensorrt


def main(config):
    """Build a serialized inference engine from an ONNX model.

    Args:
        config: Namespace with ``model`` (ONNX path), ``quant_file``
            (quantization parameters, required for int8), ``precision``
            ("float16", "int8" or "float32") and ``engine`` (output path).

    Raises:
        ValueError: If ``engine`` is missing, or ``precision`` is "int8" and
            no ``quant_file`` was given.
        RuntimeError: If the ONNX parser reports failure or the builder
            returns no serialized network.
    """
    # Validate up front so a long build is not wasted on a bad command line.
    if not config.engine:
        raise ValueError("--engine is required: no output path for the engine")
    if config.precision == "int8" and not config.quant_file:
        raise ValueError("--quant_file is required when --precision is int8")

    IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING)
    builder = tensorrt.Builder(IXRT_LOGGER)
    EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    network = builder.create_network(EXPLICIT_BATCH)
    build_config = builder.create_builder_config()
    parser = tensorrt.OnnxParser(network, IXRT_LOGGER)

    # NOTE(review): every precision other than "int8" -- including
    # "float32" -- builds with the FP16 flag. Kept as in the original;
    # confirm whether a real FP32 build is intended to be supported.
    precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16
    if precision == tensorrt.BuilderFlag.INT8:
        parsed = parser.parse_from_files(config.model, config.quant_file)
    else:
        parsed = parser.parse_from_file(config.model)
    # Only an explicit False counts as failure, so a parser that returns
    # nothing keeps working as before.
    if parsed is False:
        raise RuntimeError(f"Failed to parse ONNX model: {config.model}")

    build_config.set_flag(precision)

    plan = builder.build_serialized_network(network, build_config)
    # A failed build yields no plan; fail with a clear message instead of a
    # TypeError raised from f.write(None) after the file was already created.
    if plan is None:
        raise RuntimeError(f"Failed to build engine for model: {config.model}")

    engine_file_path = config.engine
    with open(engine_file_path, "wb") as f:
        f.write(plan)


def parse_args():
    """Parse the command line of the engine builder."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", type=str)
    parser.add_argument("--quant_file", type=str)
    parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8",
                        help="The precision of datatype")
    parser.add_argument("--engine", type=str, default=None)
    args = parser.parse_args()
    return args


if __name__ == "__main__":
    args = parse_args()
    main(args)
import os
import torch
import torchvision.datasets
from torch.utils.data import DataLoader
from datasets.coco import CocoDetection


def create_dataloaders(data_path, annFile, img_sz=640, batch_size=32, step=32, workers=2, data_process_type="yolov5"):
    """Create the calibration dataloader over a COCO detection dataset.

    At most ``min(5000, batch_size * step)`` samples are used, taken from the
    start of the dataset in order. When that number is not positive the whole
    dataset is used.

    Args:
        data_path: Image root directory, passed to ``CocoDetection`` as ``root``.
        annFile: Path of the COCO annotation file.
        img_sz: Image size passed to ``CocoDetection`` as ``img_size``.
        batch_size: Batch size of the returned dataloader.
        step: Number of calibration batches wanted.
        workers: Number of dataloader worker processes.
        data_process_type: Preprocessing flavour passed to ``CocoDetection``.

    Returns:
        A non-shuffling ``DataLoader`` that keeps the last partial batch.
    """
    dataset = CocoDetection(
        root=data_path,
        annFile=annFile,
        img_size=img_sz,
        data_process_type=data_process_type
    )
    calibration_dataset = dataset
    num_samples = min(5000, batch_size * step)

    # Never index past the end of the dataset: Subset does not validate its
    # indices, so an oversized range only fails later, in the middle of
    # calibration. A dataset without __len__ keeps the original behaviour.
    try:
        num_samples = min(num_samples, len(dataset))
    except TypeError:
        pass

    if num_samples > 0:
        calibration_dataset = torch.utils.data.Subset(
            dataset, indices=range(num_samples)
        )

    calibration_dataloader = DataLoader(
        calibration_dataset,
        shuffle=False,
        batch_size=batch_size,
        drop_last=False,
        num_workers=workers,
    )
    return calibration_dataloader
# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
# All Rights Reserved.
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.

# The 80 class names; labels[i] pairs with coco80_to_coco91_class()[i].
labels = [
    "person", "bicycle", "car", "motorcycle", "airplane",
    "bus", "train", "truck", "boat", "traffic light",
    "fire hydrant", "stop sign", "parking meter", "bench", "bird",
    "cat", "dog", "horse", "sheep", "cow",
    "elephant", "bear", "zebra", "giraffe", "backpack",
    "umbrella", "handbag", "tie", "suitcase", "frisbee",
    "skis", "snowboard", "sports ball", "kite", "baseball bat",
    "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
    "wine glass", "cup", "fork", "knife", "spoon",
    "bowl", "banana", "apple", "sandwich", "orange",
    "broccoli", "carrot", "hot dog", "pizza", "donut",
    "cake", "chair", "couch", "potted plant", "bed",
    "dining table", "toilet", "tv", "laptop", "mouse",
    "remote", "keyboard", "cell phone", "microwave", "oven",
    "toaster", "sink", "refrigerator", "book", "clock",
    "vase", "scissors", "teddy bear", "hair drier", "toothbrush",
]


def coco80_to_coco91_class():  # converts 80-index (val2014) to 91-index (paper)
    """Return the 91-index category id for each of the 80 class indices.

    Returns:
        A fresh list of 80 ascending ints; element ``i`` is the category id
        for class index ``i``.
    """
    return [
        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34,
        35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
        64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]


# Export the id mapping too, so star-imports see the module's only function.
__all__ = ["labels", "coco80_to_coco91_class"]
# diff --git a/models/cv/detection/detr/ixrt/common.py b/models/cv/detection/detr/ixrt/common.py new file mode 100644 index 0000000000000000000000000000000000000000..c8d4a7b94b52e7142a294a36d294aa0c5a72f609 --- /dev/null +++ b/models/cv/detection/detr/ixrt/common.py @@ -0,0 +1,116 @@
# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
# All Rights Reserved.
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.

import numpy as np
from tqdm import tqdm

import tensorrt
import pycuda.driver as cuda


# input  : [bsz, box_num, 5(cx, cy, w, h, conf) + class_num(prob[0], prob[1], ...)]
# output : [bsz, box_num, 6(left_top_x, left_top_y, right_bottom_x, right_bottom_y, class_id, max_prob*conf)]
def box_class85to6(input):
    """Convert raw rows (cx, cy, w, h, conf, class probs...) into 6-value boxes.

    The indexing treats ``input`` as a 2-D array with one box per row.

    Returns:
        An array of rows ``(x1, y1, x2, y2, class_id, max_prob * conf)``,
        where ``class_id`` is the 1-based argmax over the class columns.
    """
    center_x_y = input[:, :2]
    side = input[:, 2:4]
    conf = input[:, 4:5]
    class_probs = input[:, 5:]
    class_id = np.argmax(class_probs, axis=-1).astype(np.float32).reshape(-1, 1) + 1
    max_prob = np.max(class_probs, axis=-1).reshape(-1, 1)
    x1_y1 = center_x_y - 0.5 * side
    x2_y2 = center_x_y + 0.5 * side
    return np.concatenate([x1_y1, x2_y2, class_id, max_prob * conf], axis=-1)


def save2json(batch_img_id, pred_boxes, json_result):
    """Append one result dict per box of a single image to ``json_result``.

    Args:
        batch_img_id: Image id of the image; ``-1`` marks an image without
            targets, for which nothing is appended.
        pred_boxes: Iterable of ``(x, y, w, h, category, score)`` entries;
            ``None`` entries are skipped.
        json_result: List extended in place.
    """
    image_id = int(batch_img_id)
    if image_id == -1:
        return
    for box in pred_boxes:
        if box is None:
            continue
        x, y, w, h, c, p = box
        json_result.append(
            {
                "image_id": image_id,
                "category_id": int(c),
                "bbox": [float(x), float(y), float(w), float(h)],
                "score": float(p),
            }
        )

# Earlier batched variant, kept for reference:
# def save2json(batch_img_id, pred_boxes, json_result, class_trans):
#     for i, boxes in enumerate(pred_boxes):
#         if boxes is not None:
#             image_id = int(batch_img_id[i])
#             # have no target
#             if image_id == -1:
#                 continue
#             for x, y, w, h, c, p in boxes:
#                 x, y, w, h, p = float(x), float(y), float(w), float(h), float(p)
#                 c = int(c)
#                 json_result.append(
#                     {
#                         "image_id": image_id,
#                         "category_id": class_trans[c - 1],
#                         "bbox": [x, y, w, h],
#                         "score": p,
#                     }
#                 )


def create_engine_context(engine_path, logger):
    """Deserialize an engine file and create an execution context for it.

    Returns:
        ``(engine, context)``.

    Raises:
        AssertionError: If the runtime, engine or context cannot be created.
    """
    with open(engine_path, "rb") as f:
        runtime = tensorrt.Runtime(logger)
        assert runtime, "failed to create runtime"
        engine = runtime.deserialize_cuda_engine(f.read())
        assert engine, f"failed to deserialize engine: {engine_path}"
        context = engine.create_execution_context()
        assert context, "failed to create execution context"

    return engine, context


def get_io_bindings(engine):
    """Allocate one device buffer per engine binding.

    Returns:
        ``(inputs, outputs, allocations)``: two lists of binding dicts
        (``index``, ``name``, ``dtype``, ``shape``, ``allocation``) and the
        list of all allocations in binding order.
    """
    # Setup I/O bindings
    inputs = []
    outputs = []
    allocations = []

    for i in range(engine.num_bindings):
        name = engine.get_binding_name(i)
        np_dtype = np.dtype(tensorrt.nptype(engine.get_binding_dtype(i)))
        shape = engine.get_binding_shape(i)

        # Buffer size in bytes = item size * product of all dimensions.
        size = np_dtype.itemsize
        for s in shape:
            size *= s
        allocation = cuda.mem_alloc(size)

        binding = {
            "index": i,
            "name": name,
            "dtype": np_dtype,
            "shape": list(shape),
            "allocation": allocation,
        }
        print(f"binding {i}, name : {name}  dtype : {np_dtype}  shape : {list(shape)}")
        allocations.append(allocation)
        if engine.binding_is_input(i):
            inputs.append(binding)
        else:
            outputs.append(binding)
    return inputs, outputs, allocations
# \ No newline at end of file
# diff --git a/models/cv/detection/detr/ixrt/config/DETR_CONFIG b/models/cv/detection/detr/ixrt/config/DETR_CONFIG new file mode 100644 index 0000000000000000000000000000000000000000..ec9562e170690a2249677444cfcd83ab751f88de --- /dev/null +++ b/models/cv/detection/detr/ixrt/config/DETR_CONFIG @@ -0,0 +1,44 @@
# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
# All Rights Reserved.
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.
+ +# BSZ : 构建engine以及推理时的batchsize +# IMGSIZE : 模型输入hw大小 +# RUN_MODE : [FPS, MAP] +# PRECISION : [float16, int8] +# MODEL_NAME : 生成onnx/engine的basename +# ORIGINE_MODEL : 原始onnx文件 +# COCO_GT : COCOEVAL标签文件 +# DATASET_DIR : 量化/推理数据集路径 +# CHECKPOINTS_DIR : 存放生成的onnx/engine路径 +# LAYER_FUSION : decoder部分走融合算子实现 0不融合 1融合 +# DECODER_FASTER : 有两种融合实现,faster版本速度快且可以直接对接gpu nms;另一种实现的输出和onnx保持一致. 1:faster +IMGSIZE=800 +MODEL_NAME=detr +ORIGINE_MODEL=detr_res50.onnx +DATA_PROCESS_TYPE=detr +MODEL_INPUT_NAMES=(inputs) + +# QUANT CONFIG (仅PRECISION为int8时生效) + # QUANT_OBSERVER : 量化策略,可选 [hist_percentile, percentile, minmax, entropy, ema] + # QUANT_BATCHSIZE : 量化时组dataloader的batchsize, 最好和onnx中的batchsize保持一致,有些op可能推导shape错误(比如Reshape) + # QUANT_STEP : 量化步数 + # QUANT_SEED : 随机种子 保证量化结果可复现 + # QUANT_EXIST_ONNX : 如果有其他来源的量化模型则填写 +QUANT_OBSERVER=hist_percentile +QUANT_BATCHSIZE=1 +QUANT_STEP=32 +QUANT_SEED=42 +DISABLE_QUANT_LIST=() +QUANT_EXIST_ONNX= \ No newline at end of file diff --git a/models/cv/detection/detr/ixrt/datasets/__init__.py b/models/cv/detection/detr/ixrt/datasets/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..162e24b462289dcee7b7a2888b93fad1115def81 --- /dev/null +++ b/models/cv/detection/detr/ixrt/datasets/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
# \ No newline at end of file
# diff --git a/models/cv/detection/detr/ixrt/datasets/coco.py b/models/cv/detection/detr/ixrt/datasets/coco.py new file mode 100644 index 0000000000000000000000000000000000000000..73c5df54761b917ecd0127fb56b61d9bd34c1196 --- /dev/null +++ b/models/cv/detection/detr/ixrt/datasets/coco.py @@ -0,0 +1,131 @@
# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
# All Rights Reserved.
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.

import os.path
from typing import Any, Callable, List, Optional, Tuple

import cv2

from .vision import VisionDataset
from .pre_process import get_post_process


class CocoDetection(VisionDataset):
    """COCO detection images, read with OpenCV and preprocessed for inference.

    Requires ``pycocotools`` (imported when the dataset is constructed).

    Args:
        root (string): Root directory where images are stored.
        annFile (string): Path to json annotation file.
        img_size (int): Size forwarded to the preprocessing function.
        data_process_type (string): Selects the preprocessing function via
            ``pre_process.get_post_process``.
        transform, target_transform, transforms (callable, optional):
            Forwarded to ``VisionDataset``. ``self.transforms`` is replaced
            by the selected preprocessing function afterwards, so these do
            not influence the returned samples.
    """

    def __init__(
        self,
        root: str,
        annFile: str,
        img_size: int,
        data_process_type: str,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        transforms: Optional[Callable] = None,
    ) -> None:
        super().__init__(root, transforms, transform, target_transform)
        from pycocotools.coco import COCO

        self.coco = COCO(annFile)
        self.ids = list(sorted(self.coco.imgs.keys()))
        self.img_size = img_size

        # Despite its name, this factory from pre_process returns the
        # *pre*-processing callable, invoked as f(image, img_size).
        self.transforms = get_post_process(data_process_type)

    def _load_image(self, id: int):
        """Read the image with id ``id`` via OpenCV.

        Raises:
            FileNotFoundError: If OpenCV cannot read the file.
        """
        file_name = self.coco.loadImgs(id)[0]["file_name"]
        path = os.path.join(self.root, file_name)
        data = cv2.imread(path)
        if data is None:
            # cv2.imread signals failure by returning None; report it here
            # rather than as an AttributeError on ``.shape`` later on.
            raise FileNotFoundError(f"Cannot read image: {path}")
        return data

    def _load_target(self, id: int) -> List[Any]:
        """Return all annotation dicts attached to image ``id``."""
        return self.coco.loadAnns(self.coco.getAnnIds(id))

    def __getitem__(self, index: int) -> Tuple[Any, Any]:
        """Return ``(image, origin_shape, image_id)`` for sample ``index``.

        ``origin_shape`` is the ``(height, width)`` before preprocessing.
        ``image_id`` is ``-1`` when the image has no annotations.
        """
        id = self.ids[index]
        image = self._load_image(id)
        target = self._load_target(id)
        origin_shape = image.shape[:2]

        if self.transforms is not None:
            image = self.transforms(image, self.img_size)

        # Annotations are looked up by this image id, so using ``id``
        # directly also works for subclasses whose targets are not dicts.
        image_id = id if len(target) > 0 else -1
        return image, origin_shape, image_id

    def __len__(self) -> int:
        return len(self.ids)


class CocoCaptions(CocoDetection):
    """COCO captions variant of :class:`CocoDetection`.

    Takes the same constructor arguments. Only ``_load_target`` differs: it
    returns the caption strings of an image. ``__getitem__`` is inherited
    and still returns ``(image, origin_shape, image_id)``.
    """

    def _load_target(self, id: int) -> List[str]:
        """Return the caption text of every annotation of image ``id``."""
        return [ann["caption"] for ann in super()._load_target(id)]
# diff --git a/models/cv/detection/detr/ixrt/datasets/common.py b/models/cv/detection/detr/ixrt/datasets/common.py new file mode 100644 index 0000000000000000000000000000000000000000..febaf0ea228aeaf7f15e7015a46e339126997abf --- /dev/null +++ b/models/cv/detection/detr/ixrt/datasets/common.py @@ -0,0 +1,83 @@
# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
# All Rights Reserved.
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.

import cv2
import math
import numpy as np


def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
    """Resize ``im`` keeping aspect ratio and pad it towards ``new_shape``.

    Returns:
        ``(image, ratio, (dw, dh))`` where ``ratio`` is the (width, height)
        scale applied and ``(dw, dh)`` the per-side padding.
    """
    src_h, src_w = im.shape[:2]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)
    dst_h, dst_w = new_shape[0], new_shape[1]

    # Scale ratio (new / old); optionally never enlarge the image.
    r = min(dst_h / src_h, dst_w / src_w)
    if not scaleup:
        r = min(r, 1.0)

    ratio = r, r
    new_unpad = int(round(src_w * r)), int(round(src_h * r))
    dw = dst_w - new_unpad[0]
    dh = dst_h - new_unpad[1]
    if auto:
        # Minimum rectangle: only pad up to the next stride multiple.
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)
    elif scaleFill:
        # Stretch to the target shape, no padding at all.
        dw, dh = 0.0, 0.0
        new_unpad = (dst_w, dst_h)
        ratio = dst_w / src_w, dst_h / src_h

    # Padding is split between the two sides.
    dw /= 2
    dh /= 2

    if (src_w, src_h) != new_unpad:
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    top = int(round(dh - 0.1))
    bottom = int(round(dh + 0.1))
    left = int(round(dw - 0.1))
    right = int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
    return im, ratio, (dw, dh)


def scale_boxes(net_shape, boxes, ori_shape, use_letterbox=False):
    """Rescale xyxy ``boxes`` in place from ``net_shape`` to ``ori_shape``.

    Both shapes are ``(height, width)``. The boxes are clipped to the
    original image afterwards and the same array is returned.
    """
    net_h, net_w = net_shape[0], net_shape[1]
    ori_h, ori_w = ori_shape[0], ori_shape[1]

    if not use_letterbox:
        # Plain resize: independent scale per axis.
        x_scale, y_scale = net_w / ori_w, net_h / ori_h
        boxes[:, 0] /= x_scale
        boxes[:, 1] /= y_scale
        boxes[:, 2] /= x_scale
        boxes[:, 3] /= y_scale
    else:
        # Letterboxed input: remove the padding, then undo the common gain.
        gain = min(net_h / ori_h, net_w / ori_w)
        pad_x = (net_w - ori_w * gain) / 2
        pad_y = (net_h - ori_h * gain) / 2.0
        boxes[:, [0, 2]] -= pad_x
        boxes[:, [1, 3]] -= pad_y
        boxes[:, :4] /= gain

    clip_boxes(boxes, ori_shape)
    return boxes


def clip_boxes(boxes, shape):
    """Clip xyxy ``boxes`` in place to an image of ``shape`` = (height, width)."""
    x_cols, y_cols = [0, 2], [1, 3]
    boxes[:, x_cols] = boxes[:, x_cols].clip(0, shape[1])  # x1, x2
    boxes[:, y_cols] = boxes[:, y_cols].clip(0, shape[0])  # y1, y2
    return boxes
# \ No newline at end of file
# diff --git a/models/cv/detection/detr/ixrt/datasets/post_process.py b/models/cv/detection/detr/ixrt/datasets/post_process.py new file mode 100644 index 0000000000000000000000000000000000000000..91afc4b6753849ba22ab83685d69dccd880e645e --- /dev/null +++ b/models/cv/detection/detr/ixrt/datasets/post_process.py @@ -0,0 +1,173 @@
# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
# All Rights Reserved.
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.

import cv2
import math
import numpy as np
import torch
import torch.nn.functional as F

from .common import letterbox, scale_boxes, clip_boxes


def get_post_process(data_process_type):
    """Return the post-processing function for ``data_process_type``.

    Returns ``None`` for an unknown type.
    """
    if data_process_type == "yolov5":
        return Yolov5Postprocess
    elif data_process_type == "yolov3":
        return Yolov3Postprocess
    elif data_process_type == "yolox":
        return YoloxPostprocess
    elif data_process_type == "detr":
        return DetrPostprocess
    return None


def _split_boxes(box_datas, box_nums, sample_num, max_det):
    """Slice the flat box buffer into one ``(n, 6)`` array (or ``None``) per sample."""
    flat_boxes = box_datas.flatten()
    flat_nums = box_nums.flatten()
    per_sample = []
    for i in range(sample_num):
        # Every sample owns a fixed slot of max_det boxes, 6 values each.
        start = i * max_det * 6
        box_num = flat_nums[i]
        if box_num == 0:
            per_sample.append(None)
        else:
            per_sample.append(flat_boxes[start : start + box_num * 6].reshape(-1, 6))
    return per_sample


def _xyxy_to_xywh_(boxes):
    """Turn (x1, y1, x2, y2) columns into (x1, y1, w, h) in place."""
    boxes[:, 2] -= boxes[:, 0]
    boxes[:, 3] -= boxes[:, 1]
    return boxes


def _yolo_postprocess(ori_img_shape, imgsz, box_datas, box_nums, sample_num, max_det, use_letterbox):
    """Shared YOLOv3/YOLOv5 path: rescale to the original image, then xywh."""
    all_box = []
    for i, boxes in enumerate(_split_boxes(box_datas, box_nums, sample_num, max_det)):
        if boxes is not None:
            boxes = scale_boxes(
                (imgsz[0], imgsz[1]),
                boxes,
                (ori_img_shape[0][i], ori_img_shape[1][i]),
                use_letterbox=use_letterbox,
            )
            _xyxy_to_xywh_(boxes)
        all_box.append(boxes)
    return all_box


def Yolov3Postprocess(
    ori_img_shape,
    imgsz,
    box_datas,
    box_nums,
    sample_num,
    max_det=1000,
):
    """Decode boxes for inputs that were resized without letterboxing.

    Returns:
        A list with one ``(n, 6)`` xywh array per sample, or ``None`` for a
        sample without boxes.
    """
    return _yolo_postprocess(
        ori_img_shape, imgsz, box_datas, box_nums, sample_num, max_det, use_letterbox=False
    )


def Yolov5Postprocess(
    ori_img_shape,
    imgsz,
    box_datas,
    box_nums,
    sample_num,
    max_det=1000,
):
    """Decode boxes for letterboxed inputs.

    Returns:
        A list with one ``(n, 6)`` xywh array per sample, or ``None`` for a
        sample without boxes.
    """
    return _yolo_postprocess(
        ori_img_shape, imgsz, box_datas, box_nums, sample_num, max_det, use_letterbox=True
    )


def YoloxPostprocess(
    ori_img_shape,
    imgsz,
    box_datas,
    box_nums,
    sample_num,
    max_det=1000,
):
    """Decode boxes for inputs resized by a single ratio and padded bottom/right.

    Returns:
        A list with one ``(n, 6)`` xywh array per sample, or ``None`` for a
        sample without boxes.
    """
    all_box = []
    for i, boxes in enumerate(_split_boxes(box_datas, box_nums, sample_num, max_det)):
        if boxes is not None:
            ori_h, ori_w = ori_img_shape[0][i], ori_img_shape[1][i]
            r = min(imgsz[0] / ori_h, imgsz[1] / ori_w)
            boxes[:, :4] /= r
            # Clip while the columns are still corner coordinates; clipping
            # after the xywh conversion would clamp widths and heights.
            clip_boxes(boxes, (ori_h, ori_w))
            _xyxy_to_xywh_(boxes)
        all_box.append(boxes)
    return all_box


def box_cxcywh_to_xyxy(x):
    """Convert (cx, cy, w, h) boxes in the last dim to (x0, y0, x1, y1)."""
    x_c, y_c, w, h = x.unbind(-1)
    b = [(x_c - 0.5 * w), (y_c - 0.5 * h),
         (x_c + 0.5 * w), (y_c + 0.5 * h)]
    return torch.stack(b, dim=-1)


def convert_to_xywh(boxes):
    """Convert (xmin, ymin, xmax, ymax) boxes to (xmin, ymin, w, h)."""
    xmin, ymin, xmax, ymax = boxes.unbind(-1)
    return torch.stack((xmin, ymin, xmax - xmin, ymax - ymin), dim=1)


def DetrPostprocess(pred_logits, pred_boxes, target_sizes):
    """Turn DETR outputs of one image into ``[x, y, w, h, label, score]`` rows.

    Args:
        pred_logits: numpy array of class logits, one row per query; the
            last class column is dropped before taking the best class.
        pred_boxes: numpy array of relative (cx, cy, w, h) boxes per query.
        target_sizes: ``(img_w, img_h)`` of the original image.

    Returns:
        A list of 6-element lists, one per query.
    """
    out_logits = torch.from_numpy(pred_logits)
    out_bbox = torch.from_numpy(pred_boxes)
    assert len(target_sizes) == 2

    prob = F.softmax(out_logits, -1)
    scores, labels = prob[..., :-1].max(-1)

    # convert to [x0, y0, x1, y1] format
    boxes = box_cxcywh_to_xyxy(out_bbox)
    # and from relative [0, 1] to absolute [0, height] coordinates
    img_w, img_h = target_sizes
    scale_fct = torch.tensor([img_w, img_h, img_w, img_h])
    boxes = boxes * scale_fct

    # clip_boxes expects (height, width); target_sizes is (width, height).
    boxes = clip_boxes(boxes, (img_h, img_w))
    boxes = convert_to_xywh(boxes)

    labels = labels.unsqueeze(1)
    scores = scores.unsqueeze(1)
    pred_boxes = torch.cat([
        boxes,
        labels,
        scores], dim=1).numpy().tolist()
    return pred_boxes
# \ No newline at end of file
# diff --git a/models/cv/detection/detr/ixrt/datasets/pre_process.py b/models/cv/detection/detr/ixrt/datasets/pre_process.py new file mode 100644 index 0000000000000000000000000000000000000000..c7f490df4eabc90a67f04b4ef54e7118dac51b2a --- /dev/null +++ b/models/cv/detection/detr/ixrt/datasets/pre_process.py @@ -0,0 +1,91 @@
# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
# All Rights Reserved.
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.

import cv2
import math
import numpy as np

from .common import letterbox


def get_pre_process(data_process_type):
    """Return the preprocessing function for ``data_process_type``.

    Returns ``None`` for an unknown type.
    """
    if data_process_type == "yolov5":
        return Yolov5Preprocess
    elif data_process_type == "yolov3":
        return Yolov3Preprocess
    elif data_process_type == "yolox":
        return YoloxPreprocess
    elif data_process_type == "detr":
        return DetrPreprocess
    return None


def get_post_process(data_process_type):
    """Backward-compatible alias of :func:`get_pre_process`.

    The historical name is misleading: this returns *pre*-processing
    functions. It is kept because existing callers import it.
    """
    return get_pre_process(data_process_type)


def Yolov3Preprocess(image, img_size):
    """Resize to ``img_size`` x ``img_size``; return CHW RGB float32 in [0, 1]."""
    image = cv2.resize(image, (img_size, img_size))
    image = image.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
    image = np.ascontiguousarray(image).astype(np.float32) / 255.0  # 0~1 np array
    return image


def Yolov5Preprocess(image, img_size, augment=False):
    """Scale the long side to ``img_size``, letterbox, return CHW RGB float32 in [0, 1]."""
    h0, w0 = image.shape[:2]  # orig hw
    r = img_size / max(h0, w0)  # ratio

    if r != 1:  # if sizes are not equal
        interp = cv2.INTER_LINEAR if (augment or r > 1) else cv2.INTER_AREA
        image = cv2.resize(image, (math.ceil(w0 * r), math.ceil(h0 * r)), interpolation=interp)

    image, ratio, dwdh = letterbox(image, new_shape=img_size, auto=False, scaleup=False)
    image = image.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
    image = np.ascontiguousarray(image).astype(np.float32) / 255.0  # 0~1 np array
    return image


def YoloxPreprocess(img, img_size, swap=(2,0,1)):
    """Resize keeping aspect ratio into the top-left of a 114-filled square.

    Returns a float32 array with axes permuted by ``swap``; pixel values
    stay in the 0-255 range.
    """
    padded_img = np.ones((img_size, img_size, 3), dtype=np.uint8) * 114
    r = min(img_size / img.shape[0], img_size / img.shape[1])
    resized_img = cv2.resize(
        img,
        (int(img.shape[1] * r), int(img.shape[0] * r)),
        interpolation=cv2.INTER_LINEAR,
    ).astype(np.uint8)

    padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img
    padded_img = padded_img.transpose(swap)
    padded_img = np.ascontiguousarray(padded_img, dtype=np.float32)

    return padded_img


def DetrPreprocess(image, img_size):
    """Resize to a square and normalize each channel; return CHW RGB float32.

    The image is scaled to [0, 1] and then normalized per channel as
    ``(x - mean) / std``.
    """
    # Per-channel (R, G, B) constants. The original code stored these under
    # swapped names; the arithmetic is unchanged.
    mean = np.array([0.485, 0.456, 0.406], dtype=np.float32).reshape(3, 1, 1)
    std = np.array([0.229, 0.224, 0.225], dtype=np.float32).reshape(3, 1, 1)

    image = cv2.resize(image, (img_size, img_size))
    image = image.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
    image = np.ascontiguousarray(image).astype(np.float32) / 255.0  # 0~1 np array

    image = (image - mean) / std

    return image
# \ No newline at end of file
# diff --git a/models/cv/detection/detr/ixrt/datasets/vision.py b/models/cv/detection/detr/ixrt/datasets/vision.py new file mode 100755 index 0000000000000000000000000000000000000000..eadefb2c5b35abd0a11fa85c65891461a210aef8 --- /dev/null +++ b/models/cv/detection/detr/ixrt/datasets/vision.py @@ -0,0 +1,151 @@
# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
# All Rights Reserved.
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the
#    License for the specific language governing permissions and limitations
#    under the License.

import os
from typing import Any, Callable, List, Optional, Tuple

import torch
import torch.utils.data as data

from types import FunctionType


def _log_api_usage_once(obj: Any) -> None:
    """Report ``obj`` to PyTorch's API usage logger.

    The key is ``<module>.<name>``. Modules not starting with
    ``torchvision`` are prefixed with ``torchvision.internal.``. The name is
    the function name for plain functions and the class name otherwise.

    Args:
        obj (class instance or method): an object to extract info from.
    """
    module = obj.__module__
    if not module.startswith("torchvision"):
        module = f"torchvision.internal.{module}"
    name = obj.__name__ if isinstance(obj, FunctionType) else obj.__class__.__name__
    torch._C._log_api_usage_once(f"{module}.{name}")


class VisionDataset(data.Dataset):
    """Base class for datasets; subclasses implement ``__getitem__`` and ``__len__``.

    Args:
        root (string): Root directory of dataset.
        transforms (callable, optional): Takes a sample and its target and
            returns transformed versions of both.
        transform (callable, optional): Transforms the sample only.
        target_transform (callable, optional): Transforms the target only.

    .. note::

        :attr:`transforms` and the pair :attr:`transform` /
        :attr:`target_transform` are mutually exclusive.
    """

    _repr_indent = 4

    def __init__(
        self,
        root: str,
        transforms: Optional[Callable] = None,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
    ) -> None:
        _log_api_usage_once(self)
        self.root = os.path.expanduser(root) if isinstance(root, str) else root

        separate_given = not (transform is None and target_transform is None)
        if transforms is not None and separate_given:
            raise ValueError("Only transforms or transform/target_transform can be passed as argument")

        # for backwards-compatibility
        self.transform = transform
        self.target_transform = target_transform

        # Separate callables are wrapped so that ``self.transforms`` always
        # has the joint (sample, target) form.
        self.transforms = StandardTransform(transform, target_transform) if separate_given else transforms

    def __getitem__(self, index: int) -> Any:
        """
        Args:
            index (int): Index

        Returns:
            (Any): Sample and meta data, optionally transformed by the respective transforms.
        """
        raise NotImplementedError

    def __len__(self) -> int:
        raise NotImplementedError

    def __repr__(self) -> str:
        details = [f"Number of datapoints: {self.__len__()}"]
        if self.root is not None:
            details.append(f"Root location: {self.root}")
        details.extend(self.extra_repr().splitlines())
        if getattr(self, "transforms", None) is not None:
            details.append(repr(self.transforms))

        pad = " " * self._repr_indent
        header = "Dataset " + self.__class__.__name__
        return "\n".join([header] + [pad + entry for entry in details])

    def _format_transform_repr(self, transform: Callable, head: str) -> List[str]:
        first, *rest = transform.__repr__().splitlines()
        pad = " " * len(head)
        return [f"{head}{first}"] + [f"{pad}{entry}" for entry in rest]

    def extra_repr(self) -> str:
        return ""


class StandardTransform:
    """Apply independent callables to the sample and to its target."""

    def __init__(self, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None) -> None:
        self.transform = transform
        self.target_transform = target_transform

    def __call__(self, input: Any, target: Any) -> Tuple[Any, Any]:
        out_input = input if self.transform is None else self.transform(input)
        out_target = target if self.target_transform is None else self.target_transform(target)
        return out_input, out_target

    def _format_transform_repr(self, transform: Callable, head: str) -> List[str]:
        first, *rest = transform.__repr__().splitlines()
        pad = " " * len(head)
        return [f"{head}{first}"] + [f"{pad}{entry}" for entry in rest]

    def __repr__(self) -> str:
        parts = [self.__class__.__name__]
        for fn, head in (
            (self.transform, "Transform: "),
            (self.target_transform, "Target transform: "),
        ):
            if fn is not None:
                parts.extend(self._format_transform_repr(fn, head))
        return "\n".join(parts)
# diff --git a/models/cv/detection/detr/ixrt/deploy.py b/models/cv/detection/detr/ixrt/deploy.py new file mode 100644 index 0000000000000000000000000000000000000000..d1052d2b2c522af2a1991faa8bba5e791da635f5 ---
/dev/null +++ b/models/cv/detection/detr/ixrt/deploy.py @@ -0,0 +1,123 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +# !/usr/bin/env python +# -*- coding: utf-8 -*- +import argparse +from tensorrt.deploy.api import GraphTransform, create_source, create_target + +class Transform: + def __init__(self, graph): + self.t = GraphTransform(graph) + self.graph = graph + + def ReplaceFocus(self, input_edge, outputs, to_op): + input_var = self.graph.get_variable(input_edge) + op = self.graph.get_operator(to_op) + self.t.delete_operators_between_var_op( + from_var=input_var, to_op=op + ) + self.t.make_operator( + "Focus", inputs=input_edge, outputs=outputs + ) + return self.graph + + def AddYoloDecoderOp(self, inputs: list, outputs: list, op_type, **attributes): + if attributes["anchor"] is None: + del attributes["anchor"] + self.t.make_operator( + op_type, inputs=inputs, outputs=outputs, **attributes + ) + return self.graph + + def AddConcatOp(self, inputs: list, outputs, **attributes): + self.t.make_operator( + "Concat", inputs=inputs, outputs=outputs, **attributes + ) + return self.graph + +def customize_ops(graph, args): + t = Transform(graph) + fuse_focus = args.focus_input is not None and args.focus_output is not None and args.focus_last_node is not None + if fuse_focus: + graph = t.ReplaceFocus( + input_edge=args.focus_input, + outputs=args.focus_output, + 
def parse_args():
    """Parse the command-line options of the decoder deployment script.

    Returns:
        The ``argparse.Namespace`` built from ``sys.argv``.
    """
    # (flag, add_argument keyword arguments), in the order they are registered.
    option_table = (
        ("--src", {"type": str}),
        ("--dst", {"type": str}),
        (
            "--decoder_type",
            {
                "type": str,
                "choices": ["YoloV3Decoder", "YoloV5Decoder", "YoloV7Decoder", "YoloxDecoder"],
            },
        ),
        ("--decoder_input_names", {"nargs": "+", "type": str}),
        ("--decoder8_anchor", {"nargs": "*", "type": int}),
        ("--decoder16_anchor", {"nargs": "*", "type": int}),
        ("--decoder32_anchor", {"nargs": "*", "type": int}),
        ("--num_class", {"type": int, "default": 80}),
        ("--faster", {"type": int, "default": 1}),
        ("--focus_input", {"type": str, "default": None}),
        ("--focus_output", {"type": str, "default": None}),
        ("--focus_last_node", {"type": str, "default": None}),
    )

    cli = argparse.ArgumentParser()
    for flag, spec in option_table:
        cli.add_argument(flag, **spec)
    return cli.parse_args()
create_target(saved_path=args.dst).export(graph) + print("Surged onnx lies on", args.dst) \ No newline at end of file diff --git a/models/cv/detection/detr/ixrt/export_model.py b/models/cv/detection/detr/ixrt/export_model.py new file mode 100644 index 0000000000000000000000000000000000000000..55385fb3b4745eba41aa2ba3a538277883a443c9 --- /dev/null +++ b/models/cv/detection/detr/ixrt/export_model.py @@ -0,0 +1,121 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
def convert_model(onnx_file, config):
    """Export the torch.hub ``detr_resnet50`` model to a simplified ONNX file.

    The model is loaded through ``torch.hub`` (this presumably needs network
    access or a populated hub cache), exported with opset 11, simplified with
    onnxsim, shape-inferred, and written back to ``onnx_file``. When the
    module-level ``validate`` flag is true, the maximum absolute difference
    between the PyTorch and ONNX Runtime ``pred_logits`` is printed.

    Args:
        onnx_file: Path the ONNX model is written to (and re-read from).
        config: Object providing ``bsz``, ``img_H`` and ``img_W``.

    Raises:
        RuntimeError: If onnxsim reports that the simplified model failed
            its validation check.
    """
    model = torch.hub.load('facebookresearch/detr:main', 'detr_resnet50', pretrained=True)
    model.eval()

    dummy_input = torch.randn([config.bsz, 3, config.img_H, config.img_W])
    # No eager forward pass here: its result was never used and it only built
    # an autograd graph for a full-resolution image.
    torch.onnx.export(
        model,
        dummy_input,
        onnx_file,
        verbose=False,
        input_names=["input"],
        output_names=["pred_logits", "pred_boxes"],
        opset_version=11,
    )

    onnx_model = onnx.load(onnx_file)
    model_simp, check = simplify(onnx_model)
    # Explicit raise instead of ``assert`` so the check survives ``python -O``.
    if not check:
        raise RuntimeError("Simplified ONNX model could not be validated")

    onnx_model = shape_inference.infer_shapes(model_simp)
    onnx.save(onnx_model, onnx_file)
    print('finished exporting onnx')

    if validate:
        import numpy as np

        # Inference only: no gradients are needed for the comparison.
        with torch.no_grad():
            torch_out = model(dummy_input)["pred_logits"]
        onnx_out = ort_inference(onnx_file, dummy_input)[0]

        diff = np.abs(torch_out.detach().numpy() - onnx_out).max()
        print(diff)
parser.add_argument("--onnx_file", type=str, help="onnx model",default="") + parser.add_argument("--bsz", type=int, default=1, help="test batch size") + parser.add_argument( + "--img_H", + type=int, + default=800, + help="inference size h", + ) + parser.add_argument( + "--img_W", + type=int, + default=800, + help="inference size W", + ) + + + config = parser.parse_args() + return config + +if __name__ == "__main__": + + config = parse_config() + onnx_file = config.onnx_file + convert_model(onnx_file, config) \ No newline at end of file diff --git a/models/cv/detection/detr/ixrt/extract_graph_weight.py b/models/cv/detection/detr/ixrt/extract_graph_weight.py new file mode 100644 index 0000000000000000000000000000000000000000..9094316e46808c885edab489a61cb7ab8ac56cbf --- /dev/null +++ b/models/cv/detection/detr/ixrt/extract_graph_weight.py @@ -0,0 +1,139 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
def parse_onnx_model(onnx_model):
    """Summarise an ONNX model's structure and extract its initializer weights.

    Args:
        onnx_model: A loaded ONNX model; only ``graph.node``,
            ``graph.initializer`` and ``graph.value_info`` are read.

    Returns:
        A ``(model, var_dict)`` tuple.

        ``model`` is a dict with three sub-dicts:
          * ``"nodes"``: node name -> ``inputs``, ``outputs``, ``op_type`` and
            ``attrbiute`` (sic; the key spelling is kept for existing consumers).
          * ``"tensors"``: initializer name -> ``data_type`` and ``dims``.
            ``data_type`` is ``None`` for dtypes this function does not know.
          * ``"edges"``: non-initializer edge name -> ``dims``, for edges that
            have an entry in ``value_info``.

        ``var_dict`` is an ``OrderedDict`` mapping initializer name to a numpy
        array, for non-scalar float32/int32/int64 initializers that carry data.
    """
    graph = onnx_model.graph
    nodes = graph.node
    initializer = graph.initializer
    value_info = graph.value_info
    model = {"nodes": {}, "tensors": {}, "edges": {}}

    all_edge = []
    for item in nodes:
        input_edge_list = list(item.input)
        output_edge_list = list(item.output)
        all_edge.extend(input_edge_list)
        all_edge.extend(output_edge_list)

        attribute_dict = {}
        for attr in item.attribute:
            # Only float/int/string scalars and lists are exported; any other
            # attribute kind is skipped.
            if attr.type == onnx.AttributeProto.FLOAT:
                attribute_dict[attr.name] = attr.f
            elif attr.type == onnx.AttributeProto.FLOATS:
                attribute_dict[attr.name] = list(attr.floats)
            elif attr.type == onnx.AttributeProto.INT:
                attribute_dict[attr.name] = attr.i
            elif attr.type == onnx.AttributeProto.INTS:
                attribute_dict[attr.name] = list(attr.ints)
            elif attr.type == onnx.AttributeProto.STRING:
                attribute_dict[attr.name] = attr.s.decode("UTF-8")
            elif attr.type == onnx.AttributeProto.STRINGS:
                attribute_dict[attr.name] = [x.decode("UTF-8") for x in attr.strings]

        model["nodes"][item.name] = {
            "inputs": input_edge_list,
            "outputs": output_edge_list,
            "op_type": item.op_type,
            "attrbiute": attribute_dict,
        }

    # NOTE(review): in the ONNX TensorProto.DataType enum, 3 appears to be INT8
    # and 6 INT32, so "int32" below may be mislabelled. The mapping is left
    # unchanged to preserve existing behaviour; confirm against the ONNX spec.
    dtype_names = {1: "float32", 3: "int32", 7: "int64"}

    constant_edge = set()
    for item in initializer:
        constant_edge.add(item.name)
        # Build a fresh dict for every initializer. Reusing the previous
        # iteration's dict for an unknown dtype overwrote that tensor's dims
        # and aliased two entries to one object.
        model["tensors"][item.name] = {
            "data_type": dtype_names.get(item.data_type),
            "dims": list(item.dims),
        }

    # Edges that are not initializers, as a set for O(1) membership tests.
    miss_edge = {edge for edge in all_edge if edge not in constant_edge}

    for info in value_info:
        if info.name in miss_edge:
            model["edges"][info.name] = {
                "dims": [int(x.dim_value) for x in info.type.tensor_type.shape.dim]
            }

    # Export weights.
    var_dict = collections.OrderedDict()
    for item in initializer:
        tensor_name = item.name
        tensor_shape = list(item.dims)
        if len(tensor_shape) == 0:
            # Scalar initializers are not exported.
            continue

        # Data may live in the typed repeated field or in raw_data; the
        # original precedence between the two is kept for each dtype.
        if item.data_type == 1 and len(item.float_data):
            np_data = np.array(list(item.float_data), dtype=np.float32)
        elif item.data_type == 1 and len(item.raw_data):
            np_data = np.frombuffer(item.raw_data, dtype=np.float32)
        elif item.data_type == 3 and len(item.int32_data):
            np_data = np.array(list(item.int32_data), dtype=np.int32)
        elif item.data_type == 3 and len(item.raw_data):
            np_data = np.frombuffer(item.raw_data, dtype=np.int32)
        elif item.data_type == 7 and len(item.raw_data):
            np_data = np.frombuffer(item.raw_data, dtype=np.int64)
        elif item.data_type == 7 and len(item.int64_data):
            np_data = np.array(list(item.int64_data), dtype=np.int64)
        else:
            print(
                "tensor name: ",
                tensor_name,
                ", type: ",
                item.data_type,
                ", len: ",
                len(item.raw_data),
                len(item.float_data),
                len(item.int32_data),
                len(item.int64_data),
                ", will not save into weights file",
            )
            continue

        var_dict[tensor_name] = np_data.reshape(tensor_shape)

    return model, var_dict
def main(config):
    """Run the engine over the dataloader and check throughput or COCO mAP.

    In ``"FPS"`` mode the measured throughput is compared against
    ``config.fps_target``. In ``"MAP"`` mode predictions are post-processed,
    dumped to a JSON file in ``config.pred_dir`` and scored with pycocotools
    against ``config.coco_gt``; mAP@0.5 is compared against
    ``config.map_target``. In both modes the process exits with status 0 on
    pass and 1 on failure.

    Args:
        config: Parsed command-line namespace (see ``parse_config``).
    """
    # Load dataloader
    dataloader = create_dataloaders(
        data_path=config.eval_dir,
        annFile=config.coco_gt,
        img_sz=config.imgsz,
        batch_size=config.bsz,
        step=config.loop_count,
        data_process_type=config.data_process_type,
        workers=8,
    )

    # The post-process function is only needed when predictions are scored.
    if config.test_mode == "MAP":
        post_process_func = get_post_process(config.data_process_type)

    bsz = config.bsz
    # Planned sample count, used for the banner only; FPS is computed from the
    # number of samples actually fed to the engine (see below).
    num_samples = 5000
    if config.loop_count > 0:
        num_samples = bsz * config.loop_count
    num_batch = len(dataloader)
    print("=" * 30)
    print(f"Test Mode : {'Asynchronous' if config.use_async else 'Synchronous'}")
    print(f"Total sample : {num_samples}\nBatch_size : {bsz}\nRun Batch : {num_batch}")
    print("=" * 30)

    json_result = []
    forward_time = 0.0
    processed_samples = 0

    logger = tensorrt.Logger(tensorrt.Logger.ERROR)

    # Load Engine
    engine, context = create_engine_context(config.model_engine, logger)
    inputs, outputs, allocations = get_io_bindings(engine)

    # Warm up
    if config.warm_up > 0:
        print("\nWarm Start.")
        for _ in range(config.warm_up):
            context.execute_v2(allocations)
        print("Warm Done.")

    # Host buffers that receive the two engine outputs.
    batch_pred_logits = np.zeros(outputs[0]["shape"], outputs[0]["dtype"])
    batch_pred_boxes = np.zeros(outputs[1]["shape"], outputs[1]["dtype"])
    print(f"pred_logits shape : {batch_pred_logits.shape} pred_logits type : {batch_pred_logits.dtype}")
    print(f"pred_boxes shape : {batch_pred_boxes.shape} pred_boxes type : {batch_pred_boxes.dtype}")

    for batch_data, batch_img_shape, batch_img_id in tqdm(dataloader):
        batch_data = batch_data.numpy()
        batch_img_shape = [batch_img_shape[0].numpy(), batch_img_shape[1].numpy()]

        # Count what was really processed so a short last batch (or a dataset
        # that is not exactly the planned size) does not skew the FPS figure.
        processed_samples += batch_data.shape[0]

        # Set input
        cuda.memcpy_htod(inputs[0]["allocation"], batch_data)

        # Forward; perf_counter is monotonic and intended for interval timing.
        start_time = time.perf_counter()
        context.execute_v2(allocations)
        forward_time += time.perf_counter() - start_time

        if config.test_mode == "MAP":
            # Fetch output
            cuda.memcpy_dtoh(batch_pred_logits, outputs[0]["allocation"])
            cuda.memcpy_dtoh(batch_pred_boxes, outputs[1]["allocation"])

            # zip stops at the shortest sequence, so rows of the output buffers
            # beyond the images in this batch are ignored.
            for pred_logits, pred_boxes, img_h, img_w, img_id in zip(
                batch_pred_logits,
                batch_pred_boxes,
                batch_img_shape[0],
                batch_img_shape[1],
                batch_img_id,
            ):
                pred_boxes = post_process_func(pred_logits, pred_boxes, [img_w, img_h])
                save2json(img_id, pred_boxes, json_result)

    if processed_samples == 0 or forward_time <= 0.0:
        # Nothing ran: report it instead of dividing by zero.
        print("No batch was executed, cannot compute FPS!")
        sys.exit(1)

    fps = processed_samples / forward_time

    if config.test_mode == "FPS":
        print("FPS : ", fps)
        print(f"Performance Check : Test {fps} >= target {config.fps_target}")
        if fps >= config.fps_target:
            print("pass!")
            sys.exit()
        else:
            print("failed!")
            sys.exit(1)

    if config.test_mode == "MAP":
        if len(json_result) == 0:
            print("Predict zero box!")
            sys.exit(1)

        os.makedirs(config.pred_dir, exist_ok=True)

        pred_json = os.path.join(
            config.pred_dir, f"{config.model_name}_{config.precision}_preds.json"
        )
        with open(pred_json, "w") as f:
            json.dump(json_result, f)

        anno_json = config.coco_gt
        anno = COCO(anno_json)  # init annotations api
        pred = anno.loadRes(pred_json)  # init predictions api
        coco_eval = COCOeval(anno, pred, "bbox")

        coco_eval.evaluate()
        coco_eval.accumulate()
        print(
            f"==============================eval {config.model_name} {config.precision} coco map =============================="
        )
        coco_eval.summarize()

        # Only the second of the first two stats is used; it is printed as MAP@0.5.
        _, map50 = coco_eval.stats[:2]
        print("MAP@0.5 : ", map50)
        print(f"Accuracy Check : Test {map50} >= target {config.map_target}")
        if map50 >= config.map_target:
            print("pass!")
            sys.exit()
        else:
            print("failed!")
            sys.exit(1)
default="data/datasets/cv/coco2017/annotations/instances_val2017.json", + help="coco instances_val2017.json", + ) + parser.add_argument("--warm_up", type=int, default=3, help="warm_up count") + parser.add_argument("--loop_count", type=int, default=-1, help="loop count") + parser.add_argument( + "--eval_dir", + type=str, + default="data/datasets/cv/coco2017/val2017", + help="coco image dir", + ) + parser.add_argument("--bsz", type=int, default=32, help="test batch size") + parser.add_argument( + "--imgsz", + "--img", + "--img-size", + type=int, + default=640, + help="inference size h,w", + ) + parser.add_argument("--max_det", type=int, default=1000, help="maximum detections per image") + parser.add_argument("--data_process_type", type=str, default="none") + parser.add_argument("--use_async", action="store_true") + parser.add_argument("--debug", action="store_true") + parser.add_argument("--pred_dir", type=str, default=".", help="pred save json dirs") + parser.add_argument("--map_target", type=float, default=0.56, help="target mAP") + parser.add_argument("--fps_target", type=float, default=-1.0, help="target fps") + parser.add_argument("--decoder_faster", type=int, default=0, help="decoder faster can use gpu nms directly") + parser.add_argument("--nms_type", type=str, default="GPU", help="GPU/CPU") + + config = parser.parse_args() + print("config:", config) + return config + +if __name__ == "__main__": + config = parse_config() + main(config) \ No newline at end of file diff --git a/models/cv/detection/detr/ixrt/load_ixrt_plugin.py b/models/cv/detection/detr/ixrt/load_ixrt_plugin.py new file mode 100644 index 0000000000000000000000000000000000000000..ae47dc8e854b6bea1f768e65c4dd481048bfebce --- /dev/null +++ b/models/cv/detection/detr/ixrt/load_ixrt_plugin.py @@ -0,0 +1,27 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
def load_ixrt_plugin(logger=None, namespace="", dynamic_path=""):
    """Load the IxRT plugin shared library and register its plugins.

    Args:
        logger: Logger handed to ``tensorrt.init_libnvinfer_plugins``. When
            omitted, a new ``tensorrt.Logger`` at INFO level is created per
            call. It is no longer built once at definition time, so importing
            this module creates no logger and calls do not share one.
        namespace: Plugin namespace passed to ``init_libnvinfer_plugins``.
        dynamic_path: Path to the plugin library. When empty,
            ``lib/libixrt_plugin.so`` next to the ``tensorrt`` package is used.

    Raises:
        FileNotFoundError: If the resolved library path does not exist.
    """
    if not dynamic_path:
        dynamic_path = join(dirname(tensorrt.__file__), "lib", "libixrt_plugin.so")
    if not exists(dynamic_path):
        raise FileNotFoundError(
            f"The ixrt_plugin lib {dynamic_path} is not existed, please provided effective plugin path!")
    if logger is None:
        logger = tensorrt.Logger(tensorrt.Logger.INFO)
    ctypes.CDLL(dynamic_path)
    tensorrt.init_libnvinfer_plugins(logger, namespace)
    print(f"Loaded plugin from {dynamic_path}")
def modify_shape_dim(dim, bsz):
    """Set one shape dimension to the requested batch size.

    Args:
        dim: Dimension object exposing ``dim_value`` and ``dim_param``.
        bsz: An ``int`` or a decimal string gives a fixed size, any other
            string gives a symbolic (dynamic) dimension name, and anything
            else falls back to a batch size of 1.
    """
    # Numeric input is tested first. Testing ``isinstance(bsz, str)`` first
    # made the digit-string case unreachable, so "8" became a symbolic
    # dimension named "8".
    if isinstance(bsz, int) or (isinstance(bsz, str) and bsz.isdecimal()):
        dim.dim_value = int(bsz)
    elif isinstance(bsz, str):
        # Symbolic dimension name, i.e. a dynamic batch size.
        dim.dim_param = bsz
    else:
        dim.dim_value = 1


def change_input_dim(onnx_model, bsz):
    """Rewrite the first dimension of every graph input to ``bsz``.

    Args:
        onnx_model: Model whose ``graph.input`` entries are updated in place.
        bsz: New batch size, interpreted as in ``modify_shape_dim``.

    Returns:
        The previous ``dim_value`` of the first dimension of the last graph
        input that was visited.

    Raises:
        ValueError: If the graph has no inputs (previously an
            ``UnboundLocalError``).
    """
    graph_inputs = onnx_model.graph.input
    if len(graph_inputs) == 0:
        raise ValueError("ONNX graph has no inputs; cannot change the batch size")

    old_bsz = None
    for graph_input in graph_inputs:
        batch_dim = graph_input.type.tensor_type.shape.dim[0]
        old_bsz = batch_dim.dim_value
        modify_shape_dim(batch_dim, bsz)
    return old_bsz
def change_add_initializer(model, var_dict, bsz):
    """Rebuild 3-D constant inputs of Add nodes for a new batch size.

    For every Add node, each input that is present in ``var_dict`` and has
    exactly three dimensions is replaced by ``bsz`` copies of its first slice
    along axis 1, concatenated along that axis.

    Args:
        model: Parsed graph description; ``model["nodes"]`` maps node names to
            dicts with ``op_type`` and ``inputs``.
        var_dict: Mapping of initializer name to numpy array, updated in place.
        bsz: Number of copies to concatenate.

    Returns:
        Names of the rewritten initializers, in visiting order.
    """
    print("change_add_initializer")
    touched = []
    add_nodes = (node for node in model["nodes"].values() if node["op_type"] == "Add")
    for node in add_nodes:
        for edge in node["inputs"]:
            is_3d_constant = edge in var_dict and len(var_dict[edge].shape) == 3
            if not is_3d_constant:
                continue
            # Keep only the first entry along axis 1, then repeat it bsz times.
            first_slice = deepcopy(var_dict[edge])[:, 0:1, ...]
            var_dict[edge] = np.concatenate([first_slice] * bsz, axis=1)
            touched.append(edge)
    return touched
def parse_args():
    """Parse the batch-size rewrite options from the command line.

    Returns:
        Namespace with ``batch_size`` (int), ``origin_model`` (str) and
        ``output_model`` (str); each is ``None`` when not supplied.
    """
    cli = argparse.ArgumentParser()
    flag_types = (
        ("--batch_size", int),
        ("--origin_model", str),
        ("--output_model", str),
    )
    for flag, value_type in flag_types:
        cli.add_argument(flag, type=value_type)
    return cli.parse_args()
def get_constant_input_name_of_operator(graph: Graph, operator: Operator):
    """Return the name of the operator's constant input, if it has one.

    An input counts as constant when the graph contains it, it is a leaf
    variable, and its variable carries a non-``None`` value. When several
    inputs qualify, the last one wins.

    Returns:
        The input name, or ``None`` when no input qualifies.
    """
    found = None
    for candidate in operator.inputs:
        if graph.containe_var(candidate) and graph.is_leaf_variable(candidate):
            if graph.get_variable(candidate).value is not None:
                found = candidate
    return found
class FusedGeluPass(BasePass):
    """Fuse the Div -> Erf -> Add -> Mul -> Mul sequence into one GELU operator."""

    def process(self, graph: Graph) -> Graph:
        """Search ``graph`` for the erf-based GELU sequence and fuse each match."""
        self.transform = GraphTransform(graph)

        find_sequence_subgraph(
            graph, pattern=[OP.DIV, OP.ERF, OP.ADD, OP.MUL, OP.MUL], callback=self.fuse_gelu, strict=True
        )
        return graph

    def fuse_gelu(self, graph: Graph, pattern: PatternGraph):
        """Replace one matched sequence by a GELU operator, if it is safe to.

        The match is left untouched when:
          * the Div has no producing operator,
          * the producer does not feed exactly two operators,
          * those two are not the Div (node 0) and the first Mul (node 3), or
          * the Div and that Mul share no input.
        """
        nodes = pattern.nodes
        div_op = nodes[0].operator
        first_mul_op = nodes[3].operator
        last_op = nodes[-1].operator

        prev_ops = self.transform.get_previous_operators(div_op)
        if not prev_ops:
            # The Div reads a graph input or constant, so there is no producer
            # to inspect (previously an IndexError).
            return

        next_ops = self.transform.get_next_operators(prev_ops[0])
        if len(next_ops) != 2:
            return

        if div_op not in next_ops or first_mul_op not in next_ops:
            return

        # The GELU input is the tensor consumed by both the Div and the Mul.
        gelu_op_input = next(
            (name for name in first_mul_op.inputs if name in div_op.inputs), None
        )
        if gelu_op_input is None:
            # Bail out before deleting anything; otherwise the sub-graph would
            # be removed and replaced by an operator whose input is None.
            return

        self.transform.delete_operators_between_op_op(div_op, last_op)

        gelu_op = self.transform.make_operator(
            op_type=OP.GELU,
            inputs=[gelu_op_input],
            outputs=[last_op.outputs[0]]
        )
        self.transform.add_operator(gelu_op)
class FormatReshape(BasePass):
    """Store the constant shape input of every Reshape operator as an int64 tensor."""

    def process(self, graph: Graph) -> Graph:
        """Normalise the shape input of each "Reshape" operator in ``graph``."""
        for op in graph.operators.values():
            if op.op_type == "Reshape":
                self.format_reshape(graph, op)

        return graph

    def format_reshape(self, graph, operator):
        """Convert the operator's shape variable value to ``torch.int64``.

        Operators with no second input, or whose shape variable holds no value,
        are left unchanged. Other passes in this file likewise treat
        ``value is None`` as "not a constant".
        """
        if len(operator.inputs) < 2:
            return

        shape = graph.get_variable(operator.inputs[1])
        if shape.value is None:
            # Shape is computed at run time; torch.tensor(None) would raise.
            return

        shape.value = torch.tensor(shape.value, dtype=torch.int64)
def parse_args():
    """Parse the model-refinement options from the command line.

    Returns:
        Namespace with ``onnx_path`` and ``dst_onnx_path`` (str or ``None``),
        ``bsz`` (int, default 8) and ``imgsz`` (int, default 224).
    """
    cli = argparse.ArgumentParser()

    # Source and destination model paths.
    for path_flag in ("--onnx_path", "--dst_onnx_path"):
        cli.add_argument(path_flag, type=str)

    # Integer options with defaults.
    sized_options = (
        ("--bsz", 8, "Batch size"),
        ("--imgsz", 224, "Image size"),
    )
    for flag, default_value, description in sized_options:
        cli.add_argument(flag, type=int, default=default_value, help=description)

    namespace = cli.parse_args()
    return namespace
b/models/cv/detection/detr/ixrt/refine_utils/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. \ No newline at end of file diff --git a/models/cv/detection/detr/ixrt/refine_utils/common.py b/models/cv/detection/detr/ixrt/refine_utils/common.py new file mode 100644 index 0000000000000000000000000000000000000000..b19dccfcd8fee8734725c8a6caeb9d8ae9a5a2bf --- /dev/null +++ b/models/cv/detection/detr/ixrt/refine_utils/common.py @@ -0,0 +1,37 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
def find_sequence_subgraph(graph,
                           pattern: Union[List[str], PatternGraph],
                           callback: Callable[[Graph, PatternGraph], None],
                           strict=True):
    """Search ``graph`` for ``pattern`` and hand the matches to ``callback``.

    Args:
        graph: Graph to search.
        pattern: Either a ready-made ``PatternGraph`` or a list of operator
            type names, which is first turned into a pattern with
            ``build_sequence_graph``.
        callback: Passed straight to ``GraphMatcher.findall`` together with
            ``graph`` (presumably invoked once per match -- confirm against
            the matcher implementation).
        strict: Forwarded to ``GraphMatcher``.

    Returns:
        Whatever ``GraphMatcher.findall`` returns.
    """
    pattern_graph = build_sequence_graph(pattern) if isinstance(pattern, List) else pattern
    return GraphMatcher(pattern_graph, strict=strict).findall(graph, callback)
class FusedLinearPass(BasePass):
    """Rewrite ``MatMul`` (optionally followed by ``Add``) into ``LinearFP16``.

    Computes A x B = C; only applies when B is a constant 2-D initializer.
    """

    def process(self, graph: Graph) -> Graph:
        """Fuse MatMul+Add first, then any remaining plain MatMul."""
        self.transform = GraphTransform(graph)

        find_sequence_subgraph(
            graph, pattern=[OP.MATMUL, OP.ADD], callback=self.to_linear_with_bias, strict=True
        )
        find_sequence_subgraph(
            graph, pattern=[OP.MATMUL], callback=self.to_linear, strict=True
        )
        return graph

    @staticmethod
    def _transpose_weight(b_var):
        """Transpose the 2-D weight in place; return ``(hidden_size, linear_dim)``.

        After the swap, ``hidden_size`` is ``shape[1]`` and ``linear_dim`` is
        ``shape[0]`` of the transposed weight.
        """
        b_var.value = b_var.value.transpose(1, 0)
        b_var.shape[0], b_var.shape[1] = b_var.shape[1], b_var.shape[0]
        return b_var.shape[1], b_var.shape[0]

    def _constant_weight(self, graph, matmul):
        """Return the MatMul's second input if it is a constant 2-D leaf, else None."""
        b_var = graph.get_variable(matmul.operator.inputs[1])
        if not graph.is_leaf_variable(b_var) or b_var.value is None:
            return None
        if b_var.value.ndim != 2:
            return None
        return b_var

    def to_linear_with_bias(self, graph, pattern: PatternGraph):
        """Replace a matched MatMul -> Add pair with one biased ``LinearFP16``."""
        matmul = pattern.nodes[0]
        add = pattern.nodes[1]
        if len(add.operator.inputs) != 2:
            return

        b_var = self._constant_weight(graph, matmul)
        if b_var is None:
            return

        # The bias is the Add input that is not produced by the MatMul.
        bias_var = None
        for name in add.operator.inputs:
            if name not in matmul.operator.outputs:
                bias_var = name
        if bias_var is None:
            # Both Add inputs come from the MatMul (x + x): there is no bias
            # to fuse, so leave the subgraph untouched.
            return

        # NOTE(review): this extends the MatMul's own input list in place,
        # exactly as the original code did, before the MatMul is deleted.
        inputs = matmul.operator.inputs
        inputs.append(bias_var)
        outputs = add.operator.outputs

        hidden_size, linear_dim = self._transpose_weight(b_var)

        attributes = {
            "hidden_size": hidden_size,
            "linear_dim": linear_dim,
            "has_bias": 1,
            "act_type": "none"
        }

        # NOTE(review): unlike `to_linear`, the result of make_operator is not
        # passed to add_operator here -- confirm which of the two is intended.
        self.transform.make_operator(
            "LinearFP16",
            inputs=inputs,
            outputs=outputs,
            **attributes
        )

        self.transform.delete_operator(add.operator)
        self.transform.delete_operator(matmul.operator)

    def to_linear(self, graph, pattern: PatternGraph):
        """Replace a matched MatMul with a bias-free ``LinearFP16``."""
        matmul = pattern.nodes[0]
        if len(matmul.operator.inputs) != 2:
            return

        b_var = self._constant_weight(graph, matmul)
        if b_var is None:
            return

        # The dimensions must be computed before the attribute dict is built;
        # the original referenced them first and raised UnboundLocalError.
        hidden_size, linear_dim = self._transpose_weight(b_var)

        attributes = {
            "hidden_size": hidden_size,
            "linear_dim": linear_dim,
            "has_bias": 0,
            "act_type": "none"
        }

        op = self.transform.make_operator(
            op_type="LinearFP16",
            inputs=pattern.nodes[0].operator.inputs,
            outputs=[pattern.nodes[-1].operator.outputs[0]],
            **attributes
        )

        self.transform.add_operator(op)

        self.transform.delete_operator(matmul.operator)
#
# Common pattern Matmul to Gemm
#
class FusedGemmPass(BasePass):
    """Replace ``MatMul`` operators whose inputs are all non-leaf with ``Gemm``."""

    def process(self, graph: Graph) -> Graph:
        """Run :meth:`to_gemm` on every MatMul match and return ``graph``."""
        self.transform = GraphTransform(graph)

        find_sequence_subgraph(
            graph, pattern=[OP.MATMUL], callback=self.to_gemm, strict=True
        )
        return graph

    def to_gemm(self, graph, pattern: PatternGraph):
        """Swap the matched MatMul for a Gemm with the same inputs/outputs.

        The match is skipped unless the operator has exactly two inputs and
        one output, and none of the inputs is a leaf variable.
        """
        matmul_op = pattern.nodes[0]
        inputs = matmul_op.operator.inputs
        outputs = matmul_op.operator.outputs

        # Either mismatch disqualifies the operator. The original used
        # `and`, which only bailed out when BOTH counts were wrong.
        if len(inputs) != 2 or len(outputs) != 1:
            return

        if any(self.transform.is_leaf_variable(name) for name in inputs):
            return

        print(f"{self.transform.get_variable(inputs[0]).shape} {self.transform.get_variable(inputs[1]).shape}")
        self.transform.delete_operator(matmul_op.operator)

        # NOTE(review): per the ONNX spec, Gemm with transB=1 computes
        # A x B^T, which differs from MatMul's A x B unless the second input
        # is already laid out transposed -- confirm against the backend.
        op = self.transform.make_operator(
            op_type="Gemm",
            inputs=inputs,
            outputs=outputs,
            alpha=1,
            beta=1,
            transB=1
        )

        self.transform.add_operator(op)
#!/bin/bash
# DETR FP16 accuracy run: simplify -> refine -> set batch size -> build
# engine -> inference in MAP mode. Each preparation step is skipped when its
# output file already exists.
#
# Options: --bs <batch size>   --tgt <value passed as --map_target>
# Reads CONFIG_DIR, CHECKPOINTS_DIR, DATASETS_DIR and RUN_DIR; the remaining
# variables used below (MODEL_NAME, ORIGINE_MODEL, IMGSIZE, COCO_GT, EVAL_DIR,
# DATA_PROCESS_TYPE, LAYER_FUSION, DECODER_FASTER) are not set in this script
# and presumably come from the sourced config file or the environment.

EXIT_STATUS=0
# Set EXIT_STATUS to 1 when the command that ran just before the call failed.
check_status()
{
    if ((${PIPESTATUS[0]} != 0));then
        EXIT_STATUS=1
    fi
}

# Run parameters
BSZ=1
WARM_UP=-1
TGT=-1
LOOP_COUNT=-1
RUN_MODE=MAP
PRECISION=float16

# Update arguments
# index is incremented before the case statement, so ${arguments[index]} is
# the word that follows the option currently being examined.
index=0
options=$@
arguments=($options)
for argument in $options
do
    index=`expr $index + 1`
    case $argument in
      --bs) BSZ=${arguments[index]};;
      --tgt) TGT=${arguments[index]};;
    esac
done

# CONFIG_DIR is sourced as a file.
source ${CONFIG_DIR}
ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}

echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
echo DATASETS_DIR : ${DATASETS_DIR}
echo RUN_DIR : ${RUN_DIR}
echo CONFIG_DIR : ${CONFIG_DIR}
echo ====================== Model Info ======================
echo Model Name : ${MODEL_NAME}
echo Onnx Path : ${ORIGINE_MODEL}

step=0
# NOTE(review): faster and CURRENT_MODEL are assigned here but not referenced
# anywhere else in this script.
faster=0
CURRENT_MODEL=${ORIGINE_MODEL}
if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then
    faster=1
fi

# Simplify Model
let step++
echo;
echo [STEP ${step}] : Simplify Model
SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
if [ -f ${SIM_MODEL} ];then
    echo " "Simplify Model Skipped, ${SIM_MODEL} has been existed
else
    python3 ${RUN_DIR}/simplify_model.py \
        --origin_model $ORIGINE_MODEL \
        --output_model ${SIM_MODEL}
    echo " "Generate ${SIM_MODEL}
fi

# Refine Model
let step++
echo;
echo [STEP ${step}] : Refine Model
REFINE_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_refine.onnx
if [ -f ${REFINE_MODEL} ];then
    echo " "Refine Model Skipped, ${REFINE_MODEL} has been existed
else
    python3 ${RUN_DIR}/refine_model.py \
        --onnx_path ${SIM_MODEL} \
        --dst_onnx_path ${REFINE_MODEL} \
        --bsz ${BSZ} \
        --imgsz ${IMGSIZE}
fi

# Change Batchsize
let step++
echo;
echo [STEP ${step}] : Change Batchsize
FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.onnx
if [ -f $FINAL_MODEL ];then
    echo " "Change Batchsize Skipped, $FINAL_MODEL has been existed
else
    python3 ${RUN_DIR}/modify_batchsize.py \
        --batch_size ${BSZ} \
        --origin_model ${REFINE_MODEL} \
        --output_model ${FINAL_MODEL}
    echo " "Generate ${FINAL_MODEL}
fi

# Build Engine
let step++
echo;
echo [STEP ${step}] : Build Engine
ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
if [ -f $ENGINE_FILE ];then
    echo " "Build Engine Skip, $ENGINE_FILE has been existed
else
    python3 ${RUN_DIR}/build_engine.py \
        --precision ${PRECISION} \
        --model ${FINAL_MODEL} \
        --engine ${ENGINE_FILE}
    echo " "Generate Engine ${ENGINE_FILE}
fi

# Inference
# Only this step's status is recorded via check_status; failures of the
# preparation steps above do not change EXIT_STATUS.
let step++
echo;
echo [STEP ${step}] : Inference
python3 ${RUN_DIR}/inference.py \
    --model_engine=${ENGINE_FILE} \
    --coco_gt=${COCO_GT} \
    --eval_dir=${EVAL_DIR} \
    --data_process_type ${DATA_PROCESS_TYPE} \
    --imgsz=${IMGSIZE} \
    --warm_up=${WARM_UP} \
    --loop_count ${LOOP_COUNT} \
    --test_mode ${RUN_MODE} \
    --model_name ${MODEL_NAME} \
    --precision ${PRECISION} \
    --pred_dir ${CHECKPOINTS_DIR} \
    --map_target ${TGT} \
    --bsz ${BSZ}; check_status
exit ${EXIT_STATUS}
#!/bin/bash
# DETR FP16 performance run: simplify -> refine -> set batch size -> build
# engine -> inference in FPS mode. Each preparation step is skipped when its
# output file already exists.
#
# Options: --bs <batch size>   --tgt <value passed as --fps_target>
# Reads CONFIG_DIR, CHECKPOINTS_DIR, DATASETS_DIR and RUN_DIR; the remaining
# variables used below (MODEL_NAME, ORIGINE_MODEL, IMGSIZE, COCO_GT, EVAL_DIR,
# DATA_PROCESS_TYPE, LAYER_FUSION, DECODER_FASTER) are not set in this script
# and presumably come from the sourced config file or the environment.

EXIT_STATUS=0
# Set EXIT_STATUS to 1 when the command that ran just before the call failed.
check_status()
{
    if ((${PIPESTATUS[0]} != 0));then
        EXIT_STATUS=1
    fi
}

# Run parameters
BSZ=1
WARM_UP=3
TGT=-1
LOOP_COUNT=10
RUN_MODE=FPS
PRECISION=float16

# Update arguments
# index is incremented before the case statement, so ${arguments[index]} is
# the word that follows the option currently being examined.
index=0
options=$@
arguments=($options)
for argument in $options
do
    index=`expr $index + 1`
    case $argument in
      --bs) BSZ=${arguments[index]};;
      --tgt) TGT=${arguments[index]};;
    esac
done

# CONFIG_DIR is sourced as a file.
source ${CONFIG_DIR}
ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}

echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
echo DATASETS_DIR : ${DATASETS_DIR}
echo RUN_DIR : ${RUN_DIR}
echo CONFIG_DIR : ${CONFIG_DIR}
echo ====================== Model Info ======================
echo Model Name : ${MODEL_NAME}
echo Onnx Path : ${ORIGINE_MODEL}

step=0
# NOTE(review): faster and CURRENT_MODEL are assigned here but not referenced
# anywhere else in this script.
faster=0
CURRENT_MODEL=${ORIGINE_MODEL}
if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then
    faster=1
fi

# Simplify Model
let step++
echo;
echo [STEP ${step}] : Simplify Model
SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
if [ -f ${SIM_MODEL} ];then
    echo " "Simplify Model Skipped, ${SIM_MODEL} has been existed
else
    python3 ${RUN_DIR}/simplify_model.py \
        --origin_model $ORIGINE_MODEL \
        --output_model ${SIM_MODEL}
    echo " "Generate ${SIM_MODEL}
fi

# Refine Model
let step++
echo;
echo [STEP ${step}] : Refine Model
REFINE_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_refine.onnx
if [ -f ${REFINE_MODEL} ];then
    echo " "Refine Model Skipped, ${REFINE_MODEL} has been existed
else
    python3 ${RUN_DIR}/refine_model.py \
        --onnx_path ${SIM_MODEL} \
        --dst_onnx_path ${REFINE_MODEL} \
        --bsz ${BSZ} \
        --imgsz ${IMGSIZE}
fi

# Change Batchsize
let step++
echo;
echo [STEP ${step}] : Change Batchsize
FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.onnx
if [ -f $FINAL_MODEL ];then
    echo " "Change Batchsize Skipped, $FINAL_MODEL has been existed
else
    python3 ${RUN_DIR}/modify_batchsize.py \
        --batch_size ${BSZ} \
        --origin_model ${REFINE_MODEL} \
        --output_model ${FINAL_MODEL}
    echo " "Generate ${FINAL_MODEL}
fi

# Build Engine
let step++
echo;
echo [STEP ${step}] : Build Engine
ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
if [ -f $ENGINE_FILE ];then
    echo " "Build Engine Skip, $ENGINE_FILE has been existed
else
    python3 ${RUN_DIR}/build_engine.py \
        --precision ${PRECISION} \
        --model ${FINAL_MODEL} \
        --engine ${ENGINE_FILE}
    echo " "Generate Engine ${ENGINE_FILE}
fi

# Inference
# Only this step's status is recorded via check_status; failures of the
# preparation steps above do not change EXIT_STATUS.
let step++
echo;
echo [STEP ${step}] : Inference
python3 ${RUN_DIR}/inference.py \
    --model_engine=${ENGINE_FILE} \
    --coco_gt=${COCO_GT} \
    --eval_dir=${EVAL_DIR} \
    --data_process_type ${DATA_PROCESS_TYPE} \
    --imgsz=${IMGSIZE} \
    --warm_up=${WARM_UP} \
    --loop_count ${LOOP_COUNT} \
    --test_mode ${RUN_MODE} \
    --model_name ${MODEL_NAME} \
    --precision ${PRECISION} \
    --pred_dir ${CHECKPOINTS_DIR} \
    --fps_target ${TGT} \
    --bsz ${BSZ}; check_status
exit ${EXIT_STATUS}
# Simplify
def simplify_model(args):
    """Simplify an ONNX model, re-run shape inference and save the result.

    Args:
        args: Namespace with ``origin_model`` (input ONNX path) and
            ``output_model`` (destination path).
    """
    import warnings

    onnx_model = onnx.load(args.origin_model)
    model_simp, check = simplify(onnx_model)
    if not check:
        # The validation flag used to be discarded silently. Keep the
        # best-effort behaviour (still save), but make the problem visible.
        warnings.warn("Simplified ONNX model could not be validated")
    model_simp = onnx.shape_inference.infer_shapes(model_simp)
    onnx.save(model_simp, args.output_model)
    print(" Simplify onnx Done.")


def parse_args():
    """Parse ``--origin_model`` and ``--output_model`` from the command line."""
    parser = argparse.ArgumentParser()
    # Both paths are mandatory; omitting one used to surface later as an
    # obscure failure inside onnx.load / onnx.save.
    parser.add_argument("--origin_model", type=str, required=True)
    parser.add_argument("--output_model", type=str, required=True)
    args = parser.parse_args()
    return args


# Guarded so that importing this module no longer parses argv or touches files.
if __name__ == "__main__":
    args = parse_args()
    simplify_model(args)