From 37f4f8ac2c36a1aabfea17563dbc79636f71750b Mon Sep 17 00:00:00 2001 From: "xiaomei.wang" Date: Wed, 24 Apr 2024 16:05:17 +0800 Subject: [PATCH] Add densenet121, resnet_v1_d50, and squeezenet_1.1 --- .../classification/densenet121/ixrt/README.md | 53 ++++ .../densenet121/ixrt/build_engine.py | 109 +++++++ .../densenet121/ixrt/calibration_dataset.py | 112 +++++++ .../classification/densenet121/ixrt/common.py | 78 +++++ .../densenet121/ixrt/config/DENSENET_CONFIG | 33 ++ .../densenet121/ixrt/export_onnx.py | 43 +++ .../densenet121/ixrt/inference.py | 156 ++++++++++ .../densenet121/ixrt/modify_batchsize.py | 56 ++++ .../classification/densenet121/ixrt/quant.py | 166 ++++++++++ .../scripts/infer_densenet_fp16_accuracy.sh | 114 +++++++ .../infer_densenet_fp16_performance.sh | 114 +++++++ .../densenet121/ixrt/simplify_model.py | 40 +++ .../resnet_v1_d50/ixrt/README.md | 62 ++++ .../resnet_v1_d50/ixrt/build_engine.py | 109 +++++++ .../resnet_v1_d50/ixrt/build_i8_engine.py | 112 +++++++ .../resnet_v1_d50/ixrt/calibration_dataset.py | 112 +++++++ .../resnet_v1_d50/ixrt/common.py | 78 +++++ .../ixrt/config/RESNET_V1_D50_CONFIG | 33 ++ .../resnet_v1_d50/ixrt/export_onnx.py | 43 +++ .../resnet_v1_d50/ixrt/inference.py | 156 ++++++++++ .../resnet_v1_d50/ixrt/modify_batchsize.py | 57 ++++ .../resnet_v1_d50/ixrt/quant.py | 166 ++++++++++ .../resnet_v1_d50/ixrt/refine_model.py | 290 +++++++++++++++++ .../ixrt/refine_utils/__init__.py | 0 .../resnet_v1_d50/ixrt/refine_utils/common.py | 36 +++ .../ixrt/refine_utils/linear_pass.py | 113 +++++++ .../ixrt/refine_utils/matmul_to_gemm_pass.py | 54 ++++ .../infer_resnet_v1_d50_fp16_accuracy.sh | 114 +++++++ .../infer_resnet_v1_d50_fp16_performance.sh | 114 +++++++ .../infer_resnet_v1_d50_int8_accuracy.sh | 145 +++++++++ .../infer_resnet_v1_d50_int8_performance.sh | 144 +++++++++ .../resnet_v1_d50/ixrt/simplify_model.py | 40 +++ .../squeezenet_1.1/ixrt/README.md | 62 ++++ .../squeezenet_1.1/ixrt/build_engine.py | 53 ++++ .../squeezenet_1.1/ixrt/build_i8_engine.py | 112 +++++++ .../ixrt/calibration_dataset.py | 112 +++++++ .../squeezenet_1.1/ixrt/common.py | 79 +++++ .../ixrt/config/SQUEEZENET_V11_CONFIG | 19 ++ .../squeezenet_1.1/ixrt/export_onnx.py | 45 +++ .../squeezenet_1.1/ixrt/inference.py | 157 ++++++++++ .../squeezenet_1.1/ixrt/modify_batchsize.py | 57 ++++ .../squeezenet_1.1/ixrt/quant.py | 166 ++++++++++ .../squeezenet_1.1/ixrt/refine_model.py | 291 ++++++++++++++++++ .../infer_squeezenet_v11_fp16_accuracy.sh | 143 +++++++++ .../infer_squeezenet_v11_fp16_performance.sh | 143 +++++++++ .../infer_squeezenet_v11_int8_accuracy.sh | 145 +++++++++ .../infer_squeezenet_v11_int8_performance.sh | 144 +++++++++ .../squeezenet_1.1/ixrt/simplify_model.py | 41 +++ 48 files changed, 4821 insertions(+) create mode 100644 models/cv/classification/densenet121/ixrt/README.md create mode 100644 models/cv/classification/densenet121/ixrt/build_engine.py create mode 100644 models/cv/classification/densenet121/ixrt/calibration_dataset.py create mode 100644 models/cv/classification/densenet121/ixrt/common.py create mode 100644 models/cv/classification/densenet121/ixrt/config/DENSENET_CONFIG create mode 100644 models/cv/classification/densenet121/ixrt/export_onnx.py create mode 100644 models/cv/classification/densenet121/ixrt/inference.py create mode 100644 models/cv/classification/densenet121/ixrt/modify_batchsize.py create mode 100644 models/cv/classification/densenet121/ixrt/quant.py create mode 100644 
models/cv/classification/densenet121/ixrt/scripts/infer_densenet_fp16_accuracy.sh create mode 100644 models/cv/classification/densenet121/ixrt/scripts/infer_densenet_fp16_performance.sh create mode 100644 models/cv/classification/densenet121/ixrt/simplify_model.py create mode 100644 models/cv/classification/resnet_v1_d50/ixrt/README.md create mode 100644 models/cv/classification/resnet_v1_d50/ixrt/build_engine.py create mode 100644 models/cv/classification/resnet_v1_d50/ixrt/build_i8_engine.py create mode 100644 models/cv/classification/resnet_v1_d50/ixrt/calibration_dataset.py create mode 100644 models/cv/classification/resnet_v1_d50/ixrt/common.py create mode 100644 models/cv/classification/resnet_v1_d50/ixrt/config/RESNET_V1_D50_CONFIG create mode 100644 models/cv/classification/resnet_v1_d50/ixrt/export_onnx.py create mode 100644 models/cv/classification/resnet_v1_d50/ixrt/inference.py create mode 100644 models/cv/classification/resnet_v1_d50/ixrt/modify_batchsize.py create mode 100644 models/cv/classification/resnet_v1_d50/ixrt/quant.py create mode 100644 models/cv/classification/resnet_v1_d50/ixrt/refine_model.py create mode 100644 models/cv/classification/resnet_v1_d50/ixrt/refine_utils/__init__.py create mode 100644 models/cv/classification/resnet_v1_d50/ixrt/refine_utils/common.py create mode 100644 models/cv/classification/resnet_v1_d50/ixrt/refine_utils/linear_pass.py create mode 100644 models/cv/classification/resnet_v1_d50/ixrt/refine_utils/matmul_to_gemm_pass.py create mode 100644 models/cv/classification/resnet_v1_d50/ixrt/scripts/infer_resnet_v1_d50_fp16_accuracy.sh create mode 100644 models/cv/classification/resnet_v1_d50/ixrt/scripts/infer_resnet_v1_d50_fp16_performance.sh create mode 100644 models/cv/classification/resnet_v1_d50/ixrt/scripts/infer_resnet_v1_d50_int8_accuracy.sh create mode 100644 models/cv/classification/resnet_v1_d50/ixrt/scripts/infer_resnet_v1_d50_int8_performance.sh create mode 100644 models/cv/classification/resnet_v1_d50/ixrt/simplify_model.py create mode 100644 models/cv/classification/squeezenet_1.1/ixrt/README.md create mode 100644 models/cv/classification/squeezenet_1.1/ixrt/build_engine.py create mode 100644 models/cv/classification/squeezenet_1.1/ixrt/build_i8_engine.py create mode 100644 models/cv/classification/squeezenet_1.1/ixrt/calibration_dataset.py create mode 100644 models/cv/classification/squeezenet_1.1/ixrt/common.py create mode 100644 models/cv/classification/squeezenet_1.1/ixrt/config/SQUEEZENET_V11_CONFIG create mode 100644 models/cv/classification/squeezenet_1.1/ixrt/export_onnx.py create mode 100644 models/cv/classification/squeezenet_1.1/ixrt/inference.py create mode 100644 models/cv/classification/squeezenet_1.1/ixrt/modify_batchsize.py create mode 100644 models/cv/classification/squeezenet_1.1/ixrt/quant.py create mode 100644 models/cv/classification/squeezenet_1.1/ixrt/refine_model.py create mode 100644 models/cv/classification/squeezenet_1.1/ixrt/scripts/infer_squeezenet_v11_fp16_accuracy.sh create mode 100644 models/cv/classification/squeezenet_1.1/ixrt/scripts/infer_squeezenet_v11_fp16_performance.sh create mode 100644 models/cv/classification/squeezenet_1.1/ixrt/scripts/infer_squeezenet_v11_int8_accuracy.sh create mode 100644 models/cv/classification/squeezenet_1.1/ixrt/scripts/infer_squeezenet_v11_int8_performance.sh create mode 100644 models/cv/classification/squeezenet_1.1/ixrt/simplify_model.py diff --git a/models/cv/classification/densenet121/ixrt/README.md b/models/cv/classification/densenet121/ixrt/README.md new 
file mode 100644
index 00000000..6f6f2b8d
--- /dev/null
+++ b/models/cv/classification/densenet121/ixrt/README.md
@@ -0,0 +1,53 @@
+# DenseNet
+
+## Description
+
+The Dense Convolutional Network (DenseNet) connects each layer to every other layer in a feed-forward fashion. Whereas a traditional convolutional network with L layers has L connections - one between each layer and the next - a DenseNet has L(L+1)/2 direct connections.
+
+## Setup
+
+### Install
+
+```bash
+yum install mesa-libGL
+
+pip3 install tqdm
+pip3 install onnx
+pip3 install onnxsim
+pip3 install tabulate
+pip3 install ppq
+```
+
+### Download
+
+Dataset: download the ImageNet validation dataset. A `val_map.txt` label file must sit in the dataset root (see `calibration_dataset.py`).
+
+### Model Conversion
+
+```bash
+mkdir checkpoints
+python3 export_onnx.py --output_model checkpoints/densenet121.onnx
+```
+
+## Inference
+
+```bash
+export DATASETS_DIR=/path/to/imagenet_val/
+export CHECKPOINTS_DIR=./checkpoints
+export RUN_DIR=./
+export CONFIG_DIR=config/DENSENET_CONFIG
+```
+
+### FP16
+
+```bash
+# Accuracy
+bash scripts/infer_densenet_fp16_accuracy.sh
+# Performance
+bash scripts/infer_densenet_fp16_performance.sh
+```
+
+## Results
+
+Model    |BatchSize |Precision |FPS       |Top-1(%)  |Top-5(%)
+---------|----------|----------|----------|----------|--------
+DenseNet |          |          |          |          |
diff --git a/models/cv/classification/densenet121/ixrt/build_engine.py b/models/cv/classification/densenet121/ixrt/build_engine.py
new file mode 100644
index 00000000..01e126bc
--- /dev/null
+++ b/models/cv/classification/densenet121/ixrt/build_engine.py
@@ -0,0 +1,109 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
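+# Usage sketch (paths are examples; flag names come from parse_args below):
+#   python3 build_engine.py --precision float16 \
+#       --model checkpoints/densenet121_sim.onnx --engine densenet121_fp16.engine
+# For INT8, pass --precision int8 together with --datasets_dir so the
+# EngineCalibrator defined below can stream calibration batches.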
+import os
+import cv2
+import argparse
+import numpy as np
+
+import torch
+import tensorrt
+from calibration_dataset import getdataloader
+import cuda.cudart as cudart
+
+def assertSuccess(err):
+    assert(err == cudart.cudaError_t.cudaSuccess)
+
+class EngineCalibrator(tensorrt.IInt8EntropyCalibrator2):
+
+    def __init__(self, cache_file, datasets_dir, loop_count=10, bsz=1, img_sz=224):
+        super().__init__()
+        self.cache_file = cache_file
+        self.image_batcher = getdataloader(datasets_dir, loop_count, batch_size=bsz, img_sz=img_sz)
+        self.batch_generator = iter(self.image_batcher)
+        # Allocate one device buffer large enough for a full calibration batch.
+        size = img_sz*img_sz*3*bsz
+        err, self.batch_allocation = cudart.cudaMalloc(size)
+        assertSuccess(err)
+
+    def __del__(self):
+        err, = cudart.cudaFree(self.batch_allocation)
+        assertSuccess(err)
+
+    def get_batch_size(self):
+        return self.image_batcher.batch_size
+
+    def get_batch(self, names):
+        try:
+            batch, _ = next(self.batch_generator)
+            batch = batch.numpy()
+            cudart.cudaMemcpy(self.batch_allocation,
+                              np.ascontiguousarray(batch),
+                              batch.nbytes,
+                              cudart.cudaMemcpyKind.cudaMemcpyHostToDevice)
+            return [int(self.batch_allocation)]
+        except StopIteration:
+            # Returning None tells TensorRT the calibration data is exhausted.
+            return None
+
+    def read_calibration_cache(self):
+        if os.path.exists(self.cache_file):
+            with open(self.cache_file, "rb") as f:
+                return f.read()
+
+    def write_calibration_cache(self, cache):
+        with open(self.cache_file, "wb") as f:
+            f.write(cache)
+
+def main(config):
+    IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.VERBOSE)
+    builder = tensorrt.Builder(IXRT_LOGGER)
+    EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
+    network = builder.create_network(EXPLICIT_BATCH)
+    build_config = builder.create_builder_config()
+    parser = tensorrt.OnnxParser(network, IXRT_LOGGER)
+    parser.parse_from_file(config.model)
+
+    precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16
+    print("precision : ", precision)
+    build_config.set_flag(precision)
+    if config.precision == "int8":
+        build_config.int8_calibrator = EngineCalibrator("int8_cache", config.datasets_dir)
+
+    plan = builder.build_serialized_network(network, build_config)
+    engine_file_path = config.engine
+    with open(engine_file_path, "wb") as f:
+        f.write(plan)
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--model", type=str)
+    parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8",
+                        help="The precision of datatype")
+    parser.add_argument("--engine", type=str, default=None)
+    parser.add_argument(
+        "--datasets_dir",
+        type=str,
+        default="",
+        help="ImageNet dir",
+    )
+    args = parser.parse_args()
+    return args
+
+if __name__ == "__main__":
+    args = parse_args()
+    main(args)
diff --git a/models/cv/classification/densenet121/ixrt/calibration_dataset.py b/models/cv/classification/densenet121/ixrt/calibration_dataset.py
new file mode 100644
index 00000000..ec931c65
--- /dev/null
+++ b/models/cv/classification/densenet121/ixrt/calibration_dataset.py
@@ -0,0 +1,112 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import os
+
+import torch
+import torchvision.datasets
+from torch.utils.data import DataLoader
+from torchvision import transforms as T
+
+
+class CalibrationImageNet(torchvision.datasets.ImageFolder):
+    def __init__(self, *args, **kwargs):
+        super(CalibrationImageNet, self).__init__(*args, **kwargs)
+        img2label_path = os.path.join(self.root, "val_map.txt")
+        if not os.path.exists(img2label_path):
+            raise FileNotFoundError(f"Not found label file `{img2label_path}`.")
+
+        self.img2label_map = self.make_img2label_map(img2label_path)
+
+    def make_img2label_map(self, path):
+        with open(path) as f:
+            lines = f.readlines()
+
+        img2label_map = dict()
+        for line in lines:
+            line = line.strip().split("\t")
+            if len(line) != 2:
+                continue
+            img_name, label = line
+            img_name = img_name.strip()
+            if img_name in [None, ""]:
+                continue
+            label = int(label.strip())
+            img2label_map[img_name] = label
+        return img2label_map
+
+    def __getitem__(self, index):
+        path, target = self.samples[index]
+        sample = self.loader(path)
+        if self.transform is not None:
+            sample = self.transform(sample)
+        # Look the label up by file name; ImageFolder's own target is ignored.
+        img_name = os.path.basename(path)
+        target = self.img2label_map[img_name]
+
+        return sample, target
+
+
+def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0):
+    dataset = CalibrationImageNet(
+        data_path,
+        transform=T.Compose(
+            [
+                T.Resize(256),
+                T.CenterCrop(img_sz),
+                T.ToTensor(),
+                T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+            ]
+        ),
+    )
+
+    calibration_dataset = dataset
+    if num_samples is not None:
+        calibration_dataset = torch.utils.data.Subset(
+            dataset, indices=range(num_samples)
+        )
+
+    calibration_dataloader = DataLoader(
+        calibration_dataset,
+        shuffle=True,
+        batch_size=batch_size,
+        drop_last=False,
+        num_workers=workers,
+    )
+
+    verify_dataloader = DataLoader(
+        dataset,
+        shuffle=False,
+        batch_size=batch_size,
+        drop_last=False,
+        num_workers=workers,
+    )
+
+    return calibration_dataloader, verify_dataloader
+
+
+def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000):
+    num_samples = min(total_sample, step * batch_size)
+    if step < 0:
+        num_samples = None
+    calibration_dataloader, _ = create_dataloaders(
+        dataset_dir,
+        img_sz=img_sz,
+        batch_size=batch_size,
+        workers=workers,
+        num_samples=num_samples,
+    )
+    return calibration_dataloader
diff --git a/models/cv/classification/densenet121/ixrt/common.py b/models/cv/classification/densenet121/ixrt/common.py
new file mode 100644
index 00000000..0458195e
--- /dev/null
+++ b/models/cv/classification/densenet121/ixrt/common.py
@@ -0,0 +1,78 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +import os +import cv2 +import glob +import torch +import tensorrt +import numpy as np +import pycuda.driver as cuda + +def eval_batch(batch_score, batch_label): + batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) + values, indices = batch_score.topk(5) + top1, top5 = 0, 0 + for idx, label in enumerate(batch_label): + + if label == indices[idx][0]: + top1 += 1 + if label in indices[idx]: + top5 += 1 + return top1, top5 + +def create_engine_context(engine_path, logger): + with open(engine_path, "rb") as f: + runtime = tensorrt.Runtime(logger) + assert runtime + engine = runtime.deserialize_cuda_engine(f.read()) + assert engine + context = engine.create_execution_context() + assert context + + return engine, context + +def get_io_bindings(engine): + # Setup I/O bindings + inputs = [] + outputs = [] + allocations = [] + + for i in range(engine.num_bindings): + is_input = False + if engine.binding_is_input(i): + is_input = True + name = engine.get_binding_name(i) + dtype = engine.get_binding_dtype(i) + shape = engine.get_binding_shape(i) + if is_input: + batch_size = shape[0] + size = np.dtype(tensorrt.nptype(dtype)).itemsize + for s in shape: + size *= s + allocation = cuda.mem_alloc(size) + binding = { + "index": i, + "name": name, + "dtype": np.dtype(tensorrt.nptype(dtype)), + "shape": list(shape), + "allocation": allocation, + } + print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") + allocations.append(allocation) + if engine.binding_is_input(i): + inputs.append(binding) + else: + outputs.append(binding) + return inputs, outputs, allocations diff --git a/models/cv/classification/densenet121/ixrt/config/DENSENET_CONFIG b/models/cv/classification/densenet121/ixrt/config/DENSENET_CONFIG new file mode 100644 index 00000000..8423aff6 --- /dev/null +++ b/models/cv/classification/densenet121/ixrt/config/DENSENET_CONFIG @@ -0,0 +1,33 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
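+# This file is sourced as shell by the scripts under scripts/ (via
+# `source ${CONFIG_DIR}`), so every entry must stay in plain KEY=VALUE form
+# with no spaces around `=`.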
+# IMGSIZE : model input height/width
+# MODEL_NAME : basename for the generated onnx/engine files
+# ORIGINE_MODEL : file name of the original onnx model
+IMGSIZE=224
+MODEL_NAME=DenseNet
+ORIGINE_MODEL=densenet121.onnx
+
+# QUANT CONFIG (only takes effect when PRECISION is int8)
+    # QUANT_OBSERVER : quantization observer, one of [hist_percentile, percentile, minmax, entropy, ema]
+    # QUANT_BATCHSIZE : batch size of the calibration dataloader; it should match the batch size baked into the onnx, since some ops (e.g. Reshape) may otherwise infer shapes incorrectly
+    # QUANT_STEP : number of calibration steps
+    # QUANT_SEED : random seed, kept fixed so quantization results are reproducible
+    # QUANT_EXIST_ONNX : set this if a quantized model from another source should be used instead
+QUANT_OBSERVER=minmax
+QUANT_BATCHSIZE=1
+QUANT_STEP=32
+QUANT_SEED=42
+DISABLE_QUANT_LIST=
+QUANT_EXIST_ONNX=
diff --git a/models/cv/classification/densenet121/ixrt/export_onnx.py b/models/cv/classification/densenet121/ixrt/export_onnx.py
new file mode 100644
index 00000000..ff86753a
--- /dev/null
+++ b/models/cv/classification/densenet121/ixrt/export_onnx.py
@@ -0,0 +1,43 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import torch
+import torchvision.models as models
+import argparse
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--output_model", type=str)
+    args = parser.parse_args()
+    return args
+
+args = parse_args()
+model = models.densenet121(pretrained=True)
+model.cuda()
+model.eval()
+input = torch.randn(1, 3, 224, 224, device='cuda')
+export_onnx_file = args.output_model
+
+torch.onnx.export(model,
+                  input,
+                  export_onnx_file,
+                  export_params=True,
+                  opset_version=11,
+                  do_constant_folding=True,
+                  input_names = ['input'],
+                  output_names = ['output'],)
+print('Model has been converted to ONNX')
diff --git a/models/cv/classification/densenet121/ixrt/inference.py b/models/cv/classification/densenet121/ixrt/inference.py
new file mode 100644
index 00000000..2c9dcb3f
--- /dev/null
+++ b/models/cv/classification/densenet121/ixrt/inference.py
@@ -0,0 +1,156 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
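+# Invoked by the scripts under scripts/; a typical call (paths are examples):
+#   python3 inference.py --engine_file densenet121_fp16.engine \
+#       --datasets_dir ${DATASETS_DIR} --test_mode ACC --bsz 32
+# test_mode=FPS only times execute_v2 loops; test_mode=ACC additionally runs
+# the ImageNet validation set and checks Top-1 against --acc_target.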
+import argparse
+import json
+import os
+import re
+import time
+from tqdm import tqdm
+
+import cv2
+import numpy as np
+import pycuda.autoinit
+import pycuda.driver as cuda
+import torch
+import tensorrt
+
+from calibration_dataset import getdataloader
+from common import eval_batch, create_engine_context, get_io_bindings
+
+def main(config):
+    dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz)
+
+    logger = tensorrt.Logger(tensorrt.Logger.ERROR)
+
+    # Load Engine && I/O bindings
+    engine, context = create_engine_context(config.engine_file, logger)
+    inputs, outputs, allocations = get_io_bindings(engine)
+
+    # Warm up
+    if config.warm_up > 0:
+        print("\nWarm Start.")
+        for i in range(config.warm_up):
+            context.execute_v2(allocations)
+        print("Warm Done.")
+
+    # Inference
+    if config.test_mode == "FPS":
+        torch.cuda.synchronize()
+        start_time = time.time()
+
+        for i in range(config.loop_count):
+            context.execute_v2(allocations)
+
+        torch.cuda.synchronize()
+        end_time = time.time()
+        forward_time = end_time - start_time
+
+        num_samples = 50000
+        if config.loop_count * config.bsz < num_samples:
+            num_samples = config.loop_count * config.bsz
+        fps = num_samples / forward_time
+
+        print("FPS : ", fps)
+        print(f"Performance Check : Test {fps} >= target {config.fps_target}")
+        if fps >= config.fps_target:
+            print("pass!")
+            exit()
+        else:
+            print("failed!")
+            exit(1)
+
+    elif config.test_mode == "ACC":
+
+        ## Prepare the output data
+        output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"])
+        print(f"output shape : {output.shape} output type : {output.dtype}")
+
+        total_sample = 0
+        acc_top1, acc_top5 = 0, 0
+
+        with tqdm(total=len(dataloader)) as _tqdm:
+            for idx, (batch_data, batch_label) in enumerate(dataloader):
+                batch_data = batch_data.numpy().astype(inputs[0]["dtype"])
+                batch_data = np.ascontiguousarray(batch_data)
+                total_sample += batch_data.shape[0]
+
+                cuda.memcpy_htod(inputs[0]["allocation"], batch_data)
+                context.execute_v2(allocations)
+                cuda.memcpy_dtoh(output, outputs[0]["allocation"])
+
+                # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model
+                if len(output.shape) == 4:
+                    output = output.squeeze(axis=(2,3))
+
+                batch_top1, batch_top5 = eval_batch(output, batch_label)
+                acc_top1 += batch_top1
+                acc_top5 += batch_top5
+
+                _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample),
+                                  acc_5='{:.4f}'.format(acc_top5/total_sample))
+                _tqdm.update(1)
+
+        print(f"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}")
+        print(f"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}")
+        acc1 = acc_top1/total_sample
+        print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}")
+        if acc1 >= config.acc_target:
+            print("pass!")
+            exit()
+        else:
+            print("failed!")
+            exit(1)
+
+def parse_config():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--test_mode", type=str, default="FPS", help="FPS or ACC")
+    parser.add_argument(
+        "--engine_file",
+        type=str,
+        help="engine file path"
+    )
+    parser.add_argument(
+        "--datasets_dir",
+        type=str,
+        default="",
+        help="ImageNet dir",
+    )
+    parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times")
+    parser.add_argument("--bsz", type=int, default=32, help="test batch size")
+    parser.add_argument(
+        "--imgsz",
+        "--img",
+        "--img-size",
+        type=int,
+        default=224,
+        help="inference size h,w",
+    )
+    parser.add_argument("--use_async", action="store_true")
+    parser.add_argument(
+        "--device", type=int, default=0, help="cuda device, i.e. 0 or 0,1,2,3,4"
+    )
+    parser.add_argument("--fps_target", type=float, default=-1.0)
+    parser.add_argument("--acc_target", type=float, default=-1.0)
+    parser.add_argument("--loop_count", type=int, default=-1)
+
+    config = parser.parse_args()
+    return config
+
+if __name__ == "__main__":
+    config = parse_config()
+    main(config)
diff --git a/models/cv/classification/densenet121/ixrt/modify_batchsize.py b/models/cv/classification/densenet121/ixrt/modify_batchsize.py
new file mode 100644
index 00000000..689b7a97
--- /dev/null
+++ b/models/cv/classification/densenet121/ixrt/modify_batchsize.py
@@ -0,0 +1,56 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import onnx
+import argparse
+
+def change_input_dim(model, bsz):
+    batch_size = bsz
+
+    # The following code changes the first dimension of every input to be batch_size.
+    # Modify as appropriate ... note that this requires all inputs to
+    # have the same batch_size.
+    inputs = model.graph.input
+    for input in inputs:
+        # Checks omitted. This assumes that all inputs are tensors and have a shape with first dim.
+        # Add checks as needed.
+        dim1 = input.type.tensor_type.shape.dim[0]
+        if isinstance(batch_size, str) and not batch_size.isdigit():
+            # set a symbolic (dynamic) batch size
+            dim1.dim_param = batch_size
+        elif isinstance(batch_size, int) or (isinstance(batch_size, str) and batch_size.isdigit()):
+            # set the given fixed batch size
+            dim1.dim_value = int(batch_size)
+        else:
+            # fall back to batch size 1
+            dim1.dim_value = 1
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--batch_size", type=int)
+    parser.add_argument("--origin_model", type=str)
+    parser.add_argument("--output_model", type=str)
+    args = parser.parse_args()
+    return args
+
+args = parse_args()
+model = onnx.load(args.origin_model)
+change_input_dim(model, args.batch_size)
+onnx.save(model, args.output_model)
diff --git a/models/cv/classification/densenet121/ixrt/quant.py b/models/cv/classification/densenet121/ixrt/quant.py
new file mode 100644
index 00000000..5d71c828
--- /dev/null
+++ b/models/cv/classification/densenet121/ixrt/quant.py
@@ -0,0 +1,166 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
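+# Usage sketch (flag names come from parse_args below; paths are examples):
+#   python3 quant.py --model_name DenseNet --model checkpoints/densenet121_sim.onnx \
+#       --dataset_dir ${DATASETS_DIR} --observer minmax --save_dir checkpoints \
+#       --bsz 32 --step 32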
+"""这是一个高度自动化的 PPQ 量化的入口脚本,将你的模型和数据按要求进行打包: + +在自动化 API 中,我们使用 QuantizationSetting 对象传递量化参数。 + +This file will show you how to quantize your network with PPQ + You should prepare your model and calibration dataset as follow: + + ~/working/model.onnx <-- your model + ~/working/data/*.npy or ~/working/data/*.bin <-- your dataset + +if you are using caffe model: + ~/working/model.caffemdoel <-- your model + ~/working/model.prototext <-- your model + +### MAKE SURE YOUR INPUT LAYOUT IS [N, C, H, W] or [C, H, W] ### + +quantized model will be generated at: ~/working/quantized.onnx +""" +from ppq import * +from ppq.api import * +import os +from calibration_dataset import getdataloader +import argparse +import random +import numpy as np +import torch + + +def setseed(seed=42): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model_name", type=str) + parser.add_argument("--model", type=str) + parser.add_argument("--dataset_dir", type=str, default="imagenet_val") + parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], + default="hist_percentile") + parser.add_argument("--disable_quant_names", nargs='*', type=str) + parser.add_argument("--save_dir", type=str, help="save path", default=None) + parser.add_argument("--bsz", type=int, default=32) + parser.add_argument("--step", type=int, default=20) + parser.add_argument("--seed", type=int, default=42) + parser.add_argument("--imgsz", type=int, default=224) + args = parser.parse_args() + print("Quant config:", args) + print(args.disable_quant_names) + return args + + +config = parse_args() + +# modify configuration below: +WORKING_DIRECTORY = 'checkpoints' # choose your working directory +TARGET_PLATFORM = TargetPlatform.TRT_INT8 # choose your target platform +MODEL_TYPE = NetworkFramework.ONNX # or NetworkFramework.CAFFE +INPUT_LAYOUT = 'chw' # input data layout, chw or hwc +NETWORK_INPUTSHAPE = [1, 3, 224, 224] # input shape of your network +EXECUTING_DEVICE = 'cuda' # 'cuda' or 'cpu'. +REQUIRE_ANALYSE = False +TRAINING_YOUR_NETWORK = False # 是否需要 Finetuning 一下你的网络 +# ------------------------------------------------------------------- +# 加载你的模型文件,PPQ 将会把 onnx 或者 caffe 模型文件解析成自己的格式 +# 如果你正使用 pytorch, tensorflow 等框架,你可以先将模型导出成 onnx +# 使用 torch.onnx.export 即可,如果你在导出 torch 模型时发生错误,欢迎与我们联系。 +# ------------------------------------------------------------------- +graph = None +if MODEL_TYPE == NetworkFramework.ONNX: + graph = load_onnx_graph(onnx_import_file=config.model) +if MODEL_TYPE == NetworkFramework.CAFFE: + graph = load_caffe_graph( + caffemodel_path=os.path.join(WORKING_DIRECTORY, 'model.caffemodel'), + prototxt_path=os.path.join(WORKING_DIRECTORY, 'model.prototxt')) +assert graph is not None, 'Graph Loading Error, Check your input again.' 
+
+# -------------------------------------------------------------------
+# The SETTING object controls PPQ's quantization logic: graph fusion,
+# scheduling, and the detailed quantization strategy. If the quantization
+# error of your network is too high, tune the attributes of this object.
+# -------------------------------------------------------------------
+QS = QuantizationSettingFactory.default_setting()
+
+# -------------------------------------------------------------------
+# The block below shows how to recover accuracy through finetuning.
+# PPQ ships more than ten algorithms for this; each is enabled with
+# QS.xxxx = True. Enable them as needed; turning everything on at once
+# can easily blow up.
+# -------------------------------------------------------------------
+if TRAINING_YOUR_NETWORK:
+    QS.lsq_optimization = True                      # retrain the network to reduce quantization error
+    QS.lsq_optimization_setting.steps = 500         # retraining steps; 500 steps take roughly a few minutes
+    QS.lsq_optimization_setting.collecting_device = 'cuda'  # where cached data lives; switch to 'cpu' if GPU memory runs out
+
+
+dataloader = getdataloader(config.dataset_dir, config.step, batch_size=config.bsz, img_sz=config.imgsz)
+# ENABLE_CUDA_KERNEL speeds quantization up by 3x ~ 10x, but it will not compile
+# without the corresponding build environment. Either install one, or quantize
+# without it by removing the `with ENABLE_CUDA_KERNEL():` block.
+with ENABLE_CUDA_KERNEL():
+    print('Quantizing the network; depending on your configuration this will take a while:')
+    quantized = quantize_native_model(
+        setting=QS,                      # setting object controlling the standard quantization logic
+        model=graph,
+        calib_dataloader=dataloader,
+        calib_steps=config.step,
+        input_shape=NETWORK_INPUTSHAPE,  # use this parameter if the network has a single input
+        inputs=None,                     # for multiple inputs, set input_shape=None and pass e.g.
+                                         # inputs=[torch.zeros(1,3,224,224), torch.zeros(1,3,224,224)]
+        collate_fn=lambda x: x[0].to(EXECUTING_DEVICE),  # same role as a torch dataloader's collate_fn: preprocessing;
+                                         # you may also do it in the dataloader itself and pass None here
+        platform=TARGET_PLATFORM,
+        device=EXECUTING_DEVICE,
+        do_quantize=True)
+
+    # -------------------------------------------------------------------
+    # To run the quantized network and inspect its outputs, create an
+    # executor. It behaves much like a torch.Module. Note this must
+    # happen before export.
+    # -------------------------------------------------------------------
+    executor = TorchExecutor(graph=quantized, device=EXECUTING_DEVICE)
+    # output = executor.forward(input)
+
+    # -------------------------------------------------------------------
+    # PPQ reports quantization error as the inverse of the SNR, i.e.
+    # noise energy / signal energy; an error of 0.1 means quantization noise
+    # makes up about 10% of the overall signal. graphwise_error_analyse
+    # measures the accumulated error, so the last layers usually show the
+    # largest values, caused jointly by all layers before them. Use
+    # layerwise_error_analyse to trace the error back to individual layers.
+    # -------------------------------------------------------------------
+    print('Computing quantization error (SNR); the last layer should stay below 0.1 to preserve accuracy:')
+    reports = graphwise_error_analyse(
+        graph=quantized, running_device=EXECUTING_DEVICE, steps=32,
+        dataloader=dataloader, collate_fn=lambda x: x[0].to(EXECUTING_DEVICE))
+    for op, snr in reports.items():
+        if snr > 0.1: ppq_warning(f'Accumulated quantization error of layer {op} is significant, consider optimizing it.')
+
+    if REQUIRE_ANALYSE:
+        print('Computing per-layer quantization error (SNR); each layer should stay below 0.1 to preserve accuracy:')
+        layerwise_error_analyse(graph=quantized, running_device=EXECUTING_DEVICE,
+                                interested_outputs=None,
+                                dataloader=dataloader, collate_fn=lambda x: x.to(EXECUTING_DEVICE))
+
+    # -------------------------------------------------------------------
+    # Export the quantized model with export_ppq_graph; PPQ adapts the
+    # model format to the chosen export platform.
+    # -------------------------------------------------------------------
+    print('Quantization finished, generating target files:')
+    export_ppq_graph(
+        graph=quantized, platform=TARGET_PLATFORM,
+        graph_save_to=os.path.join(config.save_dir, f"quantized_{config.model_name}.onnx"),
+        config_save_to=os.path.join(config.save_dir, 'quant_cfg.json'))
diff --git a/models/cv/classification/densenet121/ixrt/scripts/infer_densenet_fp16_accuracy.sh b/models/cv/classification/densenet121/ixrt/scripts/infer_densenet_fp16_accuracy.sh
new file mode 100644
index 00000000..b743d708
--- /dev/null
+++ b/models/cv/classification/densenet121/ixrt/scripts/infer_densenet_fp16_accuracy.sh
@@ -0,0 +1,114 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+EXIT_STATUS=0
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then
+        EXIT_STATUS=1
+    fi
+}
+
+# Run parameters
+BSZ=32
+TGT=-1
+WARM_UP=0
+LOOP_COUNT=-1
+RUN_MODE=ACC
+PRECISION=float16
+
+# Update arguments
+index=0
+options=$@
+arguments=($options)
+for argument in $options
+do
+    index=`expr $index + 1`
+    case $argument in
+      --bs) BSZ=${arguments[index]};;
+      --tgt) TGT=${arguments[index]};;
+    esac
+done
+
+source ${CONFIG_DIR}
+ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo CONFIG_DIR : ${CONFIG_DIR}
+echo ====================== Model Info ======================
+echo Model Name : ${MODEL_NAME}
+echo Onnx Path : ${ORIGINE_MODEL}
+
+step=0
+SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
+
+# Simplify Model
+let step++
+echo;
+echo [STEP ${step}] : Simplify Model
+if [ -f ${SIM_MODEL} ];then
+    echo "  "Simplify Model skipped, ${SIM_MODEL} already exists
+else
+    python3 ${RUN_DIR}/simplify_model.py \
+        --origin_model $ORIGINE_MODEL \
+        --output_model ${SIM_MODEL}
+    echo "  "Generate ${SIM_MODEL}
+fi
+
+# Change Batchsize
+let step++
+echo;
+echo [STEP ${step}] : Change Batchsize
+FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx
+if [ -f $FINAL_MODEL ];then
+    echo "  "Change Batchsize skipped, $FINAL_MODEL already exists
+else
+    python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
+        --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL}
+    echo "  "Generate ${FINAL_MODEL}
+fi
+
+# Build Engine
+let step++
+echo;
+echo [STEP ${step}] : Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
+if [ -f $ENGINE_FILE ];then
+    echo "  "Build Engine skipped, $ENGINE_FILE already exists
+else
+    python3 ${RUN_DIR}/build_engine.py \
+        --precision ${PRECISION} \
+        --model ${FINAL_MODEL} \
+        --engine ${ENGINE_FILE}
+    echo "  "Generate Engine ${ENGINE_FILE}
+fi
+
+# Inference
+let step++
+echo;
+echo [STEP ${step}] : Inference
+python3 ${RUN_DIR}/inference.py \
+    --engine_file=${ENGINE_FILE} \
+    --datasets_dir=${DATASETS_DIR} \
+    --imgsz=${IMGSIZE} \
+    --warm_up=${WARM_UP} \
+    --loop_count ${LOOP_COUNT} \
+    --test_mode ${RUN_MODE} \
+    --acc_target ${TGT} \
+    --bsz ${BSZ}; check_status
+
+exit ${EXIT_STATUS}
diff --git a/models/cv/classification/densenet121/ixrt/scripts/infer_densenet_fp16_performance.sh b/models/cv/classification/densenet121/ixrt/scripts/infer_densenet_fp16_performance.sh
new file mode 100644
index 00000000..e7a4f1a7
--- /dev/null
+++ b/models/cv/classification/densenet121/ixrt/scripts/infer_densenet_fp16_performance.sh
@@ -0,0 +1,114 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+EXIT_STATUS=0
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then
+        EXIT_STATUS=1
+    fi
+}
+
+# Run parameters
+BSZ=32
+TGT=-1
+WARM_UP=3
+LOOP_COUNT=20
+RUN_MODE=FPS
+PRECISION=float16
+
+# Update arguments
+index=0
+options=$@
+arguments=($options)
+for argument in $options
+do
+    index=`expr $index + 1`
+    case $argument in
+      --bs) BSZ=${arguments[index]};;
+      --tgt) TGT=${arguments[index]};;
+    esac
+done
+
+source ${CONFIG_DIR}
+ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo CONFIG_DIR : ${CONFIG_DIR}
+echo ====================== Model Info ======================
+echo Model Name : ${MODEL_NAME}
+echo Onnx Path : ${ORIGINE_MODEL}
+
+step=0
+SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
+
+# Simplify Model
+let step++
+echo;
+echo [STEP ${step}] : Simplify Model
+if [ -f ${SIM_MODEL} ];then
+    echo "  "Simplify Model skipped, ${SIM_MODEL} already exists
+else
+    python3 ${RUN_DIR}/simplify_model.py \
+        --origin_model $ORIGINE_MODEL \
+        --output_model ${SIM_MODEL}
+    echo "  "Generate ${SIM_MODEL}
+fi
+
+# Change Batchsize
+let step++
+echo;
+echo [STEP ${step}] : Change Batchsize
+FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx
+if [ -f $FINAL_MODEL ];then
+    echo "  "Change Batchsize skipped, $FINAL_MODEL already exists
+else
+    python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
+        --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL}
+    echo "  "Generate ${FINAL_MODEL}
+fi
+
+# Build Engine
+let step++
+echo;
+echo [STEP ${step}] : Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
+if [ -f $ENGINE_FILE ];then
+    echo "  "Build Engine skipped, $ENGINE_FILE already exists
+else
+    python3 ${RUN_DIR}/build_engine.py \
+        --precision ${PRECISION} \
+        --model ${FINAL_MODEL} \
+        --engine ${ENGINE_FILE}
+    echo "  "Generate Engine ${ENGINE_FILE}
+fi
+
+# Inference
+let step++
+echo;
+echo [STEP ${step}] : Inference
+python3 ${RUN_DIR}/inference.py \
+    --engine_file=${ENGINE_FILE} \
+    --datasets_dir=${DATASETS_DIR} \
+    --imgsz=${IMGSIZE} \
+    --warm_up=${WARM_UP} \
+    --loop_count ${LOOP_COUNT} \
+    --test_mode ${RUN_MODE} \
+    --fps_target ${TGT} \
+    --bsz ${BSZ}; check_status
+
+exit ${EXIT_STATUS}
diff --git a/models/cv/classification/densenet121/ixrt/simplify_model.py b/models/cv/classification/densenet121/ixrt/simplify_model.py
new file mode 100644
index 00000000..9948a9fa
--- /dev/null
+++ b/models/cv/classification/densenet121/ixrt/simplify_model.py
@@ -0,0 +1,40 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import onnx
+import argparse
+from onnxsim import simplify
+
+# Simplify
+def simplify_model(args):
+    onnx_model = onnx.load(args.origin_model)
+    model_simp, check = simplify(onnx_model)
+    model_simp = onnx.shape_inference.infer_shapes(model_simp)
+    onnx.save(model_simp, args.output_model)
+    print(" Simplify onnx Done.")
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--origin_model", type=str)
+    parser.add_argument("--output_model", type=str)
+    parser.add_argument("--reshape", action="store_true")
+    args = parser.parse_args()
+    return args
+
+args = parse_args()
+simplify_model(args)
diff --git a/models/cv/classification/resnet_v1_d50/ixrt/README.md b/models/cv/classification/resnet_v1_d50/ixrt/README.md
new file mode 100644
index 00000000..6942304f
--- /dev/null
+++ b/models/cv/classification/resnet_v1_d50/ixrt/README.md
@@ -0,0 +1,62 @@
+# ResNet V1 D50
+
+## Description
+
+Residual networks (ResNets) learn residual functions with reference to the layer inputs, instead of learning unreferenced functions. Rather than hoping each few stacked layers directly fit a desired underlying mapping, residual nets let these layers fit a residual mapping. The v1d variant used here adds a deeper stem and average-pooling downsampling on top of the original ResNet-50.
+
+## Setup
+
+### Install
+
+```bash
+yum install mesa-libGL
+
+pip3 install tqdm
+pip3 install onnx
+pip3 install onnxsim
+pip3 install tabulate
+pip3 install ppq
+pip3 install mmpretrain
+pip3 install mmcv-lite
+```
+
+### Download
+
+Dataset: download the ImageNet validation dataset. A `val_map.txt` label file must sit in the dataset root (see `calibration_dataset.py`).
+
+### Model Conversion
+
+```bash
+mkdir checkpoints
+python3 export_onnx.py --output_model checkpoints/resnet_v1_d50.onnx
+```
+
+## Inference
+
+```bash
+export DATASETS_DIR=/path/to/imagenet_val/
+export CHECKPOINTS_DIR=./checkpoints
+export RUN_DIR=./
+export CONFIG_DIR=config/RESNET_V1_D50_CONFIG
+```
+
+### FP16
+
+```bash
+# Accuracy
+bash scripts/infer_resnet_v1_d50_fp16_accuracy.sh
+# Performance
+bash scripts/infer_resnet_v1_d50_fp16_performance.sh
+```
+
+### INT8
+
+```bash
+# Accuracy
+bash scripts/infer_resnet_v1_d50_int8_accuracy.sh
+# Performance
+bash scripts/infer_resnet_v1_d50_int8_performance.sh
+```
+
+## Results
+
+Model         |BatchSize |Precision |FPS       |Top-1(%)  |Top-5(%)
+--------------|----------|----------|----------|----------|--------
+ResNet V1 D50 |          |          |          |          |
+ResNet V1 D50 |          |          |          |          |
diff --git a/models/cv/classification/resnet_v1_d50/ixrt/build_engine.py b/models/cv/classification/resnet_v1_d50/ixrt/build_engine.py
new file mode 100644
index 00000000..4fbcc015
--- /dev/null
+++ b/models/cv/classification/resnet_v1_d50/ixrt/build_engine.py
@@ -0,0 +1,109 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import os
+import cv2
+import argparse
+import numpy as np
+
+import torch
+import tensorrt
+from calibration_dataset import getdataloader
+import cuda.cudart as cudart
+
+def assertSuccess(err):
+    assert(err == cudart.cudaError_t.cudaSuccess)
+
+class EngineCalibrator(tensorrt.IInt8EntropyCalibrator2):
+
+    def __init__(self, cache_file, datasets_dir, loop_count=10, bsz=1, img_sz=224):
+        super().__init__()
+        self.cache_file = cache_file
+        self.image_batcher = getdataloader(datasets_dir, loop_count, batch_size=bsz, img_sz=img_sz)
+        self.batch_generator = iter(self.image_batcher)
+        # Allocate one device buffer large enough for a full calibration batch.
+        size = img_sz*img_sz*3*bsz
+        err, self.batch_allocation = cudart.cudaMalloc(size)
+        assertSuccess(err)
+
+    def __del__(self):
+        err, = cudart.cudaFree(self.batch_allocation)
+        assertSuccess(err)
+
+    def get_batch_size(self):
+        return self.image_batcher.batch_size
+
+    def get_batch(self, names):
+        try:
+            batch, _ = next(self.batch_generator)
+            batch = batch.numpy()
+            cudart.cudaMemcpy(self.batch_allocation,
+                              np.ascontiguousarray(batch),
+                              batch.nbytes,
+                              cudart.cudaMemcpyKind.cudaMemcpyHostToDevice)
+            return [int(self.batch_allocation)]
+        except StopIteration:
+            # Returning None tells TensorRT the calibration data is exhausted.
+            return None
+
+    def read_calibration_cache(self):
+        if os.path.exists(self.cache_file):
+            with open(self.cache_file, "rb") as f:
+                return f.read()
+
+    def write_calibration_cache(self, cache):
+        with open(self.cache_file, "wb") as f:
+            f.write(cache)
+
+def main(config):
+    IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING)
+    builder = tensorrt.Builder(IXRT_LOGGER)
+    EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
+    network = builder.create_network(EXPLICIT_BATCH)
+    build_config = builder.create_builder_config()
+    parser = tensorrt.OnnxParser(network, IXRT_LOGGER)
+    parser.parse_from_file(config.model)
+
+    precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16
+    print("precision : ", precision)
+    build_config.set_flag(precision)
+    if config.precision == "int8":
+        build_config.int8_calibrator = EngineCalibrator("int8_cache", config.datasets_dir)
+
+    plan = builder.build_serialized_network(network, build_config)
+    engine_file_path = config.engine
+    with open(engine_file_path, "wb") as f:
+        f.write(plan)
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--model", type=str)
+    parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8",
+                        help="The precision of datatype")
+    parser.add_argument("--engine", type=str, default=None)
+    parser.add_argument(
+        "--datasets_dir",
+        type=str,
+        default="",
+        help="ImageNet dir",
+    )
+    args = parser.parse_args()
+    return args
+
+if __name__ == "__main__":
+    args = parse_args()
+    main(args)
diff --git a/models/cv/classification/resnet_v1_d50/ixrt/build_i8_engine.py b/models/cv/classification/resnet_v1_d50/ixrt/build_i8_engine.py
new file mode 100644
index 00000000..6038b33f
--- /dev/null
+++ b/models/cv/classification/resnet_v1_d50/ixrt/build_i8_engine.py
@@ -0,0 +1,112 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import argparse
+import json
+import os
+
+import tensorrt
+import tensorrt as trt
+
+TRT_LOGGER = trt.Logger(tensorrt.Logger.VERBOSE)
+
+EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
+
+
+def GiB(val):
+    return val * 1 << 30
+
+
+def json_load(filename):
+    with open(filename) as json_file:
+        data = json.load(json_file)
+    return data
+
+
+def setDynamicRange(network, json_file):
+    """Sets ranges for network layers."""
+    quant_param_json = json_load(json_file)
+    act_quant = quant_param_json["act_quant_info"]
+
+    for i in range(network.num_inputs):
+        input_tensor = network.get_input(i)
+        if input_tensor.name in act_quant:
+            print(input_tensor.name)
+            value = act_quant[input_tensor.name]
+            tensor_max = abs(value)
+            tensor_min = -abs(value)
+            input_tensor.dynamic_range = (tensor_min, tensor_max)
+
+    for i in range(network.num_layers):
+        layer = network.get_layer(i)
+
+        for output_index in range(layer.num_outputs):
+            tensor = layer.get_output(output_index)
+
+            if tensor.name in act_quant:
+                value = act_quant[tensor.name]
+                tensor_max = abs(value)
+                tensor_min = -abs(value)
+                tensor.dynamic_range = (tensor_min, tensor_max)
+            else:
+                # Tensors without a recorded activation range are printed in green.
+                print("\033[1;32m%s\033[0m" % tensor.name)
+
+
+def build_engine(onnx_file, json_file, engine_file):
+    builder = trt.Builder(TRT_LOGGER)
+    network = builder.create_network(EXPLICIT_BATCH)
+
+    config = builder.create_builder_config()
+
+    # If it is a dynamic ONNX model, you need to add the following:
+    # profile = builder.create_optimization_profile()
+    # profile.set_shape("input_name", (batch, channels, min_h, min_w), (batch, channels, opt_h, opt_w), (batch, channels, max_h, max_w))
+    # config.add_optimization_profile(profile)
+
+    parser = trt.OnnxParser(network, TRT_LOGGER)
+    # config.max_workspace_size = GiB(1)
+    if not os.path.exists(onnx_file):
+        quit("ONNX file {} not found".format(onnx_file))
+
+    with open(onnx_file, "rb") as model:
+        if not parser.parse(model.read()):
+            print("ERROR: Failed to parse the ONNX file.")
+            for error in range(parser.num_errors):
+                print(parser.get_error(error))
+            return None
+
+    config.set_flag(trt.BuilderFlag.INT8)
+
+    setDynamicRange(network, json_file)
+
+    engine = builder.build_engine(network, config)
+
+    with open(engine_file, "wb") as f:
+        f.write(engine.serialize())
+
+
+if __name__ == "__main__":
+    # Add plugins if needed
+    # import ctypes
+    # ctypes.CDLL("libmmdeploy_tensorrt_ops.so")
+    parser = argparse.ArgumentParser(
+        description="Writing qparams to onnx to convert tensorrt engine."
+    )
+    parser.add_argument("--onnx", type=str, default=None)
+    parser.add_argument("--qparam_json", type=str, default=None)
+    parser.add_argument("--engine", type=str, default=None)
+    arg = parser.parse_args()
+
+    build_engine(arg.onnx, arg.qparam_json, arg.engine)
+    print("\033[1;32mgenerate %s\033[0m" % arg.engine)
\ No newline at end of file
diff --git a/models/cv/classification/resnet_v1_d50/ixrt/calibration_dataset.py b/models/cv/classification/resnet_v1_d50/ixrt/calibration_dataset.py
new file mode 100644
index 00000000..ec931c65
--- /dev/null
+++ b/models/cv/classification/resnet_v1_d50/ixrt/calibration_dataset.py
@@ -0,0 +1,112 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import os
+
+import torch
+import torchvision.datasets
+from torch.utils.data import DataLoader
+from torchvision import transforms as T
+
+
+class CalibrationImageNet(torchvision.datasets.ImageFolder):
+    def __init__(self, *args, **kwargs):
+        super(CalibrationImageNet, self).__init__(*args, **kwargs)
+        img2label_path = os.path.join(self.root, "val_map.txt")
+        if not os.path.exists(img2label_path):
+            raise FileNotFoundError(f"Not found label file `{img2label_path}`.")
+
+        self.img2label_map = self.make_img2label_map(img2label_path)
+
+    def make_img2label_map(self, path):
+        with open(path) as f:
+            lines = f.readlines()
+
+        img2label_map = dict()
+        for line in lines:
+            line = line.strip().split("\t")
+            if len(line) != 2:
+                continue
+            img_name, label = line
+            img_name = img_name.strip()
+            if img_name in [None, ""]:
+                continue
+            label = int(label.strip())
+            img2label_map[img_name] = label
+        return img2label_map
+
+    def __getitem__(self, index):
+        path, target = self.samples[index]
+        sample = self.loader(path)
+        if self.transform is not None:
+            sample = self.transform(sample)
+        # Look the label up by file name; ImageFolder's own target is ignored.
+        img_name = os.path.basename(path)
+        target = self.img2label_map[img_name]
+
+        return sample, target
+
+
+def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0):
+    dataset = CalibrationImageNet(
+        data_path,
+        transform=T.Compose(
+            [
+                T.Resize(256),
+                T.CenterCrop(img_sz),
+                T.ToTensor(),
+                T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+            ]
+        ),
+    )
+
+    calibration_dataset = dataset
+    if num_samples is not None:
+        calibration_dataset = torch.utils.data.Subset(
+            dataset, indices=range(num_samples)
+        )
+
+    calibration_dataloader = DataLoader(
+        calibration_dataset,
+        shuffle=True,
+        batch_size=batch_size,
+        drop_last=False,
+        num_workers=workers,
+    )
+
+    verify_dataloader = DataLoader(
+        dataset,
+        shuffle=False,
+        batch_size=batch_size,
+        drop_last=False,
+        num_workers=workers,
+    )
+
+    return calibration_dataloader, verify_dataloader
+
+
+def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000):
+    num_samples = min(total_sample,
step * batch_size) + if step < 0: + num_samples = None + calibration_dataloader, _ = create_dataloaders( + dataset_dir, + img_sz=img_sz, + batch_size=batch_size, + workers=workers, + num_samples=num_samples, + ) + return calibration_dataloader diff --git a/models/cv/classification/resnet_v1_d50/ixrt/common.py b/models/cv/classification/resnet_v1_d50/ixrt/common.py new file mode 100644 index 00000000..0458195e --- /dev/null +++ b/models/cv/classification/resnet_v1_d50/ixrt/common.py @@ -0,0 +1,78 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +import os +import cv2 +import glob +import torch +import tensorrt +import numpy as np +import pycuda.driver as cuda + +def eval_batch(batch_score, batch_label): + batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) + values, indices = batch_score.topk(5) + top1, top5 = 0, 0 + for idx, label in enumerate(batch_label): + + if label == indices[idx][0]: + top1 += 1 + if label in indices[idx]: + top5 += 1 + return top1, top5 + +def create_engine_context(engine_path, logger): + with open(engine_path, "rb") as f: + runtime = tensorrt.Runtime(logger) + assert runtime + engine = runtime.deserialize_cuda_engine(f.read()) + assert engine + context = engine.create_execution_context() + assert context + + return engine, context + +def get_io_bindings(engine): + # Setup I/O bindings + inputs = [] + outputs = [] + allocations = [] + + for i in range(engine.num_bindings): + is_input = False + if engine.binding_is_input(i): + is_input = True + name = engine.get_binding_name(i) + dtype = engine.get_binding_dtype(i) + shape = engine.get_binding_shape(i) + if is_input: + batch_size = shape[0] + size = np.dtype(tensorrt.nptype(dtype)).itemsize + for s in shape: + size *= s + allocation = cuda.mem_alloc(size) + binding = { + "index": i, + "name": name, + "dtype": np.dtype(tensorrt.nptype(dtype)), + "shape": list(shape), + "allocation": allocation, + } + print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") + allocations.append(allocation) + if engine.binding_is_input(i): + inputs.append(binding) + else: + outputs.append(binding) + return inputs, outputs, allocations diff --git a/models/cv/classification/resnet_v1_d50/ixrt/config/RESNET_V1_D50_CONFIG b/models/cv/classification/resnet_v1_d50/ixrt/config/RESNET_V1_D50_CONFIG new file mode 100644 index 00000000..5d1cb05d --- /dev/null +++ b/models/cv/classification/resnet_v1_d50/ixrt/config/RESNET_V1_D50_CONFIG @@ -0,0 +1,33 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+# IMGSIZE : model input height/width
+# MODEL_NAME : basename of the generated onnx/engine files
+# ORIGINE_MODEL : file name of the original onnx model
+IMGSIZE=224
+MODEL_NAME=Resnet_V1_D50
+ORIGINE_MODEL=resnet_v1_d50.onnx
+
+# QUANT CONFIG (takes effect only when PRECISION is int8)
+    # QUANT_OBSERVER : quantization observer, one of [hist_percentile, percentile, minmax, entropy, ema]
+    # QUANT_BATCHSIZE : dataloader batch size used during quantization; keep it identical to the batch size in the onnx graph, otherwise some ops (e.g. Reshape) may infer wrong shapes
+    # QUANT_STEP : number of calibration steps
+    # QUANT_SEED : random seed, makes the quantization result reproducible
+    # QUANT_EXIST_ONNX : set this to reuse a quantized model from another source
+QUANT_OBSERVER=minmax
+QUANT_BATCHSIZE=32
+QUANT_STEP=32
+QUANT_SEED=42
+DISABLE_QUANT_LIST=
+QUANT_EXIST_ONNX=
diff --git a/models/cv/classification/resnet_v1_d50/ixrt/export_onnx.py b/models/cv/classification/resnet_v1_d50/ixrt/export_onnx.py
new file mode 100644
index 00000000..e1ef8d61
--- /dev/null
+++ b/models/cv/classification/resnet_v1_d50/ixrt/export_onnx.py
@@ -0,0 +1,43 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import torch
+from mmpretrain import get_model
+import argparse
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--output_model", type=str)
+    args = parser.parse_args()
+    return args
+
+args = parse_args()
+model = get_model('resnetv1d50_8xb32_in1k', pretrained=True)
+model.cuda()
+model.eval()
+input = torch.randn(32, 3, 224, 224, device='cuda')
+export_onnx_file = args.output_model
+
+torch.onnx.export(model,
+                  input,
+                  export_onnx_file,
+                  export_params=True,
+                  opset_version=11,
+                  do_constant_folding=True,
+                  input_names=['input'],
+                  output_names=['output'],)
+print('Model has been converted to ONNX')
diff --git a/models/cv/classification/resnet_v1_d50/ixrt/inference.py b/models/cv/classification/resnet_v1_d50/ixrt/inference.py
new file mode 100644
index 00000000..2c9dcb3f
--- /dev/null
+++ b/models/cv/classification/resnet_v1_d50/ixrt/inference.py
@@ -0,0 +1,156 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import argparse
+import json
+import os
+import re
+import time
+from tqdm import tqdm
+
+import cv2
+import numpy as np
+import pycuda.autoinit
+import pycuda.driver as cuda
+import torch
+import tensorrt
+
+from calibration_dataset import getdataloader
+from common import eval_batch, create_engine_context, get_io_bindings
+
+def main(config):
+    dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz)
+
+    logger = tensorrt.Logger(tensorrt.Logger.ERROR)
+
+    # Load Engine && I/O bindings
+    engine, context = create_engine_context(config.engine_file, logger)
+    inputs, outputs, allocations = get_io_bindings(engine)
+
+    # Warm up
+    if config.warm_up > 0:
+        print("\nWarm Start.")
+        for i in range(config.warm_up):
+            context.execute_v2(allocations)
+        print("Warm Done.")
+
+    # Inference
+    if config.test_mode == "FPS":
+        torch.cuda.synchronize()
+        start_time = time.time()
+
+        for i in range(config.loop_count):
+            context.execute_v2(allocations)
+
+        torch.cuda.synchronize()
+        end_time = time.time()
+        forward_time = end_time - start_time
+
+        num_samples = 50000
+        if config.loop_count * config.bsz < num_samples:
+            num_samples = config.loop_count * config.bsz
+        fps = num_samples / forward_time
+
+        print("FPS : ", fps)
+        print(f"Performance Check : Test {fps} >= target {config.fps_target}")
+        if fps >= config.fps_target:
+            print("pass!")
+            exit()
+        else:
+            print("failed!")
+            exit(1)
+
+    elif config.test_mode == "ACC":
+
+        ## Prepare the output data
+        output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"])
+        print(f"output shape : {output.shape} output type : {output.dtype}")
+
+        total_sample = 0
+        acc_top1, acc_top5 = 0, 0
+
+        with tqdm(total=len(dataloader)) as _tqdm:
+            for idx, (batch_data, batch_label) in enumerate(dataloader):
+                batch_data = batch_data.numpy().astype(inputs[0]["dtype"])
+                batch_data = np.ascontiguousarray(batch_data)
+                total_sample += batch_data.shape[0]
+
+                cuda.memcpy_htod(inputs[0]["allocation"], batch_data)
+                context.execute_v2(allocations)
+                cuda.memcpy_dtoh(output, outputs[0]["allocation"])
+
+                # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model
+                if len(output.shape) == 4:
+                    output = output.squeeze(axis=(2,3))
+
+                batch_top1, batch_top5 = eval_batch(output, batch_label)
+                acc_top1 += batch_top1
+                acc_top5 += batch_top5
+
+                _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample),
+                                  acc_5='{:.4f}'.format(acc_top5/total_sample))
+                _tqdm.update(1)
+
+        print(f"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}")
+        print(f"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}")
+        acc1 = acc_top1/total_sample
+        print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}")
+        if acc1 >= config.acc_target:
+            print("pass!")
+            exit()
+        else:
+            print("failed!")
+            exit(1)
+
+def parse_config():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--test_mode", type=str, default="FPS", help="run mode: FPS or ACC")
+    parser.add_argument(
+        "--engine_file",
+        type=str,
+        help="engine file path"
+    )
+    parser.add_argument(
+        "--datasets_dir",
+        type=str,
+        default="",
+        help="ImageNet dir",
+    )
+    parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times")
+    parser.add_argument("--bsz", type=int, default=32, help="test batch size")
+    parser.add_argument(
+        "--imgsz",
+        "--img",
+        "--img-size",
+        type=int,
+        default=224,
+        help="inference size h,w",
+    )
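+    # Note: --fps_target and --acc_target (below) default to -1.0, so when no
+    # target is supplied the corresponding ">= target" check in main() passes.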
+ parser.add_argument("--use_async", action="store_true") + parser.add_argument( + "--device", type=int, default=0, help="cuda device, i.e. 0 or 0,1,2,3,4" + ) + parser.add_argument("--fps_target", type=float, default=-1.0) + parser.add_argument("--acc_target", type=float, default=-1.0) + parser.add_argument("--loop_count", type=int, default=-1) + + config = parser.parse_args() + return config + +if __name__ == "__main__": + config = parse_config() + main(config) diff --git a/models/cv/classification/resnet_v1_d50/ixrt/modify_batchsize.py b/models/cv/classification/resnet_v1_d50/ixrt/modify_batchsize.py new file mode 100644 index 00000000..5466cb07 --- /dev/null +++ b/models/cv/classification/resnet_v1_d50/ixrt/modify_batchsize.py @@ -0,0 +1,57 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +import onnx +import argparse + +def change_input_dim(model, bsz): + batch_size = bsz + + # The following code changes the first dimension of every input to be batch_size + # Modify as appropriate ... note that this requires all inputs to + # have the same batch_size + inputs = model.graph.input + for input in inputs: + # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. + # Add checks as needed. + dim1 = input.type.tensor_type.shape.dim[0] + # update dim to be a symbolic value + if isinstance(batch_size, str): + # set dynamic batch size + dim1.dim_param = batch_size + elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): + # set given batch size + dim1.dim_value = int(batch_size) + else: + # set batch size of 1 + dim1.dim_value = 1 + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--batch_size", type=int) + parser.add_argument("--origin_model", type=str) + parser.add_argument("--output_model", type=str) + args = parser.parse_args() + return args + +args = parse_args() +model = onnx.load(args.origin_model) +change_input_dim(model, args.batch_size) +onnx.save(model, args.output_model) + + + + + diff --git a/models/cv/classification/resnet_v1_d50/ixrt/quant.py b/models/cv/classification/resnet_v1_d50/ixrt/quant.py new file mode 100644 index 00000000..c728c7a1 --- /dev/null +++ b/models/cv/classification/resnet_v1_d50/ixrt/quant.py @@ -0,0 +1,166 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+"""这是一个高度自动化的 PPQ 量化的入口脚本,将你的模型和数据按要求进行打包: + +在自动化 API 中,我们使用 QuantizationSetting 对象传递量化参数。 + +This file will show you how to quantize your network with PPQ + You should prepare your model and calibration dataset as follow: + + ~/working/model.onnx <-- your model + ~/working/data/*.npy or ~/working/data/*.bin <-- your dataset + +if you are using caffe model: + ~/working/model.caffemdoel <-- your model + ~/working/model.prototext <-- your model + +### MAKE SURE YOUR INPUT LAYOUT IS [N, C, H, W] or [C, H, W] ### + +quantized model will be generated at: ~/working/quantized.onnx +""" +from ppq import * +from ppq.api import * +import os +from calibration_dataset import getdataloader +import argparse +import random +import numpy as np +import torch + + +def setseed(seed=42): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model_name", type=str) + parser.add_argument("--model", type=str) + parser.add_argument("--dataset_dir", type=str, default="imagenet_val") + parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], + default="hist_percentile") + parser.add_argument("--disable_quant_names", nargs='*', type=str) + parser.add_argument("--save_dir", type=str, help="save path", default=None) + parser.add_argument("--bsz", type=int, default=32) + parser.add_argument("--step", type=int, default=20) + parser.add_argument("--seed", type=int, default=42) + parser.add_argument("--imgsz", type=int, default=224) + args = parser.parse_args() + print("Quant config:", args) + print(args.disable_quant_names) + return args + + +config = parse_args() + +# modify configuration below: +WORKING_DIRECTORY = 'checkpoints' # choose your working directory +TARGET_PLATFORM = TargetPlatform.TRT_INT8 # choose your target platform +MODEL_TYPE = NetworkFramework.ONNX # or NetworkFramework.CAFFE +INPUT_LAYOUT = 'chw' # input data layout, chw or hwc +NETWORK_INPUTSHAPE = [32, 3, 224, 224] # input shape of your network +EXECUTING_DEVICE = 'cuda' # 'cuda' or 'cpu'. +REQUIRE_ANALYSE = False +TRAINING_YOUR_NETWORK = False # 是否需要 Finetuning 一下你的网络 +# ------------------------------------------------------------------- +# 加载你的模型文件,PPQ 将会把 onnx 或者 caffe 模型文件解析成自己的格式 +# 如果你正使用 pytorch, tensorflow 等框架,你可以先将模型导出成 onnx +# 使用 torch.onnx.export 即可,如果你在导出 torch 模型时发生错误,欢迎与我们联系。 +# ------------------------------------------------------------------- +graph = None +if MODEL_TYPE == NetworkFramework.ONNX: + graph = load_onnx_graph(onnx_import_file=config.model) +if MODEL_TYPE == NetworkFramework.CAFFE: + graph = load_caffe_graph( + caffemodel_path=os.path.join(WORKING_DIRECTORY, 'model.caffemodel'), + prototxt_path=os.path.join(WORKING_DIRECTORY, 'model.prototxt')) +assert graph is not None, 'Graph Loading Error, Check your input again.' 
+ +# ------------------------------------------------------------------- +# SETTING 对象用于控制 PPQ 的量化逻辑,主要描述了图融合逻辑、调度方案、量化细节策略等 +# 当你的网络量化误差过高时,你需要修改 SETTING 对象中的属性来进行特定的优化 +# ------------------------------------------------------------------- +QS = QuantizationSettingFactory.default_setting() + +# ------------------------------------------------------------------- +# 下面向你展示了如何使用 finetuning 过程提升量化精度 +# 在 PPQ 中我们提供了十余种算法用来帮助你恢复精度 +# 开启他们的方式都是 QS.xxxx = True +# 按需使用,不要全部打开,容易起飞 +# ------------------------------------------------------------------- +if TRAINING_YOUR_NETWORK: + QS.lsq_optimization = True # 启动网络再训练过程,降低量化误差 + QS.lsq_optimization_setting.steps = 500 # 再训练步数,影响训练时间,500 步大概几分钟 + QS.lsq_optimization_setting.collecting_device = 'cuda' # 缓存数据放在那,cuda 就是放在gpu,如果显存超了你就换成 'cpu' + + +dataloader = getdataloader(config.dataset_dir, config.step, batch_size=config.bsz, img_sz=config.imgsz) +# ENABLE CUDA KERNEL 会加速量化效率 3x ~ 10x,但是你如果没有装相应编译环境的话是编译不了的 +# 你可以尝试安装编译环境,或者在不启动 CUDA KERNEL 的情况下完成量化:移除 with ENABLE_CUDA_KERNEL(): 即可 +with ENABLE_CUDA_KERNEL(): + print('网络正量化中,根据你的量化配置,这将需要一段时间:') + quantized = quantize_native_model( + setting=QS, # setting 对象用来控制标准量化逻辑 + model=graph, + calib_dataloader=dataloader, + calib_steps=config.step, + input_shape=NETWORK_INPUTSHAPE, # 如果你的网络只有一个输入,使用这个参数传参 + inputs=None, + # 如果你的网络有多个输入,使用这个参数传参,就是 input_shape=None, inputs=[torch.zeros(1,3,224,224), torch.zeros(1,3,224,224)] + collate_fn=lambda x: x[0].to(EXECUTING_DEVICE), # collate_fn 跟 torch dataloader 的 collate fn 是一样的,用于数据预处理, + # 你当然也可以用 torch dataloader 的那个,然后设置这个为 None + platform=TARGET_PLATFORM, + device=EXECUTING_DEVICE, + do_quantize=True) + + # ------------------------------------------------------------------- + # 如果你需要执行量化后的神经网络并得到结果,则需要创建一个 executor + # 这个 executor 的行为和 torch.Module 是类似的,你可以利用这个东西来获取执行结果 + # 请注意,必须在 export 之前执行此操作。 + # ------------------------------------------------------------------- + executor = TorchExecutor(graph=quantized, device=EXECUTING_DEVICE) + # output = executor.forward(input) + + # ------------------------------------------------------------------- + # PPQ 计算量化误差时,使用信噪比的倒数作为指标,即噪声能量 / 信号能量 + # 量化误差 0.1 表示在整体信号中,量化噪声的能量约为 10% + # 你应当注意,在 graphwise_error_analyse 分析中,我们衡量的是累计误差 + # 网络的最后一层往往都具有较大的累计误差,这些误差是其前面的所有层所共同造成的 + # 你需要使用 layerwise_error_analyse 逐层分析误差的来源 + # ------------------------------------------------------------------- + print('正计算网络量化误差(SNR),最后一层的误差应小于 0.1 以保证量化精度:') + reports = graphwise_error_analyse( + graph=quantized, running_device=EXECUTING_DEVICE, steps=32, + dataloader=dataloader, collate_fn=lambda x: x[0].to(EXECUTING_DEVICE)) + for op, snr in reports.items(): + if snr > 0.1: ppq_warning(f'层 {op} 的累计量化误差显著,请考虑进行优化') + + if REQUIRE_ANALYSE: + print('正计算逐层量化误差(SNR),每一层的独立量化误差应小于 0.1 以保证量化精度:') + layerwise_error_analyse(graph=quantized, running_device=EXECUTING_DEVICE, + interested_outputs=None, + dataloader=dataloader, collate_fn=lambda x: x.to(EXECUTING_DEVICE)) + + # ------------------------------------------------------------------- + # 使用 export_ppq_graph 函数来导出量化后的模型 + # PPQ 会根据你所选择的导出平台来修改模型格式 + # ------------------------------------------------------------------- + print('网络量化结束,正在生成目标文件:') + export_ppq_graph( + graph=quantized, platform=TARGET_PLATFORM, + graph_save_to=os.path.join(config.save_dir, f"quantized_{config.model_name}.onnx"), + config_save_to=os.path.join(config.save_dir, 'quant_cfg.json')) diff --git a/models/cv/classification/resnet_v1_d50/ixrt/refine_model.py b/models/cv/classification/resnet_v1_d50/ixrt/refine_model.py new 
file mode 100644 index 00000000..6f1e6c2f --- /dev/null +++ b/models/cv/classification/resnet_v1_d50/ixrt/refine_model.py @@ -0,0 +1,290 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +import os +import argparse +import dataclasses + +import torch +import onnx + +from refine_utils.matmul_to_gemm_pass import FusedGemmPass +from refine_utils.linear_pass import FusedLinearPass + +from refine_utils.common import * + +def get_constant_input_name_of_operator(graph: Graph, operator: Operator): + const = None + for input in operator.inputs: + if not graph.containe_var(input): + continue + + if not graph.is_leaf_variable(input): + continue + + input_var = graph.get_variable(input) + if input_var.value is not None: + const = input + return const + +class FuseLayerNormPass(BasePass): + + def process(self, graph: Graph) -> Graph: + self.transform = GraphTransform(graph) + find_sequence_subgraph( + graph, + [OP.REDUCE_MEAN, OP.SUB, OP.POW, OP.REDUCE_MEAN, OP.ADD, OP.SQRT, OP.DIV, OP.MUL, OP.ADD], + self.fuse_layer_norm, + strict=False + ) + return graph + + def fuse_layer_norm(self, graph: Graph, pattern: PatternGraph): + # 检查 REDUCE_MEAN 的输入是否和 SUB 的输入是一致的 + if pattern.nodes[0].operator.inputs[0] != pattern.nodes[1].operator.inputs[0]: + return + + # 检查 POW 的输入是否和 DIV 的输入是一致的 + if pattern.nodes[2].operator.inputs[0] != pattern.nodes[6].operator.inputs[0]: + return + + # 检查部分算子的输出是否被多个算子使用 + nodes = pattern.nodes + for node in [nodes[0]] + nodes[2:-1]: + next_ops = graph.get_next_operators(node.operator) + if len(next_ops) > 1: + return + + eps = None + for input in nodes[4].operator.inputs: + input_var = graph.get_variable(input) + if input_var.value is not None and graph.is_leaf_variable(input): + eps = to_py_type(input_var.value) + + scale = get_constant_input_name_of_operator(graph, nodes[-2].operator) + bias = get_constant_input_name_of_operator(graph, nodes[-1].operator) + + self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator) + + bias_var = graph.get_variable(bias) + print(bias_var) + + attributes = { + "axis": nodes[0].operator.attributes.axes, + "epsilon": eps, + } + + + layer_norm_op = self.transform.make_operator( + op_type="LayerNormalization", + inputs=[nodes[0].operator.inputs[0], scale, bias], + outputs=[nodes[-1].operator.outputs[0]], + **attributes + ) + + self.transform.add_operator(layer_norm_op) + +class FusedGeluPass(BasePass): + + def process(self, graph: Graph) -> Graph: + self.transform = GraphTransform(graph) + + find_sequence_subgraph( + graph, pattern=[OP.DIV, OP.ERF, OP.ADD, OP.MUL, OP.MUL], callback=self.fuse_gelu, strict=True + ) + return graph + + def fuse_gelu(self, graph: Graph, pattern: PatternGraph): + nodes = pattern.nodes + prev_op = self.transform.get_previous_operators(nodes[0].operator)[0] + next_ops = self.transform.get_next_operators(prev_op) + if len(next_ops) != 2: + return + + if nodes[0].operator not in next_ops or nodes[3].operator 
not in next_ops: + return + + gelu_op_input = None + for input in nodes[3].operator.inputs: + if input in nodes[0].operator.inputs: + gelu_op_input = input + break + + self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator) + + gelu_op = self.transform.make_operator( + op_type=OP.GELU, + inputs=[gelu_op_input], + outputs=[nodes[-1].operator.outputs[0]] + ) + self.transform.add_operator(gelu_op) + +@dataclasses.dataclass +class NormalizeAttr(BaseOperatorAttr): + p: float = 2.0 + epsilon: float = 1e-12 + axis: int = 1 + + +@registe_operator(OP.GELU) +class GeluOperator(BaseOperator): + + def call( + self, + executor, + operator: Operator, + inputs: List, + attr: NormalizeAttr, + ): + return F.gelu(inputs[0]) + + def convert_onnx_operator( + self, ir_graph: Graph, onnx_graph: onnx.GraphProto, node: onnx.NodeProto + ) -> Operator: + return default_converter(ir_graph, onnx_graph, node, attr_cls=attr.EmptyAttr) + + def quantize( + self, + graph: Graph, + op: Operator, + operator_observer_config: QuantOperatorObserverConfig, + quant_outputs: bool = False, + ): + return quant_single_input_operator(graph, op, operator_observer_config, quant_outputs=quant_outputs) + + + +class ClearUnsedVariables(BasePass): + + def process(self, graph: Graph) -> Graph: + vars = list(graph.variables) + + for var in vars: + if len(graph.get_dst_operators(var)) == 0 and graph.is_leaf_variable(var): + graph.delete_variable(var) + + quant_params = list(graph.quant_parameters.keys()) + for var in quant_params: + if not graph.containe_var(var): + graph.quant_parameters.pop(var) + + return graph + +class FormatLayerNorm(BasePass): + + def process(self, graph: Graph) -> Graph: + for op in graph.operators.values(): + if "LayerNorm" in op.op_type: + self.format_layer_norm(graph, op) + return graph + + def format_layer_norm(self, graph, operator): + if not hasattr(operator.attributes, "axis"): + return + if isinstance(operator.attributes.axis, (tuple, list)): + operator.attributes.axis = operator.attributes.axis[0] + +class FormatReshape(BasePass): + + def process(self, graph: Graph) -> Graph: + for op in graph.operators.values(): + if op.op_type == "Reshape": + self.format_reshape(graph, op) + + return graph + + def format_reshape(self, graph, operator): + shape = graph.get_variable(operator.inputs[1]) + shape.value = torch.tensor(shape.value, dtype=torch.int64) + +class FormatScalar(BasePass): + + def process(self, graph: Graph): + for var in graph.variables.values(): + var: Variable + use_ops = graph.get_dst_operators(var) + + if len(use_ops) == 0: + continue + + if use_ops[0].op_type not in [OP.MUL, OP.ADD, OP.GATHER]: + continue + + if var.value is not None and var.value.ndim == 0: + var.value = var.value.reshape(1) + print(f"Reshape scalar to tensor for {var.name}.") + + return graph + +class RenamePass(BasePass): + + def process(self, graph:Graph): + + names = [name for name in graph.operators.keys()] + for old_name in names: + new_name = old_name.replace("/", "#") + + graph.rename_operator(old_name, new_name) + + names = [name for name in graph.variables.keys()] + for name in names: + new_name = name.replace("/", ".").replace("Output", "out").replace("output", "out") + + graph.rename_vaiable(name, new_name, + with_variables=True, + with_operator_outputs=True) + + return graph + +def create_pipeline(example_inputs): + return PassSequence( + # FuseLayerNormPass(), + FusedGeluPass(), + + # ClearUnsedVariables(), + # FormatLayerNorm(), + # FormatReshape(), + # FormatScalar(), + # RenamePass() + 
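+        # Only FusedGeluPass is active for this model; the commented-out passes
+        # above are kept for reference and can be re-enabled for other networks.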
) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--onnx_path", type=str) + parser.add_argument("--dst_onnx_path", type=str) + + parser.add_argument("--bsz", type=int, default=8, + help="Batch size") + parser.add_argument("--imgsz", type=int, default=224, + help="Image size") + + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_args() + + example_inputs = torch.randn(args.bsz, 3, args.imgsz, args.imgsz) + + refine_pipline = Pipeline( + create_source(f"{args.onnx_path}", example_inputs=example_inputs), + create_pipeline(example_inputs), + create_target( + f"{args.dst_onnx_path}", + example_inputs=example_inputs, + ) + ) + refine_pipline.run() + + print(f"refine the model, input shape={example_inputs.shape}") diff --git a/models/cv/classification/resnet_v1_d50/ixrt/refine_utils/__init__.py b/models/cv/classification/resnet_v1_d50/ixrt/refine_utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/models/cv/classification/resnet_v1_d50/ixrt/refine_utils/common.py b/models/cv/classification/resnet_v1_d50/ixrt/refine_utils/common.py new file mode 100644 index 00000000..2af19a14 --- /dev/null +++ b/models/cv/classification/resnet_v1_d50/ixrt/refine_utils/common.py @@ -0,0 +1,36 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +from typing import Union, Callable, List + +from tensorrt.deploy.api import * +from tensorrt.deploy.backend.onnx.converter import default_converter +from tensorrt.deploy.backend.torch.executor.operators._operators import to_py_type +from tensorrt.deploy.ir.operator_attr import BaseOperatorAttr, EmptyAttr +from tensorrt.deploy.ir.operator_type import OperatorType as OP +from tensorrt.deploy.ir import operator_attr as attr, Operator, generate_operator_name +from tensorrt.deploy.fusion import BasePass, PatternGraph, build_sequence_graph, GraphMatcher, PassSequence +from tensorrt.deploy.ir import Graph +from tensorrt.deploy.quantizer.quant_operator.base import quant_single_input_operator +from tensorrt.deploy.backend.onnx.converter import convert_onnx_operator + +def find_sequence_subgraph(graph, + pattern: Union[List[str], PatternGraph], + callback: Callable[[Graph, PatternGraph], None], + strict=True): + if isinstance(pattern, List): + pattern = build_sequence_graph(pattern) + + matcher = GraphMatcher(pattern, strict=strict) + return matcher.findall(graph, callback) \ No newline at end of file diff --git a/models/cv/classification/resnet_v1_d50/ixrt/refine_utils/linear_pass.py b/models/cv/classification/resnet_v1_d50/ixrt/refine_utils/linear_pass.py new file mode 100644 index 00000000..29b5e4a9 --- /dev/null +++ b/models/cv/classification/resnet_v1_d50/ixrt/refine_utils/linear_pass.py @@ -0,0 +1,113 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import dataclasses
+
+from refine_utils.common import *
+
+# A x B = C; only handled when B is an initializer
+
+class FusedLinearPass(BasePass):
+
+    def process(self, graph: Graph) -> Graph:
+        self.transform = GraphTransform(graph)
+
+        find_sequence_subgraph(
+            graph, pattern=[OP.MATMUL, OP.ADD], callback=self.to_linear_with_bias, strict=True
+        )
+        find_sequence_subgraph(
+            graph, pattern=[OP.MATMUL], callback=self.to_linear, strict=True
+        )
+        return graph
+
+    def to_linear_with_bias(self, graph, pattern: PatternGraph):
+        matmul = pattern.nodes[0]
+        add = pattern.nodes[1]
+        if len(add.operator.inputs) != 2:
+            return
+
+        b_var = graph.get_variable(matmul.operator.inputs[1])
+        if not graph.is_leaf_variable(b_var) or b_var.value is None:
+            return
+
+        if b_var.value.ndim != 2:
+            return
+
+        bias_var = None
+        for input in add.operator.inputs:
+            if input not in matmul.operator.outputs:
+                bias_var = input
+
+        inputs = matmul.operator.inputs
+        inputs.append(bias_var)
+        outputs = add.operator.outputs
+
+        b_var.value = b_var.value.transpose(1, 0)
+        b_var.shape[0], b_var.shape[1] = b_var.shape[1], b_var.shape[0]
+
+        hidden_size = b_var.shape[1]
+        linear_dim = b_var.shape[0]
+
+        attributes = {
+            "hidden_size": hidden_size,
+            "linear_dim": linear_dim,
+            "has_bias": 1,
+            "act_type": "none"
+        }
+
+        self.transform.make_operator(
+            "LinearFP16",
+            inputs=inputs,
+            outputs=outputs,
+            **attributes
+        )
+
+        self.transform.delete_operator(add.operator)
+        self.transform.delete_operator(matmul.operator)
+
+    def to_linear(self, graph, pattern: PatternGraph):
+        matmul = pattern.nodes[0]
+        if len(matmul.operator.inputs) != 2:
+            return
+
+        b_var = graph.get_variable(matmul.operator.inputs[1])
+        if not graph.is_leaf_variable(b_var) or b_var.value is None:
+            return
+
+        if b_var.value.ndim != 2:
+            return
+
+        b_var.value = b_var.value.transpose(1, 0)
+        b_var.shape[0], b_var.shape[1] = b_var.shape[1], b_var.shape[0]
+
+        hidden_size = b_var.shape[1]
+        linear_dim = b_var.shape[0]
+
+        # build the attributes only after hidden_size/linear_dim are known
+        attributes = {
+            "hidden_size": hidden_size,
+            "linear_dim": linear_dim,
+            "has_bias": 0,
+            "act_type": "none"
+        }
+
+        op = self.transform.make_operator(
+            op_type="LinearFP16",
+            inputs=pattern.nodes[0].operator.inputs,
+            outputs=[pattern.nodes[-1].operator.outputs[0]],
+            **attributes
+        )
+
+        self.transform.add_operator(op)
+
+        self.transform.delete_operator(matmul.operator)
\ No newline at end of file
diff --git a/models/cv/classification/resnet_v1_d50/ixrt/refine_utils/matmul_to_gemm_pass.py b/models/cv/classification/resnet_v1_d50/ixrt/refine_utils/matmul_to_gemm_pass.py
new file mode 100644
index 00000000..4ebfac4d
--- /dev/null
+++ b/models/cv/classification/resnet_v1_d50/ixrt/refine_utils/matmul_to_gemm_pass.py
@@ -0,0 +1,54 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+from refine_utils.common import *
+
+#
+# Common pattern Matmul to Gemm
+#
+class FusedGemmPass(BasePass):
+
+    def process(self, graph: Graph) -> Graph:
+        self.transform = GraphTransform(graph)
+
+        find_sequence_subgraph(
+            graph, pattern=[OP.MATMUL], callback=self.to_gemm, strict=True
+        )
+        return graph
+
+    def to_gemm(self, graph, pattern: PatternGraph):
+        matmul_op = pattern.nodes[0]
+        inputs = matmul_op.operator.inputs
+        outputs = matmul_op.operator.outputs
+
+        # a Matmul eligible for conversion has exactly two inputs and one output
+        if len(inputs) != 2 or len(outputs) != 1:
+            return
+
+        for input in inputs:
+            if self.transform.is_leaf_variable(input):
+                return
+
+        print(f"{self.transform.get_variable(inputs[0]).shape} {self.transform.get_variable(inputs[1]).shape}")
+        self.transform.delete_operator(matmul_op.operator)
+
+        op = self.transform.make_operator(
+            op_type="Gemm",
+            inputs=inputs,
+            outputs=outputs,
+            alpha=1,
+            beta=1,
+            transB=1
+        )
+
+        self.transform.add_operator(op)
\ No newline at end of file
diff --git a/models/cv/classification/resnet_v1_d50/ixrt/scripts/infer_resnet_v1_d50_fp16_accuracy.sh b/models/cv/classification/resnet_v1_d50/ixrt/scripts/infer_resnet_v1_d50_fp16_accuracy.sh
new file mode 100644
index 00000000..b743d708
--- /dev/null
+++ b/models/cv/classification/resnet_v1_d50/ixrt/scripts/infer_resnet_v1_d50_fp16_accuracy.sh
@@ -0,0 +1,114 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
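+# Note: check_status reads PIPESTATUS[0], so the exit code of the python3
+# inference step is captured even though the command ends with "; check_status".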
+EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +TGT=-1 +WARM_UP=0 +LOOP_COUNT=-1 +RUN_MODE=ACC +PRECISION=float16 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +step=0 +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx + +# Simplify Model +let step++ +echo; +echo [STEP ${step}] : Simplify Model +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ + --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${FINAL_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --engine_file=${ENGINE_FILE} \ + --datasets_dir=${DATASETS_DIR} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --acc_target ${TGT} \ + --bsz ${BSZ}; check_status + +exit ${EXIT_STATUS} diff --git a/models/cv/classification/resnet_v1_d50/ixrt/scripts/infer_resnet_v1_d50_fp16_performance.sh b/models/cv/classification/resnet_v1_d50/ixrt/scripts/infer_resnet_v1_d50_fp16_performance.sh new file mode 100644 index 00000000..e7a4f1a7 --- /dev/null +++ b/models/cv/classification/resnet_v1_d50/ixrt/scripts/infer_resnet_v1_d50_fp16_performance.sh @@ -0,0 +1,114 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
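+# Note: CHECKPOINTS_DIR, DATASETS_DIR, RUN_DIR and CONFIG_DIR are expected to be
+# exported by the caller, e.g. CONFIG_DIR=config/RESNET_V1_D50_CONFIG (paths
+# illustrative; see the model README for the full list of exports).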
+EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +TGT=-1 +WARM_UP=3 +LOOP_COUNT=20 +RUN_MODE=FPS +PRECISION=float16 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +step=0 +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx + +# Simplify Model +let step++ +echo; +echo [STEP ${step}] : Simplify Model +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ + --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${FINAL_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --engine_file=${ENGINE_FILE} \ + --datasets_dir=${DATASETS_DIR} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --fps_target ${TGT} \ + --bsz ${BSZ}; check_status + +exit ${EXIT_STATUS} diff --git a/models/cv/classification/resnet_v1_d50/ixrt/scripts/infer_resnet_v1_d50_int8_accuracy.sh b/models/cv/classification/resnet_v1_d50/ixrt/scripts/infer_resnet_v1_d50_int8_accuracy.sh new file mode 100644 index 00000000..2b2db01a --- /dev/null +++ b/models/cv/classification/resnet_v1_d50/ixrt/scripts/infer_resnet_v1_d50_int8_accuracy.sh @@ -0,0 +1,145 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
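+# int8 flow: simplify_model.py -> quant.py (PPQ, writes quantized_<model>.onnx and
+# quant_cfg.json) -> modify_batchsize.py -> build_i8_engine.py -> inference.py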
+set -x +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +TGT=-1 +WARM_UP=0 +LOOP_COUNT=-1 +RUN_MODE=ACC +PRECISION=int8 +export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +source ${CONFIG_DIR} +echo ${QUANT_OBSERVER} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Model Input Name : ${MODEL_INPUT_NAME} +echo Model Output Name : ${MODEL_OUTPUT_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +step=0 +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx + +# Simplify Model +let step++ + echo [STEP ${step}] : Simplify Model + if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model, ${SIM_MODEL} has been existed + else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} + fi + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${SIM_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${DATASETS_DIR} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Generate ${SIM_MODEL} + fi +fi + + # Change Batchsize + let step++ + echo; + echo [STEP ${step}] : Change Batchsize + FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx + if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed + else + python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ + --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} + fi + + # Build Engine + let step++ + echo; + echo [STEP ${step}] : Build Engine + ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine + if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed + else + python3 ${RUN_DIR}/build_i8_engine.py \ + --onnx ${FINAL_MODEL} \ + --qparam_json ${CHECKPOINTS_DIR}/quant_cfg.json \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} + fi + +# Inference +# let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --engine_file=${ENGINE_FILE} \ + --datasets_dir=${DATASETS_DIR} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --acc_target ${TGT} \ + --bsz ${BSZ}; check_status + +exit ${EXIT_STATUS} diff --git a/models/cv/classification/resnet_v1_d50/ixrt/scripts/infer_resnet_v1_d50_int8_performance.sh b/models/cv/classification/resnet_v1_d50/ixrt/scripts/infer_resnet_v1_d50_int8_performance.sh new file mode 100644 index 00000000..bec51520 --- /dev/null +++ 
b/models/cv/classification/resnet_v1_d50/ixrt/scripts/infer_resnet_v1_d50_int8_performance.sh @@ -0,0 +1,144 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +TGT=-1 +WARM_UP=3 +LOOP_COUNT=20 +RUN_MODE=FPS +PRECISION=int8 +export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +source ${CONFIG_DIR} +echo ${QUANT_OBSERVER} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Model Input Name : ${MODEL_INPUT_NAME} +echo Model Output Name : ${MODEL_OUTPUT_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +step=0 +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx + +# Simplify Model +let step++ + echo [STEP ${step}] : Simplify Model + if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model, ${SIM_MODEL} has been existed + else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} + fi + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${SIM_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${DATASETS_DIR} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Generate ${SIM_MODEL} + fi +fi + + # Change Batchsize + let step++ + echo; + echo [STEP ${step}] : Change Batchsize + FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx + if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed + else + python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ + --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} + fi + + # Build Engine + let step++ + echo; + echo [STEP ${step}] : Build Engine + ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine + if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed + else + python3 ${RUN_DIR}/build_i8_engine.py \ + --onnx 
${FINAL_MODEL} \
+        --qparam_json ${CHECKPOINTS_DIR}/quant_cfg.json \
+        --engine ${ENGINE_FILE}
+    echo " "Generate Engine ${ENGINE_FILE}
+ fi
+
+# Inference
+# let step++
+echo;
+echo [STEP ${step}] : Inference
+python3 ${RUN_DIR}/inference.py \
+    --engine_file=${ENGINE_FILE} \
+    --datasets_dir=${DATASETS_DIR} \
+    --imgsz=${IMGSIZE} \
+    --warm_up=${WARM_UP} \
+    --loop_count ${LOOP_COUNT} \
+    --test_mode ${RUN_MODE} \
+    --fps_target ${TGT} \
+    --bsz ${BSZ}; check_status
+
+exit ${EXIT_STATUS}
\ No newline at end of file
diff --git a/models/cv/classification/resnet_v1_d50/ixrt/simplify_model.py b/models/cv/classification/resnet_v1_d50/ixrt/simplify_model.py
new file mode 100644
index 00000000..9948a9fa
--- /dev/null
+++ b/models/cv/classification/resnet_v1_d50/ixrt/simplify_model.py
@@ -0,0 +1,40 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import onnx
+import argparse
+from onnxsim import simplify
+
+# Simplify
+def simplify_model(args):
+    onnx_model = onnx.load(args.origin_model)
+    model_simp, check = simplify(onnx_model)
+    model_simp = onnx.shape_inference.infer_shapes(model_simp)
+    onnx.save(model_simp, args.output_model)
+    print(" Simplify onnx Done.")
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--origin_model", type=str)
+    parser.add_argument("--output_model", type=str)
+    parser.add_argument("--reshape", action="store_true")
+    args = parser.parse_args()
+    return args
+
+args = parse_args()
+simplify_model(args)
diff --git a/models/cv/classification/squeezenet_1.1/ixrt/README.md b/models/cv/classification/squeezenet_1.1/ixrt/README.md
new file mode 100644
index 00000000..5e5224d6
--- /dev/null
+++ b/models/cv/classification/squeezenet_1.1/ixrt/README.md
@@ -0,0 +1,62 @@
+# SqueezeNet 1.1
+
+## Description
+
+SqueezeNet 1.1 is a deep learning model for image classification, designed to be
+lightweight and efficient for deployment on resource-constrained devices. It was
+developed by researchers at DeepScale and released in 2016.
+
+## Setup
+
+### Install
+
+```
+yum install mesa-libGL
+pip3 install tqdm
+pip3 install onnx
+pip3 install onnxsim
+pip3 install tabulate
+pip3 install ppq
+pip3 install opencv-python==4.6.0.66
+```
+
+### Download
+
+Pretrained model: https://download.pytorch.org/models/squeezenet1_1-b8a52dc0.pth
+
+Dataset: the ILSVRC2012 validation set, which can be downloaded from https://www.image-net.org/download.php.
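+
+The accuracy scripts and `calibration_dataset.py` load the images through
+`torchvision.datasets.ImageFolder` and additionally expect a tab-separated
+`val_map.txt` in the dataset root that maps each image file name to its label
+index. A minimal sketch of the expected layout (directory and file names
+illustrative):
+
+```
+imagenet_val/
+├── val_map.txt                  # lines like: ILSVRC2012_val_00000001.JPEG<TAB>65
+└── n01440764/
+    └── ILSVRC2012_val_00000001.JPEG
+```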
+ +### Model Conversion +``` +mkdir checkpoints +python3 export_onnx.py --origin_model /path/to/squeezenet1_1-b8a52dc0.pth --output_model checkpoints/squeezenetv11.onnx +``` + +## Inference +``` +export PROJ_DIR=./ +export DATASETS_DIR=/path/to/imagenet_val/ +export CHECKPOINTS_DIR=./checkpoints +export RUN_DIR=./ +export CONFIG_DIR=config/SQUEEZENET_V11_CONFIG + +``` +### FP16 + +```bash +# Accuracy +bash scripts/infer_squeezenet_v11_fp16_accuracy.sh +# Performance +bash scripts/infer_squeezenet_v11_fp16_performance.sh +``` + +### INT8 + +```bash +# Accuracy +bash scripts/infer_squeezenet_v11_int8_accuracy.sh +# Performance +bash scripts/infer_squeezenet_v11_int8_performance.sh +``` + +## Results +Model |BatchSize |Precision |FPS |Top-1(%) |Top-5(%) +---------------|-----------|----------|---------|----------|-------- +SqueezeNet 1.1 | | FP16 | | | +SqueezeNet 1.1 | | INT8 | | | + diff --git a/models/cv/classification/squeezenet_1.1/ixrt/build_engine.py b/models/cv/classification/squeezenet_1.1/ixrt/build_engine.py new file mode 100644 index 00000000..32f549d8 --- /dev/null +++ b/models/cv/classification/squeezenet_1.1/ixrt/build_engine.py @@ -0,0 +1,53 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import cv2 +import argparse +import numpy as np + +import torch +import tensorrt + +def main(config): + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(config.model) + + precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 + # print("precision : ", precision) + build_config.set_flag(precision) + + plan = builder.build_serialized_network(network, build_config) + engine_file_path = config.engine + with open(engine_file_path, "wb") as f: + f.write(plan) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", + help="The precision of datatype") + parser.add_argument("--engine", type=str, default=None) + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = parse_args() + main(args) \ No newline at end of file diff --git a/models/cv/classification/squeezenet_1.1/ixrt/build_i8_engine.py b/models/cv/classification/squeezenet_1.1/ixrt/build_i8_engine.py new file mode 100644 index 00000000..6038b33f --- /dev/null +++ b/models/cv/classification/squeezenet_1.1/ixrt/build_i8_engine.py @@ -0,0 +1,112 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +import argparse +import json +import os + +import tensorrt +import tensorrt as trt + +TRT_LOGGER = trt.Logger(tensorrt.Logger.VERBOSE) + +EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + + +def GiB(val): + return val * 1 << 30 + + +def json_load(filename): + with open(filename) as json_file: + data = json.load(json_file) + return data + + +def setDynamicRange(network, json_file): + """Sets ranges for network layers.""" + quant_param_json = json_load(json_file) + act_quant = quant_param_json["act_quant_info"] + + for i in range(network.num_inputs): + input_tensor = network.get_input(i) + if act_quant.__contains__(input_tensor.name): + print(input_tensor.name) + value = act_quant[input_tensor.name] + tensor_max = abs(value) + tensor_min = -abs(value) + input_tensor.dynamic_range = (tensor_min, tensor_max) + + for i in range(network.num_layers): + layer = network.get_layer(i) + + for output_index in range(layer.num_outputs): + tensor = layer.get_output(output_index) + + if act_quant.__contains__(tensor.name): + value = act_quant[tensor.name] + tensor_max = abs(value) + tensor_min = -abs(value) + tensor.dynamic_range = (tensor_min, tensor_max) + else: + print("\033[1;32m%s\033[0m" % tensor.name) + + +def build_engine(onnx_file, json_file, engine_file): + builder = trt.Builder(TRT_LOGGER) + network = builder.create_network(EXPLICIT_BATCH) + + config = builder.create_builder_config() + + # If it is a dynamic onnx model , you need to add the following. + # profile = builder.create_optimization_profile() + # profile.set_shape("input_name", (batch, channels, min_h, min_w), (batch, channels, opt_h, opt_w), (batch, channels, max_h, max_w)) + # config.add_optimization_profile(profile) + + parser = trt.OnnxParser(network, TRT_LOGGER) + # config.max_workspace_size = GiB(1) + if not os.path.exists(onnx_file): + quit("ONNX file {} not found".format(onnx_file)) + + with open(onnx_file, "rb") as model: + if not parser.parse(model.read()): + print("ERROR: Failed to parse the ONNX file.") + for error in range(parser.num_errors): + print(parser.get_error(error)) + return None + + config.set_flag(trt.BuilderFlag.INT8) + + setDynamicRange(network, json_file) + + engine = builder.build_engine(network, config) + + with open(engine_file, "wb") as f: + f.write(engine.serialize()) + + +if __name__ == "__main__": + # Add plugins if needed + # import ctypes + # ctypes.CDLL("libmmdeploy_tensorrt_ops.so") + parser = argparse.ArgumentParser( + description="Writing qparams to onnx to convert tensorrt engine." 
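+        # Note: --qparam_json expects the quant_cfg.json written by quant.py; its
+        # act_quant_info entries become the tensor dynamic ranges set above.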
+    )
+    parser.add_argument("--onnx", type=str, default=None)
+    parser.add_argument("--qparam_json", type=str, default=None)
+    parser.add_argument("--engine", type=str, default=None)
+    arg = parser.parse_args()
+
+    build_engine(arg.onnx, arg.qparam_json, arg.engine)
+    print("\033[1;32mgenerate %s\033[0m" % arg.engine)
\ No newline at end of file
diff --git a/models/cv/classification/squeezenet_1.1/ixrt/calibration_dataset.py b/models/cv/classification/squeezenet_1.1/ixrt/calibration_dataset.py
new file mode 100644
index 00000000..b394c76b
--- /dev/null
+++ b/models/cv/classification/squeezenet_1.1/ixrt/calibration_dataset.py
@@ -0,0 +1,112 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import os
+import torch
+import torchvision.datasets
+from torch.utils.data import DataLoader
+from torchvision import transforms as T
+
+
+class CalibrationImageNet(torchvision.datasets.ImageFolder):
+    def __init__(self, *args, **kwargs):
+        super(CalibrationImageNet, self).__init__(*args, **kwargs)
+        img2label_path = os.path.join(self.root, "val_map.txt")
+        if not os.path.exists(img2label_path):
+            raise FileNotFoundError(f"Not found label file `{img2label_path}`.")
+
+        self.img2label_map = self.make_img2label_map(img2label_path)
+
+    def make_img2label_map(self, path):
+        with open(path) as f:
+            lines = f.readlines()
+
+        img2label_map = dict()
+        for line in lines:
+            line = line.strip().split("\t")
+            if len(line) != 2:
+                continue
+            img_name, label = line
+            img_name = img_name.strip()
+            if not img_name:
+                continue
+            label = int(label.strip())
+            img2label_map[img_name] = label
+        return img2label_map
+
+    def __getitem__(self, index):
+        path, target = self.samples[index]
+        sample = self.loader(path)
+        if self.transform is not None:
+            sample = self.transform(sample)
+        # target_transform is bypassed on purpose: labels come from val_map.txt.
+        img_name = os.path.basename(path)
+        target = self.img2label_map[img_name]
+
+        return sample, target
+
+
+def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0):
+    dataset = CalibrationImageNet(
+        data_path,
+        transform=T.Compose(
+            [
+                T.Resize(256),
+                T.CenterCrop(img_sz),
+                T.ToTensor(),
+                T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+            ]
+        ),
+    )
+
+    calibration_dataset = dataset
+    if num_samples is not None:
+        calibration_dataset = torch.utils.data.Subset(
+            dataset, indices=range(num_samples)
+        )
+
+    calibration_dataloader = DataLoader(
+        calibration_dataset,
+        shuffle=False,
+        batch_size=batch_size,
+        drop_last=False,
+        num_workers=workers,
+    )
+
+    verify_dataloader = DataLoader(
+        dataset,
+        shuffle=False,
+        batch_size=batch_size,
+        drop_last=False,
+        num_workers=workers,
+    )
+
+    return calibration_dataloader, verify_dataloader
+
+
+def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000):
+    num_samples = min(total_sample, step * batch_size)
+    if step < 0:
+        num_samples = None
+    calibration_dataloader, _ = create_dataloaders(
+        dataset_dir,
+        img_sz=img_sz,
+        batch_size=batch_size,
+        workers=workers,
+        num_samples=num_samples,
+    )
+    return calibration_dataloader
\ No newline at end of file
diff --git a/models/cv/classification/squeezenet_1.1/ixrt/common.py b/models/cv/classification/squeezenet_1.1/ixrt/common.py
new file mode 100644
index 00000000..abdc147c
--- /dev/null
+++ b/models/cv/classification/squeezenet_1.1/ixrt/common.py
@@ -0,0 +1,79 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import torch
+import tensorrt
+import numpy as np
+import pycuda.driver as cuda
+
+def eval_batch(batch_score, batch_label):
+    batch_score = torch.from_numpy(batch_score).to(torch.float32)
+    values, indices = batch_score.topk(5)
+    top1, top5 = 0, 0
+    for idx, label in enumerate(batch_label):
+        if label == indices[idx][0]:
+            top1 += 1
+        if label in indices[idx]:
+            top5 += 1
+    return top1, top5
+
+def create_engine_context(engine_path, logger):
+    with open(engine_path, "rb") as f:
+        runtime = tensorrt.Runtime(logger)
+        assert runtime
+        engine = runtime.deserialize_cuda_engine(f.read())
+        assert engine
+        context = engine.create_execution_context()
+        assert context
+
+    return engine, context
+
+def get_io_bindings(engine):
+    # Setup I/O bindings
+    inputs = []
+    outputs = []
+    allocations = []
+
+    for i in range(engine.num_bindings):
+        is_input = False
+        if engine.binding_is_input(i):
+            is_input = True
+        name = engine.get_binding_name(i)
+        dtype = engine.get_binding_dtype(i)
+        shape = engine.get_binding_shape(i)
+        if is_input:
+            batch_size = shape[0]
+        size = np.dtype(tensorrt.nptype(dtype)).itemsize
+        for s in shape:
+            size *= s
+        allocation = cuda.mem_alloc(size)
+        binding = {
+            "index": i,
+            "name": name,
+            "dtype": np.dtype(tensorrt.nptype(dtype)),
+            "shape": list(shape),
+            "allocation": allocation,
+        }
+        print(f"binding {i}, name : {name}  dtype : {np.dtype(tensorrt.nptype(dtype))}  shape : {list(shape)}")
+        allocations.append(allocation)
+        if engine.binding_is_input(i):
+            inputs.append(binding)
+        else:
+            outputs.append(binding)
+    return inputs, outputs, allocations
diff --git a/models/cv/classification/squeezenet_1.1/ixrt/config/SQUEEZENET_V11_CONFIG b/models/cv/classification/squeezenet_1.1/ixrt/config/SQUEEZENET_V11_CONFIG
new file mode 100644
index 00000000..efc2a087
--- /dev/null
+++ b/models/cv/classification/squeezenet_1.1/ixrt/config/SQUEEZENET_V11_CONFIG
@@ -0,0 +1,19 @@
+# IMGSIZE : model input height/width
+# MODEL_NAME : basename of the generated onnx/engine files
+# ORIGINE_MODEL : original onnx file name
+IMGSIZE=224
+MODEL_NAME=SqueezeNet_v11
+ORIGINE_MODEL=squeezenetv11.onnx
+
+# QUANT CONFIG (effective only when PRECISION is int8)
+    # QUANT_OBSERVER : quantization observer, one of [hist_percentile, percentile, minmax, entropy, ema]
+    # QUANT_BATCHSIZE : dataloader batch size for calibration; keep it equal to the onnx batch size if possible, otherwise some ops (e.g. Reshape) may infer wrong shapes
+    # QUANT_STEP : number of calibration steps
+    # QUANT_SEED : random seed, to make the quantization reproducible
+    # QUANT_EXIST_ONNX : path to an already-quantized model from another source, if any
+QUANT_OBSERVER=hist_percentile
+QUANT_BATCHSIZE=1
+QUANT_STEP=32
+QUANT_SEED=42
+DISABLE_QUANT_LIST=
+QUANT_EXIST_ONNX=
diff --git a/models/cv/classification/squeezenet_1.1/ixrt/export_onnx.py b/models/cv/classification/squeezenet_1.1/ixrt/export_onnx.py
new file mode 100644
index 00000000..2ff4fe09
--- /dev/null
+++ b/models/cv/classification/squeezenet_1.1/ixrt/export_onnx.py
@@ -0,0 +1,45 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import torch
+import torchvision.models as models
+import argparse
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--origin_model", type=str)
+    parser.add_argument("--output_model", type=str)
+    args = parser.parse_args()
+    return args
+
+args = parse_args()
+model = models.squeezenet1_1()
+model.load_state_dict(torch.load(args.origin_model))
+model.cuda()
+model.eval()
+input = torch.randn(1, 3, 224, 224, device='cuda')
+export_onnx_file = args.output_model
+
+torch.onnx.export(model,
+                  input,
+                  export_onnx_file,
+                  export_params=True,
+                  opset_version=11,
+                  do_constant_folding=True,
+                  input_names=['input'],
+                  output_names=['output'])
+print('Model has been converted to ONNX')
\ No newline at end of file
diff --git a/models/cv/classification/squeezenet_1.1/ixrt/inference.py b/models/cv/classification/squeezenet_1.1/ixrt/inference.py
new file mode 100644
index 00000000..1ec56b4a
--- /dev/null
+++ b/models/cv/classification/squeezenet_1.1/ixrt/inference.py
@@ -0,0 +1,157 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
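+#
+# Entry point for evaluating a built engine. With --test_mode FPS the engine is
+# looped to measure throughput against --fps_target; with --test_mode ACC it runs
+# the ImageNet validation set and checks Top-1 accuracy against --acc_target.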
+
+import argparse
+import time
+from tqdm import tqdm
+
+import numpy as np
+import pycuda.autoinit  # noqa: F401 -- imported for its side effect of initializing the CUDA context
+import pycuda.driver as cuda
+import torch
+import tensorrt
+
+from calibration_dataset import getdataloader
+from common import eval_batch, create_engine_context, get_io_bindings
+
+def main(config):
+    dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz)
+
+    logger = tensorrt.Logger(tensorrt.Logger.ERROR)
+
+    # Load Engine && I/O bindings
+    engine, context = create_engine_context(config.engine_file, logger)
+    inputs, outputs, allocations = get_io_bindings(engine)
+
+    # Warm up
+    if config.warm_up > 0:
+        print("\nWarm Start.")
+        for i in range(config.warm_up):
+            context.execute_v2(allocations)
+        print("Warm Done.")
+
+    # Inference
+    if config.test_mode == "FPS":
+        torch.cuda.synchronize()
+        start_time = time.time()
+
+        for i in range(config.loop_count):
+            context.execute_v2(allocations)
+
+        torch.cuda.synchronize()
+        end_time = time.time()
+        forward_time = end_time - start_time
+
+        num_samples = 50000
+        if config.loop_count * config.bsz < num_samples:
+            num_samples = config.loop_count * config.bsz
+        fps = num_samples / forward_time
+
+        print("FPS : ", fps)
+        print(f"Performance Check : Test {fps} >= target {config.fps_target}")
+        if fps >= config.fps_target:
+            print("pass!")
+            exit()
+        else:
+            print("failed!")
+            exit(1)
+
+    elif config.test_mode == "ACC":
+
+        ## Prepare the output data
+        output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"])
+        print(f"output shape : {output.shape} output type : {output.dtype}")
+
+        total_sample = 0
+        acc_top1, acc_top5 = 0, 0
+
+        with tqdm(total=len(dataloader)) as _tqdm:
+            for idx, (batch_data, batch_label) in enumerate(dataloader):
+                batch_data = batch_data.numpy().astype(inputs[0]["dtype"])
+                batch_data = np.ascontiguousarray(batch_data)
+                total_sample += batch_data.shape[0]
+
+                cuda.memcpy_htod(inputs[0]["allocation"], batch_data)
+                context.execute_v2(allocations)
+                cuda.memcpy_dtoh(output, outputs[0]["allocation"])
+
+                # squeeze output shape [N,1000,1,1] to [N,1000] for models with trailing 1x1 dims
+                if len(output.shape) == 4:
+                    output = output.squeeze(axis=(2, 3))
+
+                batch_top1, batch_top5 = eval_batch(output, batch_label)
+                acc_top1 += batch_top1
+                acc_top5 += batch_top5
+
+                _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1 / total_sample),
+                                  acc_5='{:.4f}'.format(acc_top5 / total_sample))
+                _tqdm.update(1)
+
+        print(f"Acc@1 : {acc_top1 / total_sample} = {acc_top1}/{total_sample}")
+        print(f"Acc@5 : {acc_top5 / total_sample} = {acc_top5}/{total_sample}")
+        acc1 = acc_top1 / total_sample
+        print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}")
+        if acc1 >= config.acc_target:
+            print("pass!")
+            exit()
+        else:
+            print("failed!")
+            exit(1)
+
+def parse_config():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--test_mode", type=str, default="FPS", help="FPS or ACC")
+    parser.add_argument(
+        "--engine_file",
+        type=str,
+        help="engine file path"
+    )
+    parser.add_argument(
+        "--datasets_dir",
+        type=str,
+        default="",
+        help="ImageNet dir",
+    )
+    parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times")
+    parser.add_argument("--bsz", type=int, default=32, help="test batch size")
+    parser.add_argument(
+        "--imgsz",
+        "--img",
+        "--img-size",
+        type=int,
+        default=224,
+        help="inference size h,w",
+    )
+    parser.add_argument("--use_async", action="store_true")
+    parser.add_argument(
+        "--device", type=int, default=0, help="cuda device, i.e. 0 or 0,1,2,3,4"
+    )
+    parser.add_argument("--fps_target", type=float, default=-1.0)
+    parser.add_argument("--acc_target", type=float, default=-1.0)
+    parser.add_argument("--loop_count", type=int, default=-1)
+
+    config = parser.parse_args()
+    return config
+
+if __name__ == "__main__":
+    config = parse_config()
+    main(config)
diff --git a/models/cv/classification/squeezenet_1.1/ixrt/modify_batchsize.py b/models/cv/classification/squeezenet_1.1/ixrt/modify_batchsize.py
new file mode 100644
index 00000000..4ac42a30
--- /dev/null
+++ b/models/cv/classification/squeezenet_1.1/ixrt/modify_batchsize.py
@@ -0,0 +1,57 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import onnx
+import argparse
+
+def change_input_dim(model, bsz):
+    batch_size = bsz
+
+    # The following code changes the first dimension of every input to be batch_size.
+    # Modify as appropriate; note that this requires all inputs to have the same batch_size.
+    inputs = model.graph.input
+    for input in inputs:
+        # Checks omitted. This assumes that all inputs are tensors and have a shape with a first dim.
+        # Add checks as needed.
+        dim1 = input.type.tensor_type.shape.dim[0]
+        if isinstance(batch_size, str) and not batch_size.isdigit():
+            # a non-numeric string makes the batch dimension symbolic (dynamic)
+            dim1.dim_param = batch_size
+        elif isinstance(batch_size, int) or (isinstance(batch_size, str) and batch_size.isdigit()):
+            # a numeric value sets a fixed batch size
+            dim1.dim_value = int(batch_size)
+        else:
+            # fall back to a batch size of 1
+            dim1.dim_value = 1
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--batch_size", type=int)
+    parser.add_argument("--origin_model", type=str)
+    parser.add_argument("--output_model", type=str)
+    args = parser.parse_args()
+    return args
+
+args = parse_args()
+model = onnx.load(args.origin_model)
+change_input_dim(model, args.batch_size)
+onnx.save(model, args.output_model)
diff --git a/models/cv/classification/squeezenet_1.1/ixrt/quant.py b/models/cv/classification/squeezenet_1.1/ixrt/quant.py
new file mode 100644
index 00000000..5d71c828
--- /dev/null
+++ b/models/cv/classification/squeezenet_1.1/ixrt/quant.py
@@ -0,0 +1,166 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
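+#
+# Post-training quantization entry point: quantizes the simplified ONNX model
+# with PPQ and writes quantized_<MODEL_NAME>.onnx plus quant_cfg.json to --save_dir.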
+"""这是一个高度自动化的 PPQ 量化的入口脚本,将你的模型和数据按要求进行打包: + +在自动化 API 中,我们使用 QuantizationSetting 对象传递量化参数。 + +This file will show you how to quantize your network with PPQ + You should prepare your model and calibration dataset as follow: + + ~/working/model.onnx <-- your model + ~/working/data/*.npy or ~/working/data/*.bin <-- your dataset + +if you are using caffe model: + ~/working/model.caffemdoel <-- your model + ~/working/model.prototext <-- your model + +### MAKE SURE YOUR INPUT LAYOUT IS [N, C, H, W] or [C, H, W] ### + +quantized model will be generated at: ~/working/quantized.onnx +""" +from ppq import * +from ppq.api import * +import os +from calibration_dataset import getdataloader +import argparse +import random +import numpy as np +import torch + + +def setseed(seed=42): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model_name", type=str) + parser.add_argument("--model", type=str) + parser.add_argument("--dataset_dir", type=str, default="imagenet_val") + parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], + default="hist_percentile") + parser.add_argument("--disable_quant_names", nargs='*', type=str) + parser.add_argument("--save_dir", type=str, help="save path", default=None) + parser.add_argument("--bsz", type=int, default=32) + parser.add_argument("--step", type=int, default=20) + parser.add_argument("--seed", type=int, default=42) + parser.add_argument("--imgsz", type=int, default=224) + args = parser.parse_args() + print("Quant config:", args) + print(args.disable_quant_names) + return args + + +config = parse_args() + +# modify configuration below: +WORKING_DIRECTORY = 'checkpoints' # choose your working directory +TARGET_PLATFORM = TargetPlatform.TRT_INT8 # choose your target platform +MODEL_TYPE = NetworkFramework.ONNX # or NetworkFramework.CAFFE +INPUT_LAYOUT = 'chw' # input data layout, chw or hwc +NETWORK_INPUTSHAPE = [1, 3, 224, 224] # input shape of your network +EXECUTING_DEVICE = 'cuda' # 'cuda' or 'cpu'. +REQUIRE_ANALYSE = False +TRAINING_YOUR_NETWORK = False # 是否需要 Finetuning 一下你的网络 +# ------------------------------------------------------------------- +# 加载你的模型文件,PPQ 将会把 onnx 或者 caffe 模型文件解析成自己的格式 +# 如果你正使用 pytorch, tensorflow 等框架,你可以先将模型导出成 onnx +# 使用 torch.onnx.export 即可,如果你在导出 torch 模型时发生错误,欢迎与我们联系。 +# ------------------------------------------------------------------- +graph = None +if MODEL_TYPE == NetworkFramework.ONNX: + graph = load_onnx_graph(onnx_import_file=config.model) +if MODEL_TYPE == NetworkFramework.CAFFE: + graph = load_caffe_graph( + caffemodel_path=os.path.join(WORKING_DIRECTORY, 'model.caffemodel'), + prototxt_path=os.path.join(WORKING_DIRECTORY, 'model.prototxt')) +assert graph is not None, 'Graph Loading Error, Check your input again.' 
+
+# -------------------------------------------------------------------
+# The SETTING object controls PPQ's quantization logic: graph fusion,
+# scheduling, and the fine-grained quantization strategy.
+# If the quantization error of your network is too high, tune the
+# attributes of this SETTING object for targeted optimization.
+# -------------------------------------------------------------------
+QS = QuantizationSettingFactory.default_setting()
+
+# -------------------------------------------------------------------
+# The following shows how to use finetuning to improve quantization accuracy.
+# PPQ provides more than ten algorithms to help you recover accuracy;
+# each of them is enabled via QS.xxxx = True.
+# Enable them as needed; turning everything on at once is asking for trouble.
+# -------------------------------------------------------------------
+if TRAINING_YOUR_NETWORK:
+    QS.lsq_optimization = True                      # enable network retraining to reduce quantization error
+    QS.lsq_optimization_setting.steps = 500         # number of retraining steps; 500 steps takes a few minutes
+    QS.lsq_optimization_setting.collecting_device = 'cuda'  # where cached data lives; 'cuda' keeps it on the GPU, switch to 'cpu' if GPU memory runs out
+
+
+dataloader = getdataloader(config.dataset_dir, config.step, batch_size=config.bsz, img_sz=config.imgsz)
+# ENABLE_CUDA_KERNEL speeds up quantization by roughly 3x ~ 10x, but it cannot be
+# compiled without the corresponding build environment. Either install the build
+# environment, or quantize without it by removing `with ENABLE_CUDA_KERNEL():`.
with ENABLE_CUDA_KERNEL():
+    print('Quantizing the network; depending on your configuration this will take a while:')
+    quantized = quantize_native_model(
+        setting=QS,                                 # the setting object controls the standard quantization logic
+        model=graph,
+        calib_dataloader=dataloader,
+        calib_steps=config.step,
+        input_shape=NETWORK_INPUTSHAPE,             # if your network has a single input, pass its shape here
+        inputs=None,
+        # for multiple inputs, set input_shape=None and pass inputs=[torch.zeros(1,3,224,224), torch.zeros(1,3,224,224)]
+        collate_fn=lambda x: x[0].to(EXECUTING_DEVICE),  # same role as a torch dataloader's collate_fn: data preprocessing;
+        # you may also do the preprocessing in the dataloader itself and set this to None
+        platform=TARGET_PLATFORM,
+        device=EXECUTING_DEVICE,
+        do_quantize=True)
+
+    # -------------------------------------------------------------------
+    # To run the quantized network and obtain results, create an executor.
+    # The executor behaves like a torch.Module and can be used to fetch outputs.
+    # Note: this must be done before export.
+    # -------------------------------------------------------------------
+    executor = TorchExecutor(graph=quantized, device=EXECUTING_DEVICE)
+    # output = executor.forward(input)
+
+    # -------------------------------------------------------------------
+    # PPQ measures quantization error with the reciprocal of SNR, i.e.
+    # noise energy / signal energy; an error of 0.1 means quantization noise
+    # is roughly 10% of the total signal energy.
+    # Note that graphwise_error_analyse measures the accumulated error:
+    # the last layers usually show the largest values, caused jointly by all
+    # preceding layers. Use layerwise_error_analyse to locate per-layer sources.
+    # -------------------------------------------------------------------
+    print('Computing network quantization error (SNR); the last layer should stay below 0.1 to preserve accuracy:')
+    reports = graphwise_error_analyse(
+        graph=quantized, running_device=EXECUTING_DEVICE, steps=32,
+        dataloader=dataloader, collate_fn=lambda x: x[0].to(EXECUTING_DEVICE))
+    for op, snr in reports.items():
+        if snr > 0.1: ppq_warning(f'Layer {op} has significant accumulated quantization error; consider optimizing it')
+
+    if REQUIRE_ANALYSE:
+        print('Computing layerwise quantization error (SNR); each layer should stay below 0.1 to preserve accuracy:')
+        layerwise_error_analyse(graph=quantized, running_device=EXECUTING_DEVICE,
+                                interested_outputs=None,
+                                dataloader=dataloader, collate_fn=lambda x: x.to(EXECUTING_DEVICE))
+
+    # -------------------------------------------------------------------
+    # Export the quantized model with export_ppq_graph;
+    # PPQ adapts the model format to the chosen target platform.
+    # -------------------------------------------------------------------
+    print('Quantization finished; generating target files:')
+    export_ppq_graph(
+        graph=quantized, platform=TARGET_PLATFORM,
+        graph_save_to=os.path.join(config.save_dir, f"quantized_{config.model_name}.onnx"),
+        config_save_to=os.path.join(config.save_dir, 'quant_cfg.json'))
diff --git a/models/cv/classification/squeezenet_1.1/ixrt/refine_model.py b/models/cv/classification/squeezenet_1.1/ixrt/refine_model.py
new file mode 100644
index 00000000..000ee4dc
--- /dev/null
+++ b/models/cv/classification/squeezenet_1.1/ixrt/refine_model.py
@@ -0,0 +1,291 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import argparse
+import dataclasses
+
+import torch
+import onnx
+
+from refine_utils.matmul_to_gemm_pass import FusedGemmPass
+from refine_utils.linear_pass import FusedLinearPass
+
+from refine_utils.common import *
+
+def get_constant_input_name_of_operator(graph: Graph, operator: Operator):
+    const = None
+    for input in operator.inputs:
+        if not graph.containe_var(input):
+            continue
+
+        if not graph.is_leaf_variable(input):
+            continue
+
+        input_var = graph.get_variable(input)
+        if input_var.value is not None:
+            const = input
+    return const
+
+class FuseLayerNormPass(BasePass):
+
+    def process(self, graph: Graph) -> Graph:
+        self.transform = GraphTransform(graph)
+        find_sequence_subgraph(
+            graph,
+            [OP.REDUCE_MEAN, OP.SUB, OP.POW, OP.REDUCE_MEAN, OP.ADD, OP.SQRT, OP.DIV, OP.MUL, OP.ADD],
+            self.fuse_layer_norm,
+            strict=False
+        )
+        return graph
+
+    def fuse_layer_norm(self, graph: Graph, pattern: PatternGraph):
+        # Check that REDUCE_MEAN and SUB consume the same input
+        if pattern.nodes[0].operator.inputs[0] != pattern.nodes[1].operator.inputs[0]:
+            return
+
+        # Check that POW and DIV consume the same input
+        if pattern.nodes[2].operator.inputs[0] != pattern.nodes[6].operator.inputs[0]:
+            return
+
+        # Bail out if any intermediate output is consumed by more than one operator
+        nodes = pattern.nodes
+        for node in [nodes[0]] + nodes[2:-1]:
+            next_ops = graph.get_next_operators(node.operator)
+            if len(next_ops) > 1:
+                return
+
+        eps = None
+        for input in nodes[4].operator.inputs:
+            input_var = graph.get_variable(input)
+            if input_var.value is not None and graph.is_leaf_variable(input):
+                eps = to_py_type(input_var.value)
+
+        scale = get_constant_input_name_of_operator(graph, nodes[-2].operator)
+        bias = get_constant_input_name_of_operator(graph, nodes[-1].operator)
+
+        self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator)
+
+        attributes = {
+            "axis": nodes[0].operator.attributes.axes,
+            "epsilon": eps,
+        }
+
+        layer_norm_op = self.transform.make_operator(
+            op_type="LayerNormalization",
+            inputs=[nodes[0].operator.inputs[0], scale, bias],
+            outputs=[nodes[-1].operator.outputs[0]],
+            **attributes
+        )
+
+        self.transform.add_operator(layer_norm_op)
+
+class FusedGeluPass(BasePass):
+
+    def process(self, graph: Graph) -> Graph:
+        self.transform = GraphTransform(graph)
+
+        find_sequence_subgraph(
+            graph, pattern=[OP.DIV, OP.ERF, OP.ADD, OP.MUL, OP.MUL], callback=self.fuse_gelu, strict=True
+        )
+        return graph
+
+    def fuse_gelu(self, graph: Graph, pattern: PatternGraph):
+        nodes = pattern.nodes
+        prev_op = self.transform.get_previous_operators(nodes[0].operator)[0]
+        next_ops = self.transform.get_next_operators(prev_op)
+        if len(next_ops) != 2:
+            return
+
+        if nodes[0].operator not in next_ops or nodes[3].operator not in next_ops:
+            return
+
+        gelu_op_input = None
+        for input in nodes[3].operator.inputs:
+            if input in nodes[0].operator.inputs:
+                gelu_op_input = input
+                break
+
+        self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator)
+
+        gelu_op = self.transform.make_operator(
+            op_type=OP.GELU,
+            inputs=[gelu_op_input],
+            outputs=[nodes[-1].operator.outputs[0]]
+        )
+        self.transform.add_operator(gelu_op)
+
+@dataclasses.dataclass
+class NormalizeAttr(BaseOperatorAttr):
+    p: float = 2.0
+    epsilon: float = 1e-12
+    axis: int = 1
+
+
+@registe_operator(OP.GELU)
+class GeluOperator(BaseOperator):
+
+    def call(
+        self,
+        executor,
+        operator: Operator,
+        inputs: List,
+        attr: NormalizeAttr,
+    ):
+        return F.gelu(inputs[0])
+
+    def convert_onnx_operator(
+        self, ir_graph: Graph, onnx_graph: onnx.GraphProto, node: onnx.NodeProto
+    ) -> Operator:
+        return default_converter(ir_graph, onnx_graph, node, attr_cls=attr.EmptyAttr)
+
+    def quantize(
+        self,
+        graph: Graph,
+        op: Operator,
+        operator_observer_config: QuantOperatorObserverConfig,
+        quant_outputs: bool = False,
+    ):
+        return quant_single_input_operator(graph, op, operator_observer_config, quant_outputs=quant_outputs)
+
+
+class ClearUnusedVariables(BasePass):
+
+    def process(self, graph: Graph) -> Graph:
+        vars = list(graph.variables)
+
+        for var in vars:
+            if len(graph.get_dst_operators(var)) == 0 and graph.is_leaf_variable(var):
+                graph.delete_variable(var)
+
+        quant_params = list(graph.quant_parameters.keys())
+        for var in quant_params:
+            if not graph.containe_var(var):
+                graph.quant_parameters.pop(var)
+
+        return graph
+
+class FormatLayerNorm(BasePass):
+
+    def process(self, graph: Graph) -> Graph:
+        for op in graph.operators.values():
+            if "LayerNorm" in op.op_type:
+                self.format_layer_norm(graph, op)
+        return graph
+
+    def format_layer_norm(self, graph, operator):
+        if not hasattr(operator.attributes, "axis"):
+            return
+        if isinstance(operator.attributes.axis, (tuple, list)):
+            operator.attributes.axis = operator.attributes.axis[0]
+
+class FormatReshape(BasePass):
+
+    def process(self, graph: Graph) -> Graph:
+        for op in graph.operators.values():
+            if op.op_type == "Reshape":
+                self.format_reshape(graph, op)
+
+        return graph
+
+    def format_reshape(self, graph, operator):
+        shape = graph.get_variable(operator.inputs[1])
+        shape.value = torch.tensor(shape.value, dtype=torch.int64)
+
+class FormatScalar(BasePass):
+
+    def process(self, graph: Graph):
+        for var in graph.variables.values():
+            var: Variable
+            use_ops = graph.get_dst_operators(var)
+
+            if len(use_ops) == 0:
+                continue
+
+            if use_ops[0].op_type not in [OP.MUL, OP.ADD, OP.GATHER]:
+                continue
+
+            if var.value is not None and var.value.ndim == 0:
+                var.value = var.value.reshape(1)
+                print(f"Reshape scalar to tensor for {var.name}.")
+
+        return graph
+
+class RenamePass(BasePass):
+
+    def process(self, graph: Graph):
+
+        names = [name for name in graph.operators.keys()]
+        for old_name in names:
+            new_name = old_name.replace("/", "#")
+
+            graph.rename_operator(old_name, new_name)
+
+        names = [name for name in graph.variables.keys()]
+        for name in names:
+            new_name = name.replace("/", ".").replace("Output", "out").replace("output", "out")
+
+            graph.rename_vaiable(name, new_name,
+                                 with_variables=True,
+                                 with_operator_outputs=True)
+
+        return graph
+
+def create_pipeline(example_inputs):
+    return PassSequence(
+        # FuseLayerNormPass(),
+        FusedGeluPass(),
+
+        # ClearUnusedVariables(),
+        # FormatLayerNorm(),
+        # FormatReshape(),
+        # FormatScalar(),
+        # RenamePass()
+    )
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--onnx_path", type=str)
+    parser.add_argument("--dst_onnx_path", type=str)
+
+    parser.add_argument("--bsz", type=int, default=8,
+                        help="Batch size")
+    parser.add_argument("--imgsz", type=int, default=224,
+                        help="Image size")
+
+    return parser.parse_args()
+
+
+if __name__ == "__main__":
+    args = parse_args()
+
+    example_inputs = torch.randn(args.bsz, 3, args.imgsz, args.imgsz)
+
+    refine_pipeline = Pipeline(
+        create_source(f"{args.onnx_path}", example_inputs=example_inputs),
+        create_pipeline(example_inputs),
+        create_target(
+            f"{args.dst_onnx_path}",
+            example_inputs=example_inputs,
+        )
+    )
+    refine_pipeline.run()
+
+    print(f"Refined the model, input shape={example_inputs.shape}")
diff --git a/models/cv/classification/squeezenet_1.1/ixrt/scripts/infer_squeezenet_v11_fp16_accuracy.sh b/models/cv/classification/squeezenet_1.1/ixrt/scripts/infer_squeezenet_v11_fp16_accuracy.sh
new file mode 100644
index 00000000..30890ad6
--- /dev/null
+++ b/models/cv/classification/squeezenet_1.1/ixrt/scripts/infer_squeezenet_v11_fp16_accuracy.sh
@@ -0,0 +1,143 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
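+# Usage sketch (the variables below are assumed to be exported by the caller;
+# the script itself only reads them): CONFIG_DIR points at config/SQUEEZENET_V11_CONFIG,
+# CHECKPOINTS_DIR holds the ONNX checkpoints, DATASETS_DIR is the ImageNet val dir,
+# and RUN_DIR is this ixrt directory, e.g.
+#   CONFIG_DIR=config/SQUEEZENET_V11_CONFIG CHECKPOINTS_DIR=./checkpoints \
+#   DATASETS_DIR=/path/to/imagenet_val RUN_DIR=. bash scripts/infer_squeezenet_v11_fp16_accuracy.sh --bs 32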
+
+EXIT_STATUS=0
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then
+        EXIT_STATUS=1
+    fi
+}
+
+# Run parameters
+BSZ=32
+TGT=-1
+WARM_UP=0
+LOOP_COUNT=-1
+RUN_MODE=ACC
+PRECISION=float16
+
+# Update arguments
+index=0
+options=$@
+arguments=($options)
+for argument in $options
+do
+    index=`expr $index + 1`
+    case $argument in
+      --bs) BSZ=${arguments[index]};;
+      --tgt) TGT=${arguments[index]};;
+    esac
+done
+
+source ${CONFIG_DIR}
+ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo CONFIG_DIR : ${CONFIG_DIR}
+echo ====================== Model Info ======================
+echo Model Name : ${MODEL_NAME}
+echo Onnx Path : ${ORIGINE_MODEL}
+
+step=0
+SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
+
+# Simplify Model
+let step++
+echo;
+echo [STEP ${step}] : Simplify Model
+if [ -f ${SIM_MODEL} ];then
+    echo "  "Simplify Model Skip, ${SIM_MODEL} already exists
+else
+    python3 ${RUN_DIR}/simplify_model.py \
+        --origin_model $ORIGINE_MODEL \
+        --output_model ${SIM_MODEL}
+    echo "  "Generate ${SIM_MODEL}
+fi
+
+# Quant Model
+if [ $PRECISION == "int8" ];then
+    let step++
+    echo;
+    echo [STEP ${step}] : Quant Model
+    if [[ -z ${QUANT_EXIST_ONNX} ]];then
+        QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx
+    fi
+    if [[ -f ${QUANT_EXIST_ONNX} ]];then
+        SIM_MODEL=${QUANT_EXIST_ONNX}
+        echo "  "Quant Model Skip, ${QUANT_EXIST_ONNX} already exists
+    else
+        python3 ${RUN_DIR}/quant.py \
+            --model ${SIM_MODEL} \
+            --model_name ${MODEL_NAME} \
+            --dataset_dir ${DATASETS_DIR} \
+            --observer ${QUANT_OBSERVER} \
+            --disable_quant_names ${DISABLE_QUANT_LIST[@]} \
+            --save_dir $CHECKPOINTS_DIR \
+            --bsz ${QUANT_BATCHSIZE} \
+            --step ${QUANT_STEP} \
+            --seed ${QUANT_SEED} \
+            --imgsz ${IMGSIZE}
+        SIM_MODEL=${QUANT_EXIST_ONNX}
+        echo "  "Generate ${SIM_MODEL}
+    fi
+fi
+
+# Change Batchsize
+let step++
+echo;
+echo [STEP ${step}] : Change Batchsize
+FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx
+if [ -f $FINAL_MODEL ];then
+    echo "  "Change Batchsize Skip, $FINAL_MODEL already exists
+else
+    python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
+        --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL}
+    echo "  "Generate ${FINAL_MODEL}
+fi
+
+# Build Engine
+let step++
+echo;
+echo [STEP ${step}] : Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
+if [ -f $ENGINE_FILE ];then
+    echo "  "Build Engine Skip, $ENGINE_FILE already exists
+else
+    python3 ${RUN_DIR}/build_engine.py \
+        --precision ${PRECISION} \
+        --model ${FINAL_MODEL} \
+        --engine ${ENGINE_FILE}
+    echo "  "Generate Engine ${ENGINE_FILE}
+fi
+
+# Inference
+let step++
+echo;
+echo [STEP ${step}] : Inference
+python3 ${RUN_DIR}/inference.py \
+    --engine_file=${ENGINE_FILE} \
+    --datasets_dir=${DATASETS_DIR} \
+    --imgsz=${IMGSIZE} \
+    --warm_up=${WARM_UP} \
+    --loop_count ${LOOP_COUNT} \
+    --test_mode ${RUN_MODE} \
+    --acc_target ${TGT} \
+    --bsz ${BSZ}; check_status
+
+exit ${EXIT_STATUS}
\ No newline at end of file
diff --git a/models/cv/classification/squeezenet_1.1/ixrt/scripts/infer_squeezenet_v11_fp16_performance.sh b/models/cv/classification/squeezenet_1.1/ixrt/scripts/infer_squeezenet_v11_fp16_performance.sh
new file mode 100644
index 00000000..dbd6c8c7
--- /dev/null
+++ b/models/cv/classification/squeezenet_1.1/ixrt/scripts/infer_squeezenet_v11_fp16_performance.sh
@@ -0,0 +1,143 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+EXIT_STATUS=0
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then
+        EXIT_STATUS=1
+    fi
+}
+
+# Run parameters
+BSZ=32
+TGT=-1
+WARM_UP=3
+LOOP_COUNT=20
+RUN_MODE=FPS
+PRECISION=float16
+
+# Update arguments
+index=0
+options=$@
+arguments=($options)
+for argument in $options
+do
+    index=`expr $index + 1`
+    case $argument in
+      --bs) BSZ=${arguments[index]};;
+      --tgt) TGT=${arguments[index]};;
+    esac
+done
+
+source ${CONFIG_DIR}
+ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo CONFIG_DIR : ${CONFIG_DIR}
+echo ====================== Model Info ======================
+echo Model Name : ${MODEL_NAME}
+echo Onnx Path : ${ORIGINE_MODEL}
+
+step=0
+SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
+
+# Simplify Model
+let step++
+echo;
+echo [STEP ${step}] : Simplify Model
+if [ -f ${SIM_MODEL} ];then
+    echo "  "Simplify Model Skip, ${SIM_MODEL} already exists
+else
+    python3 ${RUN_DIR}/simplify_model.py \
+        --origin_model $ORIGINE_MODEL \
+        --output_model ${SIM_MODEL}
+    echo "  "Generate ${SIM_MODEL}
+fi
+
+# Quant Model
+if [ $PRECISION == "int8" ];then
+    let step++
+    echo;
+    echo [STEP ${step}] : Quant Model
+    if [[ -z ${QUANT_EXIST_ONNX} ]];then
+        QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx
+    fi
+    if [[ -f ${QUANT_EXIST_ONNX} ]];then
+        SIM_MODEL=${QUANT_EXIST_ONNX}
+        echo "  "Quant Model Skip, ${QUANT_EXIST_ONNX} already exists
+    else
+        python3 ${RUN_DIR}/quant.py \
+            --model ${SIM_MODEL} \
+            --model_name ${MODEL_NAME} \
+            --dataset_dir ${DATASETS_DIR} \
+            --observer ${QUANT_OBSERVER} \
+            --disable_quant_names ${DISABLE_QUANT_LIST[@]} \
+            --save_dir $CHECKPOINTS_DIR \
+            --bsz ${QUANT_BATCHSIZE} \
+            --step ${QUANT_STEP} \
+            --seed ${QUANT_SEED} \
+            --imgsz ${IMGSIZE}
+        SIM_MODEL=${QUANT_EXIST_ONNX}
+        echo "  "Generate ${SIM_MODEL}
+    fi
+fi
+
+# Change Batchsize
+let step++
+echo;
+echo [STEP ${step}] : Change Batchsize
+FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx
+if [ -f $FINAL_MODEL ];then
+    echo "  "Change Batchsize Skip, $FINAL_MODEL already exists
+else
+    python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
+        --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL}
+    echo "  "Generate ${FINAL_MODEL}
+fi
+
+# Build Engine
+let step++
+echo;
+echo [STEP ${step}] : Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
+if [ -f $ENGINE_FILE ];then
+    echo "  "Build Engine Skip, $ENGINE_FILE already exists
+else
+    python3 ${RUN_DIR}/build_engine.py \
+        --precision ${PRECISION} \
+        --model ${FINAL_MODEL} \
+        --engine ${ENGINE_FILE}
+    echo "  "Generate Engine ${ENGINE_FILE}
+fi
+
+# Inference
+let step++
+echo;
+echo [STEP ${step}] : Inference
+python3 ${RUN_DIR}/inference.py \
+    --engine_file=${ENGINE_FILE} \
+    --datasets_dir=${DATASETS_DIR} \
+    --imgsz=${IMGSIZE} \
+    --warm_up=${WARM_UP} \
+    --loop_count ${LOOP_COUNT} \
+    --test_mode ${RUN_MODE} \
+    --fps_target ${TGT} \
+    --bsz ${BSZ}; check_status
+
+exit ${EXIT_STATUS}
\ No newline at end of file
diff --git a/models/cv/classification/squeezenet_1.1/ixrt/scripts/infer_squeezenet_v11_int8_accuracy.sh b/models/cv/classification/squeezenet_1.1/ixrt/scripts/infer_squeezenet_v11_int8_accuracy.sh
new file mode 100644
index 00000000..2b2db01a
--- /dev/null
+++ b/models/cv/classification/squeezenet_1.1/ixrt/scripts/infer_squeezenet_v11_int8_accuracy.sh
@@ -0,0 +1,145 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+set -x
+EXIT_STATUS=0
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then
+        EXIT_STATUS=1
+    fi
+}
+
+# Run parameters
+BSZ=32
+TGT=-1
+WARM_UP=0
+LOOP_COUNT=-1
+RUN_MODE=ACC
+PRECISION=int8
+export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
+# Update arguments
+index=0
+options=$@
+arguments=($options)
+for argument in $options
+do
+    index=`expr $index + 1`
+    case $argument in
+      --bs) BSZ=${arguments[index]};;
+      --tgt) TGT=${arguments[index]};;
+    esac
+done
+
+source ${CONFIG_DIR}
+echo ${QUANT_OBSERVER}
+ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo CONFIG_DIR : ${CONFIG_DIR}
+echo ====================== Model Info ======================
+echo Model Name : ${MODEL_NAME}
+echo Model Input Name : ${MODEL_INPUT_NAME}
+echo Model Output Name : ${MODEL_OUTPUT_NAME}
+echo Onnx Path : ${ORIGINE_MODEL}
+
+step=0
+SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
+
+# Simplify Model
+let step++
+echo;
+echo [STEP ${step}] : Simplify Model
+if [ -f ${SIM_MODEL} ];then
+    echo "  "Simplify Model Skip, ${SIM_MODEL} already exists
+else
+    python3 ${RUN_DIR}/simplify_model.py \
+        --origin_model $ORIGINE_MODEL \
+        --output_model ${SIM_MODEL}
+    echo "  "Generate ${SIM_MODEL}
+fi
+
+# Quant Model
+if [ $PRECISION == "int8" ];then
+    let step++
+    echo;
+    echo [STEP ${step}] : Quant Model
+    if [[ -z ${QUANT_EXIST_ONNX} ]];then
+        QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx
+    fi
+    if [[ -f ${QUANT_EXIST_ONNX} ]];then
+        SIM_MODEL=${QUANT_EXIST_ONNX}
+        echo "  "Quant Model Skip, ${QUANT_EXIST_ONNX} already exists
+    else
+        python3 ${RUN_DIR}/quant.py \
+            --model ${SIM_MODEL} \
+            --model_name ${MODEL_NAME} \
+            --dataset_dir ${DATASETS_DIR} \
+            --observer ${QUANT_OBSERVER} \
+            --disable_quant_names ${DISABLE_QUANT_LIST[@]} \
+            --save_dir $CHECKPOINTS_DIR \
+            --bsz ${QUANT_BATCHSIZE} \
+            --step ${QUANT_STEP} \
+            --seed ${QUANT_SEED} \
+            --imgsz ${IMGSIZE}
+        SIM_MODEL=${QUANT_EXIST_ONNX}
+        echo "  "Generate ${SIM_MODEL}
+    fi
+fi
+
+# Change Batchsize
+let step++
+echo;
+echo [STEP ${step}] : Change Batchsize
+FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx
+if [ -f $FINAL_MODEL ];then
+    echo "  "Change Batchsize Skip, $FINAL_MODEL already exists
+else
+    python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
+        --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL}
+    echo "  "Generate ${FINAL_MODEL}
+fi
+
+# Build Engine
+let step++
+echo;
+echo [STEP ${step}] : Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
+if [ -f $ENGINE_FILE ];then
+    echo "  "Build Engine Skip, $ENGINE_FILE already exists
+else
+    python3 ${RUN_DIR}/build_i8_engine.py \
+        --onnx ${FINAL_MODEL} \
+        --qparam_json ${CHECKPOINTS_DIR}/quant_cfg.json \
+        --engine ${ENGINE_FILE}
+    echo "  "Generate Engine ${ENGINE_FILE}
+fi
+
+# Inference
+let step++
+echo;
+echo [STEP ${step}] : Inference
+python3 ${RUN_DIR}/inference.py \
+    --engine_file=${ENGINE_FILE} \
+    --datasets_dir=${DATASETS_DIR} \
+    --imgsz=${IMGSIZE} \
+    --warm_up=${WARM_UP} \
+    --loop_count ${LOOP_COUNT} \
+    --test_mode ${RUN_MODE} \
+    --acc_target ${TGT} \
+    --bsz ${BSZ}; check_status
+
+exit ${EXIT_STATUS}
diff --git a/models/cv/classification/squeezenet_1.1/ixrt/scripts/infer_squeezenet_v11_int8_performance.sh b/models/cv/classification/squeezenet_1.1/ixrt/scripts/infer_squeezenet_v11_int8_performance.sh
new file mode 100644
index 00000000..bec51520
--- /dev/null
+++ b/models/cv/classification/squeezenet_1.1/ixrt/scripts/infer_squeezenet_v11_int8_performance.sh
@@ -0,0 +1,144 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
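+# Note: this performance script follows the same flow as the fp16 variant, but
+# quantizes with quant.py first and builds the engine with build_i8_engine.py
+# from the generated quant_cfg.json. The environment variables (CONFIG_DIR,
+# CHECKPOINTS_DIR, DATASETS_DIR, RUN_DIR) are assumed to be exported by the caller.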
+EXIT_STATUS=0
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then
+        EXIT_STATUS=1
+    fi
+}
+
+# Run parameters
+BSZ=32
+TGT=-1
+WARM_UP=3
+LOOP_COUNT=20
+RUN_MODE=FPS
+PRECISION=int8
+export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
+# Update arguments
+index=0
+options=$@
+arguments=($options)
+for argument in $options
+do
+    index=`expr $index + 1`
+    case $argument in
+      --bs) BSZ=${arguments[index]};;
+      --tgt) TGT=${arguments[index]};;
+    esac
+done
+
+source ${CONFIG_DIR}
+echo ${QUANT_OBSERVER}
+ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo CONFIG_DIR : ${CONFIG_DIR}
+echo ====================== Model Info ======================
+echo Model Name : ${MODEL_NAME}
+echo Model Input Name : ${MODEL_INPUT_NAME}
+echo Model Output Name : ${MODEL_OUTPUT_NAME}
+echo Onnx Path : ${ORIGINE_MODEL}
+
+step=0
+SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
+
+# Simplify Model
+let step++
+echo;
+echo [STEP ${step}] : Simplify Model
+if [ -f ${SIM_MODEL} ];then
+    echo "  "Simplify Model Skip, ${SIM_MODEL} already exists
+else
+    python3 ${RUN_DIR}/simplify_model.py \
+        --origin_model $ORIGINE_MODEL \
+        --output_model ${SIM_MODEL}
+    echo "  "Generate ${SIM_MODEL}
+fi
+
+# Quant Model
+if [ $PRECISION == "int8" ];then
+    let step++
+    echo;
+    echo [STEP ${step}] : Quant Model
+    if [[ -z ${QUANT_EXIST_ONNX} ]];then
+        QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx
+    fi
+    if [[ -f ${QUANT_EXIST_ONNX} ]];then
+        SIM_MODEL=${QUANT_EXIST_ONNX}
+        echo "  "Quant Model Skip, ${QUANT_EXIST_ONNX} already exists
+    else
+        python3 ${RUN_DIR}/quant.py \
+            --model ${SIM_MODEL} \
+            --model_name ${MODEL_NAME} \
+            --dataset_dir ${DATASETS_DIR} \
+            --observer ${QUANT_OBSERVER} \
+            --disable_quant_names ${DISABLE_QUANT_LIST[@]} \
+            --save_dir $CHECKPOINTS_DIR \
+            --bsz ${QUANT_BATCHSIZE} \
+            --step ${QUANT_STEP} \
+            --seed ${QUANT_SEED} \
+            --imgsz ${IMGSIZE}
+        SIM_MODEL=${QUANT_EXIST_ONNX}
+        echo "  "Generate ${SIM_MODEL}
+    fi
+fi
+
+# Change Batchsize
+let step++
+echo;
+echo [STEP ${step}] : Change Batchsize
+FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx
+if [ -f $FINAL_MODEL ];then
+    echo "  "Change Batchsize Skip, $FINAL_MODEL already exists
+else
+    python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
+        --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL}
+    echo "  "Generate ${FINAL_MODEL}
+fi
+
+# Build Engine
+let step++
+echo;
+echo [STEP ${step}] : Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
+if [ -f $ENGINE_FILE ];then
+    echo "  "Build Engine Skip, $ENGINE_FILE already exists
+else
+    python3 ${RUN_DIR}/build_i8_engine.py \
+        --onnx ${FINAL_MODEL} \
+        --qparam_json ${CHECKPOINTS_DIR}/quant_cfg.json \
+        --engine ${ENGINE_FILE}
+    echo "  "Generate Engine ${ENGINE_FILE}
+fi
+
+# Inference
+let step++
+echo;
+echo [STEP ${step}] : Inference
+python3 ${RUN_DIR}/inference.py \
+    --engine_file=${ENGINE_FILE} \
+    --datasets_dir=${DATASETS_DIR} \
+    --imgsz=${IMGSIZE} \
+    --warm_up=${WARM_UP} \
+    --loop_count ${LOOP_COUNT} \
+    --test_mode ${RUN_MODE} \
+    --fps_target ${TGT} \
+    --bsz ${BSZ}; check_status
+
+exit ${EXIT_STATUS}
\ No newline at end of file
diff --git a/models/cv/classification/squeezenet_1.1/ixrt/simplify_model.py b/models/cv/classification/squeezenet_1.1/ixrt/simplify_model.py
new file mode 100644
index 00000000..4d53a474
--- /dev/null
+++ b/models/cv/classification/squeezenet_1.1/ixrt/simplify_model.py
@@ -0,0 +1,41 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import onnx
+import argparse
+from onnxsim import simplify
+
+# Simplify
+def simplify_model(args):
+    onnx_model = onnx.load(args.origin_model)
+    model_simp, check = simplify(onnx_model)
+    model_simp = onnx.shape_inference.infer_shapes(model_simp)
+    onnx.save(model_simp, args.output_model)
+    print("  Simplify ONNX done.")
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--origin_model", type=str)
+    parser.add_argument("--output_model", type=str)
+    parser.add_argument("--reshape", action="store_true")
+    args = parser.parse_args()
+    return args
+
+args = parse_args()
+simplify_model(args)
--
Gitee