From 3997961b3fdc7850a3706a71478c3891545769e7 Mon Sep 17 00:00:00 2001 From: "xinchi.tian" Date: Mon, 6 May 2024 15:32:31 +0800 Subject: [PATCH] Add Detr model in IXRT link #I9FP0V Add Detr model in IXRT Signed-off-by: xinchi.tian --- models/cv/detection/detr/ixrt/README.md | 55 ++++ models/cv/detection/detr/ixrt/build_engine.py | 59 ++++ .../detr/ixrt/calibration_dataset.py | 43 +++ models/cv/detection/detr/ixrt/coco_labels.py | 104 +++++++ models/cv/detection/detr/ixrt/common.py | 116 +++++++ .../cv/detection/detr/ixrt/config/DETR_CONFIG | 44 +++ .../detection/detr/ixrt/datasets/__init__.py | 14 + .../cv/detection/detr/ixrt/datasets/coco.py | 131 ++++++++ .../cv/detection/detr/ixrt/datasets/common.py | 83 +++++ .../detr/ixrt/datasets/post_process.py | 173 +++++++++++ .../detr/ixrt/datasets/pre_process.py | 91 ++++++ .../cv/detection/detr/ixrt/datasets/vision.py | 151 +++++++++ models/cv/detection/detr/ixrt/deploy.py | 123 ++++++++ models/cv/detection/detr/ixrt/export_model.py | 121 ++++++++ .../detr/ixrt/extract_graph_weight.py | 139 +++++++++ models/cv/detection/detr/ixrt/inference.py | 239 ++++++++++++++ .../detection/detr/ixrt/load_ixrt_plugin.py | 27 ++ .../detection/detr/ixrt/modify_batchsize.py | 179 +++++++++++ models/cv/detection/detr/ixrt/refine_model.py | 291 ++++++++++++++++++ .../detr/ixrt/refine_utils/__init__.py | 14 + .../detr/ixrt/refine_utils/common.py | 37 +++ .../detr/ixrt/refine_utils/linear_pass.py | 114 +++++++ .../ixrt/refine_utils/matmul_to_gemm_pass.py | 55 ++++ .../ixrt/scripts/infer_detr_fp16_accuracy.sh | 142 +++++++++ .../scripts/infer_detr_fp16_performance.sh | 142 +++++++++ .../cv/detection/detr/ixrt/simplify_model.py | 36 +++ 26 files changed, 2723 insertions(+) create mode 100755 models/cv/detection/detr/ixrt/README.md create mode 100644 models/cv/detection/detr/ixrt/build_engine.py create mode 100644 models/cv/detection/detr/ixrt/calibration_dataset.py create mode 100644 models/cv/detection/detr/ixrt/coco_labels.py create mode 100644 models/cv/detection/detr/ixrt/common.py create mode 100644 models/cv/detection/detr/ixrt/config/DETR_CONFIG create mode 100644 models/cv/detection/detr/ixrt/datasets/__init__.py create mode 100644 models/cv/detection/detr/ixrt/datasets/coco.py create mode 100644 models/cv/detection/detr/ixrt/datasets/common.py create mode 100644 models/cv/detection/detr/ixrt/datasets/post_process.py create mode 100644 models/cv/detection/detr/ixrt/datasets/pre_process.py create mode 100755 models/cv/detection/detr/ixrt/datasets/vision.py create mode 100644 models/cv/detection/detr/ixrt/deploy.py create mode 100644 models/cv/detection/detr/ixrt/export_model.py create mode 100644 models/cv/detection/detr/ixrt/extract_graph_weight.py create mode 100755 models/cv/detection/detr/ixrt/inference.py create mode 100644 models/cv/detection/detr/ixrt/load_ixrt_plugin.py create mode 100644 models/cv/detection/detr/ixrt/modify_batchsize.py create mode 100644 models/cv/detection/detr/ixrt/refine_model.py create mode 100644 models/cv/detection/detr/ixrt/refine_utils/__init__.py create mode 100644 models/cv/detection/detr/ixrt/refine_utils/common.py create mode 100644 models/cv/detection/detr/ixrt/refine_utils/linear_pass.py create mode 100644 models/cv/detection/detr/ixrt/refine_utils/matmul_to_gemm_pass.py create mode 100755 models/cv/detection/detr/ixrt/scripts/infer_detr_fp16_accuracy.sh create mode 100755 models/cv/detection/detr/ixrt/scripts/infer_detr_fp16_performance.sh create mode 100644 models/cv/detection/detr/ixrt/simplify_model.py diff --git 
a/models/cv/detection/detr/ixrt/README.md b/models/cv/detection/detr/ixrt/README.md
new file mode 100755
index 00000000..e1b76b56
--- /dev/null
+++ b/models/cv/detection/detr/ixrt/README.md
@@ -0,0 +1,55 @@
+# Detr
+
+## Description
+DETR (DEtection TRansformer) is a novel approach that views object detection as a direct set prediction problem. This method streamlines the detection process, eliminating the need for many hand-designed components like non-maximum suppression procedures or anchor generation, which are typically used to explicitly encode prior knowledge about the task.
+
+## Setup
+
+### Install
+```bash
+yum install mesa-libGL
+pip3 install tqdm
+pip3 install pycuda
+pip3 install onnx
+pip3 install onnxsim
+pip3 install tabulate
+pip3 install onnxruntime
+pip3 install pycocotools
+pip3 install opencv-python==4.6.0.66
+```
+
+### Download
+Pretrained model:
+
+Dataset: to download the validation dataset.
+
+### Model Conversion
+```bash
+mkdir checkpoints
+python3 export_model.py --torch_file /path/to/detr_r50_8xb2-150e_coco_20221023_153551-436d03e8.pth --onnx_file checkpoints/detr_res50.onnx --bsz 1
+```
+
+## Inference
+```bash
+export PROJ_DIR=./
+export DATASETS_DIR=/path/to/coco2017/
+export CHECKPOINTS_DIR=./checkpoints
+export COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json
+export EVAL_DIR=${DATASETS_DIR}/val2017
+export RUN_DIR=./
+export CONFIG_DIR=config/DETR_CONFIG
+```
+### FP16
+
+```bash
+# Accuracy
+bash scripts/infer_detr_fp16_accuracy.sh
+# Performance
+bash scripts/infer_detr_fp16_performance.sh
+```
+
+## Results
+
+Model |BatchSize |Precision |FPS |MAP@0.5 |MAP@0.5:0.95
+--------|-----------|----------|----------|----------|------------
+Detr | 1 | FP16 | 65.84 | 0.370 | 0.198
\ No newline at end of file
diff --git a/models/cv/detection/detr/ixrt/build_engine.py b/models/cv/detection/detr/ixrt/build_engine.py
new file mode 100644
index 00000000..3a00b513
--- /dev/null
+++ b/models/cv/detection/detr/ixrt/build_engine.py
@@ -0,0 +1,59 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
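For reference, an engine serialized by `build_engine.py` below can be loaded back with the helpers this patch adds in `common.py`, mirroring what `inference.py` does; a minimal sketch (the engine filename is a placeholder):

```python
# Hedged sketch: load a serialized engine and inspect its I/O bindings.
import pycuda.autoinit          # creates a CUDA context for the allocations below
import tensorrt

from load_ixrt_plugin import load_ixrt_plugin
from common import create_engine_context, get_io_bindings

load_ixrt_plugin()              # register IXRT plugins before deserialization
logger = tensorrt.Logger(tensorrt.Logger.ERROR)
engine, context = create_engine_context("checkpoints/detr_res50_fp16.engine", logger)
inputs, outputs, allocations = get_io_bindings(engine)
print(inputs[0]["shape"], [o["shape"] for o in outputs])   # e.g. [1, 3, 800, 800] and the two DETR heads
```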
+ +import os +import cv2 +import argparse +import numpy as np + +import torch +import tensorrt + +def main(config): + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + # parser.parse_from_file(config.model) + + precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 + if precision == tensorrt.BuilderFlag.INT8: + parser.parse_from_files(config.model, config.quant_file) + else: + parser.parse_from_file(config.model) + + # print("precision : ", precision) + build_config.set_flag(precision) + + plan = builder.build_serialized_network(network, build_config) + engine_file_path = config.engine + with open(engine_file_path, "wb") as f: + f.write(plan) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str) + parser.add_argument("--quant_file", type=str) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", + help="The precision of datatype") + parser.add_argument("--engine", type=str, default=None) + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = parse_args() + main(args) \ No newline at end of file diff --git a/models/cv/detection/detr/ixrt/calibration_dataset.py b/models/cv/detection/detr/ixrt/calibration_dataset.py new file mode 100644 index 00000000..de37775a --- /dev/null +++ b/models/cv/detection/detr/ixrt/calibration_dataset.py @@ -0,0 +1,43 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import torch +import torchvision.datasets +from torch.utils.data import DataLoader +from datasets.coco import CocoDetection + +def create_dataloaders(data_path, annFile, img_sz=640, batch_size=32, step=32, workers=2, data_process_type="yolov5"): + dataset = CocoDetection( + root=data_path, + annFile=annFile, + img_size=img_sz, + data_process_type=data_process_type + ) + calibration_dataset = dataset + num_samples = min(5000, batch_size * step) + if num_samples > 0: + calibration_dataset = torch.utils.data.Subset( + dataset, indices=range(num_samples) + ) + + calibration_dataloader = DataLoader( + calibration_dataset, + shuffle=False, + batch_size=batch_size, + drop_last=False, + num_workers=workers, + ) + return calibration_dataloader \ No newline at end of file diff --git a/models/cv/detection/detr/ixrt/coco_labels.py b/models/cv/detection/detr/ixrt/coco_labels.py new file mode 100644 index 00000000..43f5bd82 --- /dev/null +++ b/models/cv/detection/detr/ixrt/coco_labels.py @@ -0,0 +1,104 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +labels = [ + "person", + "bicycle", + "car", + "motorcycle", + "airplane", + "bus", + "train", + "truck", + "boat", + "traffic light", + "fire hydrant", + "stop sign", + "parking meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "backpack", + "umbrella", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports ball", + "kite", + "baseball bat", + "baseball glove", + "skateboard", + "surfboard", + "tennis racket", + "bottle", + "wine glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot dog", + "pizza", + "donut", + "cake", + "chair", + "couch", + "potted plant", + "bed", + "dining table", + "toilet", + "tv", + "laptop", + "mouse", + "remote", + "keyboard", + "cell phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "book", + "clock", + "vase", + "scissors", + "teddy bear", + "hair drier", + "toothbrush", +] +def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper) + return [ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] + +__all__ = ["labels"] diff --git a/models/cv/detection/detr/ixrt/common.py b/models/cv/detection/detr/ixrt/common.py new file mode 100644 index 00000000..c8d4a7b9 --- /dev/null +++ b/models/cv/detection/detr/ixrt/common.py @@ -0,0 +1,116 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
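A quick note on the helper above: `coco80_to_coco91_class()` maps a contiguous 80-class index (the order of `labels`) to the sparse category ids used in the COCO annotation files, for example:

```python
# Hedged example of the 80-class -> 91-class category-id mapping defined above.
from coco_labels import labels, coco80_to_coco91_class

class_map = coco80_to_coco91_class()
idx = labels.index("dog")      # 16 in the contiguous 80-class list
print(idx, class_map[idx])     # 16 18 -> "dog" has category_id 18 in the COCO json
```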
+ +import numpy as np +from tqdm import tqdm + +import tensorrt +import pycuda.driver as cuda + +# input : [bsz, box_num, 5(cx, cy, w, h, conf) + class_num(prob[0], prob[1], ...)] +# output : [bsz, box_num, 6(left_top_x, left_top_y, right_bottom_x, right_bottom_y, class_id, max_prob*conf)] +def box_class85to6(input): + center_x_y = input[:, :2] + side = input[:, 2:4] + conf = input[:, 4:5] + class_id = np.argmax(input[:, 5:], axis = -1) + class_id = class_id.astype(np.float32).reshape(-1, 1) + 1 + max_prob = np.max(input[:, 5:], axis = -1).reshape(-1, 1) + x1_y1 = center_x_y - 0.5 * side + x2_y2 = center_x_y + 0.5 * side + nms_input = np.concatenate([x1_y1, x2_y2, class_id, max_prob*conf], axis = -1) + return nms_input + +def save2json(batch_img_id, pred_boxes, json_result): + for i, boxes in enumerate(pred_boxes): + image_id = int(batch_img_id) + if boxes is not None: + x, y, w, h, c, p = boxes + if image_id!=-1: + + x, y, w, h, p = float(x), float(y), float(w), float(h), float(p) + c = int(c) + json_result.append( + { + "image_id": image_id, + "category_id": c, + "bbox": [x, y, w, h], + "score": p, + } + ) + +# def save2json(batch_img_id, pred_boxes, json_result, class_trans): +# for i, boxes in enumerate(pred_boxes): +# if boxes is not None: +# image_id = int(batch_img_id[i]) +# # have no target +# if image_id == -1: +# continue +# for x, y, w, h, c, p in boxes: +# x, y, w, h, p = float(x), float(y), float(w), float(h), float(p) +# c = int(c) +# json_result.append( +# { +# "image_id": image_id, +# "category_id": class_trans[c - 1], +# "bbox": [x, y, w, h], +# "score": p, +# } +# ) + +def create_engine_context(engine_path, logger): + with open(engine_path, "rb") as f: + runtime = tensorrt.Runtime(logger) + assert runtime + engine = runtime.deserialize_cuda_engine(f.read()) + assert engine + context = engine.create_execution_context() + assert context + + return engine, context + +def get_io_bindings(engine): + # Setup I/O bindings + inputs = [] + outputs = [] + allocations = [] + + for i in range(engine.num_bindings): + is_input = False + if engine.binding_is_input(i): + is_input = True + name = engine.get_binding_name(i) + dtype = engine.get_binding_dtype(i) + shape = engine.get_binding_shape(i) + if is_input: + batch_size = shape[0] + size = np.dtype(tensorrt.nptype(dtype)).itemsize + for s in shape: + size *= s + allocation = cuda.mem_alloc(size) + binding = { + "index": i, + "name": name, + "dtype": np.dtype(tensorrt.nptype(dtype)), + "shape": list(shape), + "allocation": allocation, + } + print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") + allocations.append(allocation) + if engine.binding_is_input(i): + inputs.append(binding) + else: + outputs.append(binding) + return inputs, outputs, allocations \ No newline at end of file diff --git a/models/cv/detection/detr/ixrt/config/DETR_CONFIG b/models/cv/detection/detr/ixrt/config/DETR_CONFIG new file mode 100644 index 00000000..ec9562e1 --- /dev/null +++ b/models/cv/detection/detr/ixrt/config/DETR_CONFIG @@ -0,0 +1,44 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+# BSZ : batch size used for engine build and inference
+# IMGSIZE : model input height/width
+# RUN_MODE : [FPS, MAP]
+# PRECISION : [float16, int8]
+# MODEL_NAME : basename used for the generated onnx/engine files
+# ORIGINE_MODEL : original onnx file
+# COCO_GT : COCO ground-truth annotation file used by COCOeval
+# DATASET_DIR : dataset path used for quantization/inference
+# CHECKPOINTS_DIR : directory holding the generated onnx/engine files
+# LAYER_FUSION : run the decoder with fused operators; 0 = no fusion, 1 = fusion
+# DECODER_FASTER : two fused implementations exist; the faster one is quicker and can feed GPU NMS directly, the other keeps its output identical to the onnx. 1 = faster
+IMGSIZE=800
+MODEL_NAME=detr
+ORIGINE_MODEL=detr_res50.onnx
+DATA_PROCESS_TYPE=detr
+MODEL_INPUT_NAMES=(inputs)
+
+# QUANT CONFIG (only takes effect when PRECISION is int8)
+    # QUANT_OBSERVER : quantization observer, one of [hist_percentile, percentile, minmax, entropy, ema]
+    # QUANT_BATCHSIZE : dataloader batch size during quantization; keep it identical to the onnx batch size if possible, otherwise some ops (e.g. Reshape) may infer wrong shapes
+    # QUANT_STEP : number of quantization steps
+    # QUANT_SEED : random seed, to make the quantization result reproducible
+    # QUANT_EXIST_ONNX : set this if a quantized onnx from another source should be used
+QUANT_OBSERVER=hist_percentile
+QUANT_BATCHSIZE=1
+QUANT_STEP=32
+QUANT_SEED=42
+DISABLE_QUANT_LIST=()
+QUANT_EXIST_ONNX=
\ No newline at end of file
diff --git a/models/cv/detection/detr/ixrt/datasets/__init__.py b/models/cv/detection/detr/ixrt/datasets/__init__.py
new file mode 100644
index 00000000..162e24b4
--- /dev/null
+++ b/models/cv/detection/detr/ixrt/datasets/__init__.py
@@ -0,0 +1,14 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
\ No newline at end of file
diff --git a/models/cv/detection/detr/ixrt/datasets/coco.py b/models/cv/detection/detr/ixrt/datasets/coco.py
new file mode 100644
index 00000000..73c5df54
--- /dev/null
+++ b/models/cv/detection/detr/ixrt/datasets/coco.py
@@ -0,0 +1,131 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import os.path
+from typing import Any, Callable, List, Optional, Tuple
+
+import cv2
+
+from .vision import VisionDataset
+from .pre_process import get_post_process
+class CocoDetection(VisionDataset):
+    """`MS Coco Detection `_ Dataset.
+
+    It requires the `COCO API to be installed `_.
+ + Args: + root (string): Root directory where images are downloaded to. + annFile (string): Path to json annotation file. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.PILToTensor`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + transforms (callable, optional): A function/transform that takes input sample and its target as entry + and returns a transformed version. + """ + + def __init__( + self, + root: str, + annFile: str, + img_size: int, + data_process_type: str, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + transforms: Optional[Callable] = None, + + ) -> None: + super().__init__(root, transforms, transform, target_transform) + from pycocotools.coco import COCO + + self.coco = COCO(annFile) + self.ids = list(sorted(self.coco.imgs.keys())) + self.img_size = img_size + + self.transforms = get_post_process(data_process_type) + + def _load_image(self, id: int): + path = self.coco.loadImgs(id)[0]["file_name"] + data = cv2.imread(os.path.join(self.root, path)) + return data + + def _load_target(self, id: int) -> List[Any]: + return self.coco.loadAnns(self.coco.getAnnIds(id)) + + def __getitem__(self, index: int) -> Tuple[Any, Any]: + id = self.ids[index] + image = self._load_image(id) + target = self._load_target(id) + origin_shape = image.shape[:2] + + if self.transforms is not None: + image = self.transforms(image, self.img_size) + + if len(target) > 0: + image_id = target[0]["image_id"] + else: + # have no target + image_id = -1 + return image, origin_shape, image_id + + def __len__(self) -> int: + return len(self.ids) + + +class CocoCaptions(CocoDetection): + """`MS Coco Captions `_ Dataset. + + It requires the `COCO API to be installed `_. + + Args: + root (string): Root directory where images are downloaded to. + annFile (string): Path to json annotation file. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.PILToTensor`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + transforms (callable, optional): A function/transform that takes input sample and its target as entry + and returns a transformed version. + + Example: + + .. 
code:: python + + import torchvision.datasets as dset + import torchvision.transforms as transforms + cap = dset.CocoCaptions(root = 'dir where images are', + annFile = 'json annotation file', + transform=transforms.PILToTensor()) + + print('Number of samples: ', len(cap)) + img, target = cap[3] # load 4th sample + + print("Image Size: ", img.size()) + print(target) + + Output: :: + + Number of samples: 82783 + Image Size: (3L, 427L, 640L) + [u'A plane emitting smoke stream flying over a mountain.', + u'A plane darts across a bright blue sky behind a mountain covered in snow', + u'A plane leaves a contrail above the snowy mountain top.', + u'A mountain that has a plane flying overheard in the distance.', + u'A mountain view with a plume of smoke in the background'] + + """ + + def _load_target(self, id: int) -> List[str]: + return [ann["caption"] for ann in super()._load_target(id)] diff --git a/models/cv/detection/detr/ixrt/datasets/common.py b/models/cv/detection/detr/ixrt/datasets/common.py new file mode 100644 index 00000000..febaf0ea --- /dev/null +++ b/models/cv/detection/detr/ixrt/datasets/common.py @@ -0,0 +1,83 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
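As a usage sketch for the `CocoDetection` dataset defined above (dataset paths are placeholders): with `data_process_type="detr"` each item is the preprocessed CHW tensor, the original image height/width, and the COCO image id:

```python
# Hedged usage sketch for CocoDetection; the COCO paths are placeholders.
from datasets.coco import CocoDetection

ds = CocoDetection(root="/path/to/coco2017/val2017",
                   annFile="/path/to/coco2017/annotations/instances_val2017.json",
                   img_size=800,
                   data_process_type="detr")
img, (h, w), image_id = ds[0]
print(img.shape, (h, w), image_id)   # (3, 800, 800), original (height, width), COCO image id
```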
+ +import cv2 +import math +import numpy as np + +def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32): + # Resize and pad image while meeting stride-multiple constraints + shape = im.shape[:2] # current shape [height, width] + if isinstance(new_shape, int): + new_shape = (new_shape, new_shape) + + # Scale ratio (new / old) + r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) + if not scaleup: # only scale down, do not scale up (for better val mAP) + r = min(r, 1.0) + + # Compute padding + ratio = r, r # width, height ratios + new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) + dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding + if auto: # minimum rectangle + dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding + elif scaleFill: # stretch + dw, dh = 0.0, 0.0 + new_unpad = (new_shape[1], new_shape[0]) + ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios + + dw /= 2 # divide padding into 2 sides + dh /= 2 + + if shape[::-1] != new_unpad: # resize + im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR) + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border + return im, ratio, (dw, dh) + +def scale_boxes(net_shape, boxes, ori_shape, use_letterbox=False): + # Rescale boxes (xyxy) from net_shape to ori_shape + + if use_letterbox: + + gain = min( + net_shape[0] / ori_shape[0], net_shape[1] / ori_shape[1] + ) # gain = new / old + pad = (net_shape[1] - ori_shape[1] * gain) / 2, ( + net_shape[0] - ori_shape[0] * gain + ) / 2.0 + + boxes[:, [0, 2]] -= pad[0] # x padding + boxes[:, [1, 3]] -= pad[1] # y padding + boxes[:, :4] /= gain + else: + x_scale, y_scale = net_shape[1] / ori_shape[1], net_shape[0] / ori_shape[0] + + boxes[:, 0] /= x_scale + boxes[:, 1] /= y_scale + boxes[:, 2] /= x_scale + boxes[:, 3] /= y_scale + + clip_boxes(boxes, ori_shape) + return boxes + +def clip_boxes(boxes, shape): + + boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2 + boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2 + + return boxes \ No newline at end of file diff --git a/models/cv/detection/detr/ixrt/datasets/post_process.py b/models/cv/detection/detr/ixrt/datasets/post_process.py new file mode 100644 index 00000000..91afc4b6 --- /dev/null +++ b/models/cv/detection/detr/ixrt/datasets/post_process.py @@ -0,0 +1,173 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
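To illustrate the helpers above with arbitrary values: `letterbox` pads an image to the requested shape, and `scale_boxes` maps xyxy boxes from network space back to the original image:

```python
# Hedged example for letterbox/scale_boxes with made-up shapes and boxes.
import numpy as np
from datasets.common import letterbox, scale_boxes

img = np.zeros((480, 640, 3), dtype=np.uint8)            # pretend original frame (h, w, c)
padded, ratio, (dw, dh) = letterbox(img, new_shape=640, auto=False)
print(padded.shape, ratio, (dw, dh))                      # (640, 640, 3) (1.0, 1.0) (0.0, 80.0)

boxes = np.array([[100.0, 120.0, 300.0, 360.0]])          # xyxy in 640x640 network space
boxes = scale_boxes((640, 640), boxes, (480, 640), use_letterbox=True)
print(boxes)                                              # [[100.  40. 300. 280.]] in the 480x640 image
```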
+ +import cv2 +import math +import numpy as np +import torch +import torch.nn.functional as F + +from .common import letterbox, scale_boxes, clip_boxes + +def get_post_process(data_process_type): + if data_process_type == "yolov5": + return Yolov5Postprocess + elif data_process_type == "yolov3": + return Yolov3Postprocess + elif data_process_type == "yolox": + return YoloxPostprocess + elif data_process_type == "detr": + return DetrPostprocess + return None + +def Yolov3Postprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + cur_box = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + boxes = scale_boxes( + (imgsz[0], imgsz[1]), + cur_box, + (ori_img_shape[0][i], ori_img_shape[1][i]), + use_letterbox=False + ) + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box + +def Yolov5Postprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + cur_box = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + boxes = scale_boxes( + (imgsz[0], imgsz[1]), + cur_box, + (ori_img_shape[0][i], ori_img_shape[1][i]), + use_letterbox=True + ) + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box + +def YoloxPostprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + boxes = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + r = min(imgsz[0]/ori_img_shape[0][i], imgsz[1]/ori_img_shape[1][i]) + boxes[:, :4] /= r + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + clip_boxes(boxes, (ori_img_shape[0][i], ori_img_shape[1][i])) + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box + +def box_cxcywh_to_xyxy(x): + x_c, y_c, w, h = x.unbind(-1) + b = [(x_c - 0.5 * w), (y_c - 0.5 * h), + (x_c + 0.5 * w), (y_c + 0.5 * h)] + return torch.stack(b, dim=-1) + + +def convert_to_xywh(boxes): + xmin, ymin, xmax, ymax = boxes.unbind(-1) + return torch.stack((xmin, ymin, xmax - xmin, ymax - ymin), dim=1) + +def DetrPostprocess(pred_logits, pred_boxes, target_sizes): + + out_logits = torch.from_numpy(pred_logits) + out_bbox = torch.from_numpy(pred_boxes) + assert len(target_sizes) == 2 + + prob = F.softmax(out_logits, -1) + scores, labels = prob[..., :-1].max(-1) + + # convert to [x0, y0, x1, y1] format + boxes = box_cxcywh_to_xyxy(out_bbox) + # and from relative [0, 1] to absolute [0, height] coordinates + img_w, img_h = target_sizes + scale_fct = torch.tensor([img_w, img_h, img_w, img_h]) + boxes = boxes * scale_fct + + + boxes = clip_boxes(boxes, target_sizes) + boxes = convert_to_xywh(boxes) + + labels = labels.unsqueeze(1) + scores =scores.unsqueeze(1) + pred_boxes = torch.cat([ + boxes, + labels, + scores], dim=1).numpy().tolist() + return pred_boxes 
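A small synthetic check of `DetrPostprocess` above (shapes follow the DETR-R50 head: 100 queries, 91 classes plus a trailing no-object logit; the numbers are made up):

```python
# Hedged sanity check for DetrPostprocess with synthetic model outputs.
import numpy as np
from datasets.post_process import DetrPostprocess

logits = np.zeros((100, 92), dtype=np.float32)    # [num_queries, 91 classes + no-object]
logits[:, 91] = 10.0                              # push every query towards "no object"
logits[0, 1] = 12.0                               # query 0: confident category_id 1 (person)
boxes = np.full((100, 4), 0.5, dtype=np.float32)  # normalized (cx, cy, w, h)
boxes[0] = [0.5, 0.5, 0.2, 0.4]

dets = DetrPostprocess(logits, boxes, [640, 480])  # target size is (img_w, img_h)
print(dets[0])   # ~[256.0, 144.0, 128.0, 192.0, 1.0, 0.88] -> x, y, w, h, label, score
```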
\ No newline at end of file
diff --git a/models/cv/detection/detr/ixrt/datasets/pre_process.py b/models/cv/detection/detr/ixrt/datasets/pre_process.py
new file mode 100644
index 00000000..c7f490df
--- /dev/null
+++ b/models/cv/detection/detr/ixrt/datasets/pre_process.py
@@ -0,0 +1,91 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import cv2
+import math
+import numpy as np
+
+from .common import letterbox
+
+# NOTE: despite the name, this returns the *pre*-processing function for each pipeline (name kept for existing callers).
+def get_post_process(data_process_type):
+    if data_process_type == "yolov5":
+        return Yolov5Preprocess
+    elif data_process_type == "yolov3":
+        return Yolov3Preprocess
+    elif data_process_type == "yolox":
+        return YoloxPreprocess
+    elif data_process_type == "detr":
+        return DetrPreprocess
+    return None
+
+def Yolov3Preprocess(image, img_size):
+
+    h0, w0 = image.shape[:2]  # orig hw
+    r = img_size / max(h0, w0)  # ratio
+
+    image = cv2.resize(image, (img_size, img_size))
+    image = image.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
+    image = np.ascontiguousarray(image).astype(np.float32) / 255.0  # 0~1 np array
+    return image
+
+def Yolov5Preprocess(image, img_size, augment=False):
+
+    h0, w0 = image.shape[:2]  # orig hw
+    r = img_size / max(h0, w0)  # ratio
+
+    if r != 1:  # if sizes are not equal
+        interp = cv2.INTER_LINEAR if (augment or r > 1) else cv2.INTER_AREA
+        image = cv2.resize(image, (math.ceil(w0 * r), math.ceil(h0 * r)), interpolation=interp)
+
+    # shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size  rect == True
+
+    image, ratio, dwdh = letterbox(image, new_shape=img_size, auto=False, scaleup=False)
+    image = image.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
+    image = np.ascontiguousarray(image).astype(np.float32) / 255.0  # 0~1 np array
+    return image
+
+def YoloxPreprocess(img, img_size, swap=(2,0,1)):
+
+    padded_img = np.ones((img_size, img_size, 3), dtype=np.uint8) * 114
+    r = min(img_size / img.shape[0], img_size / img.shape[1])
+    resized_img = cv2.resize(
+        img,
+        (int(img.shape[1] * r), int(img.shape[0] * r)),
+        interpolation=cv2.INTER_LINEAR,
+    ).astype(np.uint8)
+
+    padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img
+    padded_img = padded_img.transpose(swap)
+    padded_img = np.ascontiguousarray(padded_img, dtype=np.float32)
+
+    return padded_img
+
+def DetrPreprocess(image, img_size):
+    # img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
+    # img = img.resize((img_size, img_size))
+
+    # ImageNet normalization constants
+    mean = [0.485, 0.456, 0.406]
+    std = [0.229, 0.224, 0.225]
+
+    image = cv2.resize(image, (img_size, img_size))
+    image = image.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
+    image = np.ascontiguousarray(image).astype(np.float32) / 255.0  # 0~1 np array
+
+    image[0,:,:] = (image[0,:,:] - mean[0]) / std[0]
+    image[1,:,:] = (image[1,:,:] - mean[1]) / std[1]
+    image[2,:,:] = (image[2,:,:] - mean[2]) / std[2]
+
+    return image
+
\ No newline at end of file
diff --git a/models/cv/detection/detr/ixrt/datasets/vision.py
b/models/cv/detection/detr/ixrt/datasets/vision.py new file mode 100755 index 00000000..eadefb2c --- /dev/null +++ b/models/cv/detection/detr/ixrt/datasets/vision.py @@ -0,0 +1,151 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +from typing import Any, Callable, List, Optional, Tuple + +import torch +import torch.utils.data as data + +from types import FunctionType + +def _log_api_usage_once(obj: Any) -> None: + + """ + Logs API usage(module and name) within an organization. + In a large ecosystem, it's often useful to track the PyTorch and + TorchVision APIs usage. This API provides the similar functionality to the + logging module in the Python stdlib. It can be used for debugging purpose + to log which methods are used and by default it is inactive, unless the user + manually subscribes a logger via the `SetAPIUsageLogger method `_. + Please note it is triggered only once for the same API call within a process. + It does not collect any data from open-source users since it is no-op by default. + For more information, please refer to + * PyTorch note: https://pytorch.org/docs/stable/notes/large_scale_deployments.html#api-usage-logging; + * Logging policy: https://github.com/pytorch/vision/issues/5052; + + Args: + obj (class instance or method): an object to extract info from. + """ + module = obj.__module__ + if not module.startswith("torchvision"): + module = f"torchvision.internal.{module}" + name = obj.__class__.__name__ + if isinstance(obj, FunctionType): + name = obj.__name__ + torch._C._log_api_usage_once(f"{module}.{name}") + +class VisionDataset(data.Dataset): + """ + Base Class For making datasets which are compatible with torchvision. + It is necessary to override the ``__getitem__`` and ``__len__`` method. + + Args: + root (string): Root directory of dataset. + transforms (callable, optional): A function/transforms that takes in + an image and a label and returns the transformed versions of both. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + + .. note:: + + :attr:`transforms` and the combination of :attr:`transform` and :attr:`target_transform` are mutually exclusive. 
+ """ + + _repr_indent = 4 + + def __init__( + self, + root: str, + transforms: Optional[Callable] = None, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + ) -> None: + _log_api_usage_once(self) + if isinstance(root, str): + root = os.path.expanduser(root) + self.root = root + + has_transforms = transforms is not None + has_separate_transform = transform is not None or target_transform is not None + if has_transforms and has_separate_transform: + raise ValueError("Only transforms or transform/target_transform can be passed as argument") + + # for backwards-compatibility + self.transform = transform + self.target_transform = target_transform + + if has_separate_transform: + transforms = StandardTransform(transform, target_transform) + self.transforms = transforms + + def __getitem__(self, index: int) -> Any: + """ + Args: + index (int): Index + + Returns: + (Any): Sample and meta data, optionally transformed by the respective transforms. + """ + raise NotImplementedError + + def __len__(self) -> int: + raise NotImplementedError + + def __repr__(self) -> str: + head = "Dataset " + self.__class__.__name__ + body = [f"Number of datapoints: {self.__len__()}"] + if self.root is not None: + body.append(f"Root location: {self.root}") + body += self.extra_repr().splitlines() + if hasattr(self, "transforms") and self.transforms is not None: + body += [repr(self.transforms)] + lines = [head] + [" " * self._repr_indent + line for line in body] + return "\n".join(lines) + + def _format_transform_repr(self, transform: Callable, head: str) -> List[str]: + lines = transform.__repr__().splitlines() + return [f"{head}{lines[0]}"] + ["{}{}".format(" " * len(head), line) for line in lines[1:]] + + def extra_repr(self) -> str: + return "" + + +class StandardTransform: + def __init__(self, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None) -> None: + self.transform = transform + self.target_transform = target_transform + + def __call__(self, input: Any, target: Any) -> Tuple[Any, Any]: + if self.transform is not None: + input = self.transform(input) + if self.target_transform is not None: + target = self.target_transform(target) + return input, target + + def _format_transform_repr(self, transform: Callable, head: str) -> List[str]: + lines = transform.__repr__().splitlines() + return [f"{head}{lines[0]}"] + ["{}{}".format(" " * len(head), line) for line in lines[1:]] + + def __repr__(self) -> str: + body = [self.__class__.__name__] + if self.transform is not None: + body += self._format_transform_repr(self.transform, "Transform: ") + if self.target_transform is not None: + body += self._format_transform_repr(self.target_transform, "Target transform: ") + + return "\n".join(body) diff --git a/models/cv/detection/detr/ixrt/deploy.py b/models/cv/detection/detr/ixrt/deploy.py new file mode 100644 index 00000000..d1052d2b --- /dev/null +++ b/models/cv/detection/detr/ixrt/deploy.py @@ -0,0 +1,123 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +# License for the specific language governing permissions and limitations +# under the License. + +# !/usr/bin/env python +# -*- coding: utf-8 -*- +import argparse +from tensorrt.deploy.api import GraphTransform, create_source, create_target + +class Transform: + def __init__(self, graph): + self.t = GraphTransform(graph) + self.graph = graph + + def ReplaceFocus(self, input_edge, outputs, to_op): + input_var = self.graph.get_variable(input_edge) + op = self.graph.get_operator(to_op) + self.t.delete_operators_between_var_op( + from_var=input_var, to_op=op + ) + self.t.make_operator( + "Focus", inputs=input_edge, outputs=outputs + ) + return self.graph + + def AddYoloDecoderOp(self, inputs: list, outputs: list, op_type, **attributes): + if attributes["anchor"] is None: + del attributes["anchor"] + self.t.make_operator( + op_type, inputs=inputs, outputs=outputs, **attributes + ) + return self.graph + + def AddConcatOp(self, inputs: list, outputs, **attributes): + self.t.make_operator( + "Concat", inputs=inputs, outputs=outputs, **attributes + ) + return self.graph + +def customize_ops(graph, args): + t = Transform(graph) + fuse_focus = args.focus_input is not None and args.focus_output is not None and args.focus_last_node is not None + if fuse_focus: + graph = t.ReplaceFocus( + input_edge=args.focus_input, + outputs=args.focus_output, + to_op=args.focus_last_node + ) + decoder_input = args.decoder_input_names + num = len(decoder_input) // 3 + graph = t.AddYoloDecoderOp( + inputs=decoder_input[:num], + outputs=["decoder_8"], + op_type=args.decoder_type, + anchor=args.decoder8_anchor, + num_class=args.num_class, + stride=8, + faster_impl=args.faster + ) + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num:num*2], + outputs=["decoder_16"], + op_type=args.decoder_type, + anchor=args.decoder16_anchor, + num_class=args.num_class, + stride=16, + faster_impl=args.faster + ) + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num*2:], + outputs=["decoder_32"], + op_type=args.decoder_type, + anchor=args.decoder32_anchor, + num_class=args.num_class, + stride=32, + faster_impl=args.faster + ) + graph = t.AddConcatOp( + inputs=["decoder_32", "decoder_16", "decoder_8"], + outputs=["output"], + axis=1 + ) + + graph.outputs.clear() + graph.add_output("output") + graph.outputs["output"].dtype = "FLOAT" + return graph + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--src", type=str) + parser.add_argument("--dst", type=str) + parser.add_argument("--decoder_type", type=str, choices=["YoloV3Decoder", "YoloV5Decoder", "YoloV7Decoder", "YoloxDecoder"]) + parser.add_argument("--decoder_input_names", nargs='+', type=str) + parser.add_argument("--decoder8_anchor", nargs='*', type=int) + parser.add_argument("--decoder16_anchor", nargs='*', type=int) + parser.add_argument("--decoder32_anchor", nargs='*', type=int) + parser.add_argument("--num_class", type=int, default=80) + parser.add_argument("--faster", type=int, default=1) + parser.add_argument("--focus_input", type=str, default=None) + parser.add_argument("--focus_output", type=str, default=None) + parser.add_argument("--focus_last_node", type=str, default=None) + args = parser.parse_args() + return args + +if __name__ == "__main__": + + args = parse_args() + graph = create_source(args.src)() + graph = customize_ops(graph, args) + create_target(saved_path=args.dst).export(graph) + print("Surged onnx lies on", args.dst) \ No newline at end of file diff --git a/models/cv/detection/detr/ixrt/export_model.py 
b/models/cv/detection/detr/ixrt/export_model.py new file mode 100644 index 00000000..55385fb3 --- /dev/null +++ b/models/cv/detection/detr/ixrt/export_model.py @@ -0,0 +1,121 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import argparse + +import torch +import onnx +from onnx import shape_inference +from onnxsim import simplify + + +validate=True + +def stat_model(onnx_file): + onnx_model = onnx.load(onnx_file) + graph = onnx_model.graph + + op_types = list() + for node in graph.node: + op_types.append(node.op_type) + + print(set(op_types)) + +def ort_inference(onnx_file, input): + import onnxruntime as ort + + ort_session = ort.InferenceSession(onnx_file, + providers=['CPUExecutionProvider']) + in_name = ort_session.get_inputs()[0].name + + onnx_outputs = ort_session.get_outputs() + output_names = [] + for o in onnx_outputs: + output_names.append(o.name) + + input_np = input.clone().cpu().numpy() + out = ort_session.run(output_names, + input_feed={in_name: input_np} + ) + return out + +def convert_model(onnx_file, config): + model = torch.hub.load('facebookresearch/detr:main', 'detr_resnet50', pretrained=True) + model.eval() + + input = torch.randn([config.bsz, 3, config.img_H, config.img_W]) + out = model(input) + torch.onnx.export( + model, + input, + onnx_file, + verbose = False, + input_names = ["input"], + output_names = ["pred_logits","pred_boxes"], + opset_version = 11 + ) + + onnx_model = onnx.load(onnx_file) # load onnx model + model_simp, check = simplify(onnx_model) + assert check, "Simplified ONNX model could not be validated" + + onnx_model = shape_inference.infer_shapes(model_simp) + + onnx.save(onnx_model, onnx_file) + print('finished exporting onnx') + + # stat_model(onnx_file) + + if validate: + torch_out = model(input)["pred_logits"] + onnx_out = ort_inference(onnx_file, input)[0] + + import numpy as np + torch_out = torch_out.detach().numpy() + diff = np.abs(torch_out-onnx_out).max() + print(diff) + #sim = cosine_similarity(torch_out.reshape(1,-1), onnx_out.reshape(1, -1)) + #print(sim[0]) + + +def parse_config(): + parser = argparse.ArgumentParser() + + parser.add_argument("--torch_file", type=str, help="torch model") + parser.add_argument("--onnx_file", type=str, help="onnx model",default="") + parser.add_argument("--bsz", type=int, default=1, help="test batch size") + parser.add_argument( + "--img_H", + type=int, + default=800, + help="inference size h", + ) + parser.add_argument( + "--img_W", + type=int, + default=800, + help="inference size W", + ) + + + config = parser.parse_args() + return config + +if __name__ == "__main__": + + config = parse_config() + onnx_file = config.onnx_file + convert_model(onnx_file, config) \ No newline at end of file diff --git a/models/cv/detection/detr/ixrt/extract_graph_weight.py b/models/cv/detection/detr/ixrt/extract_graph_weight.py new file mode 100644 index 00000000..9094316e --- /dev/null +++ 
b/models/cv/detection/detr/ixrt/extract_graph_weight.py @@ -0,0 +1,139 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import argparse +import collections +import json +import os + +import numpy as np +import onnx + +def parse_onnx_model(onnx_model): + graph = onnx_model.graph + nodes = graph.node + initializer = graph.initializer + value_info = graph.value_info + model = {} + model["nodes"] = {} + model["tensors"] = {} + model["edges"] = {} + all_edge = [] + for i, item in enumerate(nodes): + node_name = item.name + input_edge_list = list(item.input) + output_edge_list = list(item.output) + all_edge.extend(input_edge_list) + all_edge.extend(output_edge_list) + node_dict = {"inputs": input_edge_list, "outputs": output_edge_list} + node_dict["op_type"] = item.op_type + attribute_dict = {} + for attr in item.attribute: + if attr.type == onnx.AttributeProto().AttributeType.FLOAT: + attribute_dict[attr.name] = attr.f + if attr.type == onnx.AttributeProto().AttributeType.FLOATS: + attribute_dict[attr.name] = [x for x in attr.floats] + if attr.type == onnx.AttributeProto().AttributeType.INT: + attribute_dict[attr.name] = attr.i + if attr.type == onnx.AttributeProto().AttributeType.INTS: + attribute_dict[attr.name] = [x for x in attr.ints] + if attr.type == onnx.AttributeProto().AttributeType.STRING: + attribute_dict[attr.name] = str(attr.s.decode("UTF-8")) + if attr.type == onnx.AttributeProto().AttributeType.STRINGS: + attribute_dict[attr.name] = [ + str(x.decode("UTF-8")) for x in attr.strings + ] + node_dict["attrbiute"] = attribute_dict + model["nodes"][node_name] = node_dict + + constant_edge = [] + for i, item in enumerate(initializer): + tensor_name = item.name + constant_edge.append(tensor_name) + if item.data_type == 1: + tensor_dict = {"data_type": "float32"} + elif item.data_type == 3: + tensor_dict = {"data_type": "int32"} + elif item.data_type == 7: + tensor_dict = {"data_type": "int64"} + tensor_dict["dims"] = list(item.dims) + + model["tensors"][tensor_name] = tensor_dict + + miss_edge = [] + for edge in all_edge: + if edge not in constant_edge: + miss_edge.append(edge) + + for info in value_info: + info_name = info.name + if info_name in miss_edge: + edge_dict = { + "dims": [int(x.dim_value) for x in info.type.tensor_type.shape.dim] + } + model["edges"][info_name] = edge_dict + + """ + Export weight + """ + var_dict = collections.OrderedDict() + for item in initializer: + tensor_name = item.name + tensor_shape = list(item.dims) + if len(tensor_shape) == 0: + continue + + if item.data_type == 1 and len(item.float_data): + np_data = np.array(list(item.float_data), dtype=np.float32) + np_data = np_data.reshape(tensor_shape) + var_dict[tensor_name] = np_data + elif item.data_type == 1 and len(item.raw_data): + np_data = np.frombuffer(item.raw_data, dtype=np.float32) + np_data = np_data.reshape(tensor_shape) + var_dict[tensor_name] = np_data + elif item.data_type == 3 and 
len(item.int32_data): + np_data = np.array(list(item.int32_data), dtype=np.int32) + np_data = np_data.reshape(tensor_shape) + var_dict[tensor_name] = np_data + elif item.data_type == 3 and len(item.raw_data): + np_data = np.frombuffer(item.raw_data, dtype=np.int32) + np_data.dtype = np.int32 + np_data = np_data.reshape(tensor_shape) + var_dict[tensor_name] = np_data + elif item.data_type == 7 and len(item.raw_data): + np_data = np.frombuffer(item.raw_data, dtype=np.int64) + np_data = np_data.reshape(tensor_shape) + var_dict[tensor_name] = np_data + elif item.data_type == 7 and len(item.int64_data): + temp = [] + for i in item.int64_data: + temp.append(i) + np_data = np.array(temp, dtype=np.int64) + np_data = np_data.reshape(tensor_shape) + var_dict[tensor_name] = np_data + else: + print( + "tensor name: ", + tensor_name, + ", type: ", + item.data_type, + ", len: ", + len(item.raw_data), + len(item.float_data), + len(item.int32_data), + len(item.int64_data), + ", will not save into weights file", + ) + return model, var_dict \ No newline at end of file diff --git a/models/cv/detection/detr/ixrt/inference.py b/models/cv/detection/detr/ixrt/inference.py new file mode 100755 index 00000000..eb33b614 --- /dev/null +++ b/models/cv/detection/detr/ixrt/inference.py @@ -0,0 +1,239 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
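The graph/weight extraction above is reused by `modify_batchsize.py`; a hedged sketch of using it to inspect an exported model (the ONNX path is a placeholder):

```python
# Hedged sketch: parse an ONNX file into the node/weight dictionaries built above.
import onnx
from extract_graph_weight import parse_onnx_model

onnx_model = onnx.load("checkpoints/detr_res50.onnx")     # placeholder path
graph_desc, weights = parse_onnx_model(onnx_model)
print(len(graph_desc["nodes"]), "nodes,", len(weights), "weight tensors")
for name, arr in list(weights.items())[:3]:
    print(name, arr.shape, arr.dtype)
```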
+ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import argparse +import glob +import json +import os +import time +import sys + +import torch +import numpy as np +import pycuda.autoinit +import pycuda.driver as cuda + +from coco_labels import coco80_to_coco91_class, labels +from common import save2json, box_class85to6 +from common import create_engine_context, get_io_bindings +from calibration_dataset import create_dataloaders +from datasets.post_process import get_post_process + +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval +from tqdm import tqdm +from tqdm.contrib import tzip + +import tensorrt +from load_ixrt_plugin import load_ixrt_plugin +load_ixrt_plugin() + +def main(config): + + # Load dataloader + dataloader = create_dataloaders( + data_path=config.eval_dir, + annFile=config.coco_gt, + img_sz=config.imgsz, + batch_size=config.bsz, + step=config.loop_count, + data_process_type=config.data_process_type, + workers=8 + ) + + # Load post process func + if config.test_mode == "MAP": + post_process_func = get_post_process(config.data_process_type) + + bsz = config.bsz + num_samples = 5000 + if config.loop_count > 0: + num_samples = bsz * config.loop_count + num_batch = len(dataloader) + print("=" * 30) + print(f"Test Mode : {'Asynchronous' if config.use_async else 'Synchronous'}") + print(f"Total sample : {num_samples}\nBatch_size : {bsz}\nRun Batch : {num_batch}") + print("=" * 30) + + json_result = [] + forward_time = 0.0 + class_map = coco80_to_coco91_class() + + host_mem = tensorrt.IHostMemory + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + + # Load Engine + engine, context = create_engine_context(config.model_engine, logger) + inputs, outputs, allocations = get_io_bindings(engine) + + # Warm up + if config.warm_up > 0: + print("\nWarm Start.") + for i in range(config.warm_up): + context.execute_v2(allocations) + print("Warm Done.") + + # Prepare the output data + batch_pred_logits = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) + batch_pred_boxes = np.zeros(outputs[1]["shape"], outputs[1]["dtype"]) + print(f"pred_logits shape : {batch_pred_logits.shape} pred_logits type : {batch_pred_logits.dtype}") + print(f"pred_boxes shape : {batch_pred_boxes.shape} pred_boxes type : {batch_pred_boxes.dtype}") + + for batch_data, batch_img_shape, batch_img_id in tqdm(dataloader): + batch_data = batch_data.numpy() + batch_img_shape = [batch_img_shape[0].numpy(), batch_img_shape[1].numpy()] + + cur_bsz_sample = batch_data.shape[0] + + # Set input + cuda.memcpy_htod(inputs[0]["allocation"], batch_data) + + # Forward + start_time = time.time() + context.execute_v2(allocations) + end_time = time.time() + forward_time += end_time - start_time + + if config.test_mode == "MAP": + # Fetch output + cuda.memcpy_dtoh(batch_pred_logits, outputs[0]["allocation"]) + cuda.memcpy_dtoh(batch_pred_boxes, outputs[1]["allocation"]) + + for (pred_logits, pred_boxes, img_h, img_w, img_id) in zip( + batch_pred_logits, + batch_pred_boxes, + batch_img_shape[0], + batch_img_shape[1], + batch_img_id): + pred_boxes = post_process_func(pred_logits, pred_boxes, [img_w, img_h]) + # print(img_id) + # print(img_w, img_h) + + # import ipdb + # ipdb.set_trace() + + save2json(img_id, pred_boxes, json_result) + + fps = num_samples / forward_time + + if config.test_mode == "FPS": + print("FPS : ", fps) + print(f"Performance Check : Test {fps} >= target {config.fps_target}") + if fps >= config.fps_target: + print("pass!") + exit() + else: + print("failed!") + exit(1) + + if config.test_mode 
== "MAP": + if len(json_result) == 0: + print("Predict zero box!") + exit(1) + + if not os.path.exists(config.pred_dir): + os.makedirs(config.pred_dir) + + pred_json = os.path.join( + config.pred_dir, f"{config.model_name}_{config.precision}_preds.json" + ) + with open(pred_json, "w") as f: + json.dump(json_result, f) + + anno_json = config.coco_gt + anno = COCO(anno_json) # init annotations api + pred = anno.loadRes(pred_json) # init predictions api + eval = COCOeval(anno, pred, "bbox") + + eval.evaluate() + eval.accumulate() + print( + f"==============================eval {config.model_name} {config.precision} coco map ==============================" + ) + eval.summarize() + + map, map50 = eval.stats[:2] + print("MAP@0.5 : ", map50) + print(f"Accuracy Check : Test {map50} >= target {config.map_target}") + if map50 >= config.map_target: + print("pass!") + exit() + else: + print("failed!") + exit(1) + +def parse_config(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--model_name", type=str, default="YOLOV5s", help="YOLOV3 YOLOV5 YOLOV7 YOLOX" + ) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", + help="The precision of datatype") + parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") + parser.add_argument( + "--model_engine", + type=str, + default="", + help="model engine path", + ) + parser.add_argument( + "--nms_engine", + type=str, + default="", + help="nms engine path", + ) + parser.add_argument( + "--coco_gt", + type=str, + default="data/datasets/cv/coco2017/annotations/instances_val2017.json", + help="coco instances_val2017.json", + ) + parser.add_argument("--warm_up", type=int, default=3, help="warm_up count") + parser.add_argument("--loop_count", type=int, default=-1, help="loop count") + parser.add_argument( + "--eval_dir", + type=str, + default="data/datasets/cv/coco2017/val2017", + help="coco image dir", + ) + parser.add_argument("--bsz", type=int, default=32, help="test batch size") + parser.add_argument( + "--imgsz", + "--img", + "--img-size", + type=int, + default=640, + help="inference size h,w", + ) + parser.add_argument("--max_det", type=int, default=1000, help="maximum detections per image") + parser.add_argument("--data_process_type", type=str, default="none") + parser.add_argument("--use_async", action="store_true") + parser.add_argument("--debug", action="store_true") + parser.add_argument("--pred_dir", type=str, default=".", help="pred save json dirs") + parser.add_argument("--map_target", type=float, default=0.56, help="target mAP") + parser.add_argument("--fps_target", type=float, default=-1.0, help="target fps") + parser.add_argument("--decoder_faster", type=int, default=0, help="decoder faster can use gpu nms directly") + parser.add_argument("--nms_type", type=str, default="GPU", help="GPU/CPU") + + config = parser.parse_args() + print("config:", config) + return config + +if __name__ == "__main__": + config = parse_config() + main(config) \ No newline at end of file diff --git a/models/cv/detection/detr/ixrt/load_ixrt_plugin.py b/models/cv/detection/detr/ixrt/load_ixrt_plugin.py new file mode 100644 index 00000000..ae47dc8e --- /dev/null +++ b/models/cv/detection/detr/ixrt/load_ixrt_plugin.py @@ -0,0 +1,27 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import ctypes
+import tensorrt
+from os.path import join, dirname, exists
+
+def load_ixrt_plugin(logger=tensorrt.Logger(tensorrt.Logger.INFO), namespace="", dynamic_path=""):
+    if not dynamic_path:
+        dynamic_path = join(dirname(tensorrt.__file__), "lib", "libixrt_plugin.so")
+    if not exists(dynamic_path):
+        raise FileNotFoundError(
+            f"The ixrt_plugin lib {dynamic_path} does not exist; please provide a valid plugin path!")
+    ctypes.CDLL(dynamic_path)
+    tensorrt.init_libnvinfer_plugins(logger, namespace)
+    print(f"Loaded plugin from {dynamic_path}")
\ No newline at end of file
diff --git a/models/cv/detection/detr/ixrt/modify_batchsize.py b/models/cv/detection/detr/ixrt/modify_batchsize.py
new file mode 100644
index 00000000..5c01e8b6
--- /dev/null
+++ b/models/cv/detection/detr/ixrt/modify_batchsize.py
@@ -0,0 +1,179 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
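+
+# Usage sketch (illustrative paths only; the infer_detr_fp16_*.sh scripts drive
+# this step with their own CHECKPOINTS_DIR/MODEL_NAME values):
+#
+#   python3 modify_batchsize.py \
+#       --batch_size 4 \
+#       --origin_model checkpoints/detr_refine.onnx \
+#       --output_model checkpoints/detr_float16_bs4.onnx
+#
+# The script rewrites the batch dimension of the graph inputs and patches the
+# Reshape, MatMul, Add and Concat initializers that still carry the batch size
+# baked in at export time.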
+ +import argparse +from copy import deepcopy +import numpy as np +import onnx +from onnx import numpy_helper + +from extract_graph_weight import parse_onnx_model + + +def modify_shape_dim(dim, bsz): + batch_size = bsz + # update dim to be a symbolic value + if isinstance(batch_size, str): + # set dynamic batch size + dim.dim_param = batch_size + elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): + # set given batch size + dim.dim_value = int(batch_size) + else: + # set batch size of 1 + dim.dim_value = 1 + +def change_input_dim(onnx_model, bsz): + inputs = onnx_model.graph.input + for input in inputs: + dim1 = input.type.tensor_type.shape.dim[0] + old_bsz = dim1.dim_value + modify_shape_dim(dim1, bsz) + return old_bsz + +# input[1] shape is initializer +def change_reshape_initializer(model, var_dict, old_bsz, bsz): + print("change_reshape_initializer") + modified_list = list() + for name, node_dict in model["nodes"].items(): + if node_dict["op_type"] != "Reshape": + continue + shape_name = node_dict["inputs"][1] + new_datas = deepcopy(var_dict[shape_name]) + done = False + if (len(new_datas) == 2): + if new_datas[0] == 625: + new_datas[0] = 625 * (bsz / old_bsz) + if new_datas[0] / old_bsz == 100: + new_datas[0] = 100 * bsz + elif (len(new_datas) == 3): + for i in range(len(new_datas)): + if new_datas[i] == old_bsz: + new_datas[i] = bsz + done = True + if done == False: + for i in range(len(new_datas)): + if new_datas[i] / old_bsz == 8: + new_datas[i] = (bsz / old_bsz) * 8 + done = True + + var_dict[shape_name] = new_datas + modified_list.append(shape_name) + return modified_list + +def change_matmul_initializer(model, var_dict, bsz): + print("change_matmul_initializer") + modified_list = list() + for name, node_dict in model["nodes"].items(): + if node_dict["op_type"] != "MatMul": + continue + for edge_name in node_dict["inputs"]: + if edge_name not in var_dict: + continue + if len(var_dict[edge_name].shape) != 3: + continue + data = deepcopy(var_dict[edge_name]) + + datas = list() + for _ in range(bsz): + datas.append(data) + new_datas = np.concatenate(datas, axis=0) + var_dict[edge_name] = new_datas + modified_list.append(edge_name) + return modified_list + +def change_add_initializer(model, var_dict, bsz): + print("change_add_initializer") + modified_list = list() + for name, node_dict in model["nodes"].items(): + if node_dict["op_type"] != "Add": + continue + for edge_name in node_dict["inputs"]: + if edge_name not in var_dict: + continue + if len(var_dict[edge_name].shape) != 3: + continue + data = deepcopy(var_dict[edge_name])[:, 0:1, ...] + + datas = list() + for _ in range(bsz): + datas.append(data) + new_datas = np.concatenate(datas, axis=1) + var_dict[edge_name] = new_datas + modified_list.append(edge_name) + return modified_list + +# A certain mode, input for Concat operator maybe constant. +def change_concat_initializer(model, var_dict, bsz): + print("change_concat_initializer") + modified_list = list() + for name, node_dict in model["nodes"].items(): + if node_dict["op_type"] != "Concat": + continue + for edge_name in node_dict["inputs"]: + if edge_name not in var_dict: + continue + data = deepcopy(var_dict[edge_name])[0:1, ...] 
+ + datas = list() + for _ in range(bsz): + datas.append(data) + new_datas = np.concatenate(datas, axis=0) + var_dict[edge_name] = new_datas + modified_list.append(edge_name) + return modified_list + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--batch_size", type=int) + parser.add_argument("--origin_model", type=str) + parser.add_argument("--output_model", type=str) + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = parse_args() + onnx_model = onnx.load(args.origin_model) + bsz = args.batch_size + old_bsz = change_input_dim(onnx_model, bsz) + if old_bsz == bsz: + print("Change batch size skipped") + onnx.save(onnx_model, args.output_model) + exit() + + model, weights = parse_onnx_model(onnx_model) + + modified_list = list() + reshape_modified = change_reshape_initializer(model, weights, old_bsz, bsz) + concat_modified = change_concat_initializer(model, weights, bsz) + matmul_modified = change_matmul_initializer(model, weights, bsz) + add_modified = change_add_initializer(model, weights, bsz) + modified_list.extend(reshape_modified) + modified_list.extend(concat_modified) + modified_list.extend(matmul_modified) + modified_list.extend(add_modified) + + # Remove the old initializer, and append new. + initializer = onnx_model.graph.initializer + for name in modified_list: + for item in initializer: + if name == item.name: + initializer.remove(item) + + data = weights[name] + new_params = numpy_helper.from_array(data, name=name) + initializer.append(new_params) + + onnx.save(onnx_model, args.output_model) \ No newline at end of file diff --git a/models/cv/detection/detr/ixrt/refine_model.py b/models/cv/detection/detr/ixrt/refine_model.py new file mode 100644 index 00000000..0483e0e9 --- /dev/null +++ b/models/cv/detection/detr/ixrt/refine_model.py @@ -0,0 +1,291 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
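+
+# This pass pipeline rewrites the simplified DETR ONNX graph before the batch
+# size is changed and the engine is built: it fuses LayerNorm and GELU
+# subgraphs into single operators, drops unused variables, and normalizes
+# LayerNormalization axis and Reshape shape attributes.
+#
+# Usage sketch (illustrative paths; the infer_detr_fp16_*.sh scripts supply
+# BSZ and IMGSIZE from config/DETR_CONFIG):
+#
+#   python3 refine_model.py \
+#       --onnx_path checkpoints/detr_sim.onnx \
+#       --dst_onnx_path checkpoints/detr_refine.onnx \
+#       --bsz ${BSZ} --imgsz ${IMGSIZE}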
+ +import os +import argparse +import dataclasses + +import torch +import onnx + +from refine_utils.matmul_to_gemm_pass import FusedGemmPass +from refine_utils.linear_pass import FusedLinearPass + +from refine_utils.common import * + +def get_constant_input_name_of_operator(graph: Graph, operator: Operator): + const = None + for input in operator.inputs: + if not graph.containe_var(input): + continue + + if not graph.is_leaf_variable(input): + continue + + input_var = graph.get_variable(input) + if input_var.value is not None: + const = input + return const + +class FuseLayerNormPass(BasePass): + + def process(self, graph: Graph) -> Graph: + self.transform = GraphTransform(graph) + find_sequence_subgraph( + graph, + [OP.REDUCE_MEAN, OP.SUB, OP.POW, OP.REDUCE_MEAN, OP.ADD, OP.SQRT, OP.DIV, OP.MUL, OP.ADD], + self.fuse_layer_norm, + strict=False + ) + return graph + + def fuse_layer_norm(self, graph: Graph, pattern: PatternGraph): + # 检查 REDUCE_MEAN 的输入是否和 SUB 的输入是一致的 + if pattern.nodes[0].operator.inputs[0] != pattern.nodes[1].operator.inputs[0]: + return + + # 检查 POW 的输入是否和 DIV 的输入是一致的 + if pattern.nodes[2].operator.inputs[0] != pattern.nodes[6].operator.inputs[0]: + return + + # 检查部分算子的输出是否被多个算子使用 + nodes = pattern.nodes + for node in [nodes[0]] + nodes[2:-1]: + next_ops = graph.get_next_operators(node.operator) + if len(next_ops) > 1: + return + + eps = None + for input in nodes[4].operator.inputs: + input_var = graph.get_variable(input) + if input_var.value is not None and graph.is_leaf_variable(input): + eps = to_py_type(input_var.value) + + scale = get_constant_input_name_of_operator(graph, nodes[-2].operator) + bias = get_constant_input_name_of_operator(graph, nodes[-1].operator) + + self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator) + + bias_var = graph.get_variable(bias) + print(bias_var) + + attributes = { + "axis": nodes[0].operator.attributes.axes, + "epsilon": eps, + } + + + layer_norm_op = self.transform.make_operator( + op_type="LayerNormalization", + inputs=[nodes[0].operator.inputs[0], scale, bias], + outputs=[nodes[-1].operator.outputs[0]], + **attributes + ) + + self.transform.add_operator(layer_norm_op) + +class FusedGeluPass(BasePass): + + def process(self, graph: Graph) -> Graph: + self.transform = GraphTransform(graph) + + find_sequence_subgraph( + graph, pattern=[OP.DIV, OP.ERF, OP.ADD, OP.MUL, OP.MUL], callback=self.fuse_gelu, strict=True + ) + return graph + + def fuse_gelu(self, graph: Graph, pattern: PatternGraph): + nodes = pattern.nodes + prev_op = self.transform.get_previous_operators(nodes[0].operator)[0] + next_ops = self.transform.get_next_operators(prev_op) + if len(next_ops) != 2: + return + + if nodes[0].operator not in next_ops or nodes[3].operator not in next_ops: + return + + gelu_op_input = None + for input in nodes[3].operator.inputs: + if input in nodes[0].operator.inputs: + gelu_op_input = input + break + + self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator) + + gelu_op = self.transform.make_operator( + op_type=OP.GELU, + inputs=[gelu_op_input], + outputs=[nodes[-1].operator.outputs[0]] + ) + self.transform.add_operator(gelu_op) + +@dataclasses.dataclass +class NormalizeAttr(BaseOperatorAttr): + p: float = 2.0 + epsilon: float = 1e-12 + axis: int = 1 + + +@registe_operator(OP.GELU) +class GeluOperator(BaseOperator): + + def call( + self, + executor, + operator: Operator, + inputs: List, + attr: NormalizeAttr, + ): + return F.gelu(inputs[0]) + + def convert_onnx_operator( + self, 
ir_graph: Graph, onnx_graph: onnx.GraphProto, node: onnx.NodeProto + ) -> Operator: + return default_converter(ir_graph, onnx_graph, node, attr_cls=attr.EmptyAttr) + + def quantize( + self, + graph: Graph, + op: Operator, + operator_observer_config: QuantOperatorObserverConfig, + quant_outputs: bool = False, + ): + return quant_single_input_operator(graph, op, operator_observer_config, quant_outputs=quant_outputs) + + + +class ClearUnsedVariables(BasePass): + + def process(self, graph: Graph) -> Graph: + vars = list(graph.variables) + + for var in vars: + if len(graph.get_dst_operators(var)) == 0 and graph.is_leaf_variable(var): + graph.delete_variable(var) + + quant_params = list(graph.quant_parameters.keys()) + for var in quant_params: + if not graph.containe_var(var): + graph.quant_parameters.pop(var) + + return graph + +class FormatLayerNorm(BasePass): + + def process(self, graph: Graph) -> Graph: + for op in graph.operators.values(): + if "LayerNormalization" in op.op_type: + self.format_layer_norm(graph, op) + return graph + + def format_layer_norm(self, graph, operator): + if not hasattr(operator.attributes, "axis"): + return + if isinstance(operator.attributes.axis, (tuple, list)): + operator.attributes.axis = operator.attributes.axis[0] + +class FormatReshape(BasePass): + + def process(self, graph: Graph) -> Graph: + for op in graph.operators.values(): + if op.op_type == "Reshape": + self.format_reshape(graph, op) + + return graph + + def format_reshape(self, graph, operator): + shape = graph.get_variable(operator.inputs[1]) + shape.value = torch.tensor(shape.value, dtype=torch.int64) + +class FormatScalar(BasePass): + + def process(self, graph: Graph): + for var in graph.variables.values(): + var: Variable + use_ops = graph.get_dst_operators(var) + + if len(use_ops) == 0: + continue + + if use_ops[0].op_type not in [OP.MUL, OP.ADD, OP.GATHER]: + continue + + if var.value is not None and var.value.ndim == 0: + var.value = var.value.reshape(1) + print(f"Reshape scalar to tensor for {var.name}.") + + return graph + +class RenamePass(BasePass): + + def process(self, graph:Graph): + + names = [name for name in graph.operators.keys()] + for old_name in names: + new_name = old_name.replace("/", "#") + + graph.rename_operator(old_name, new_name) + + names = [name for name in graph.variables.keys()] + for name in names: + new_name = name.replace("/", ".").replace("Output", "out").replace("output", "out") + + graph.rename_vaiable(name, new_name, + with_variables=True, + with_operator_outputs=True) + + return graph + +def create_pipeline(example_inputs): + return PassSequence( + FuseLayerNormPass(), + FusedGeluPass(), + + ClearUnsedVariables(), + FormatLayerNorm(), + FormatReshape(), + # FormatScalar(), + # RenamePass() + ) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--onnx_path", type=str) + parser.add_argument("--dst_onnx_path", type=str) + + parser.add_argument("--bsz", type=int, default=8, + help="Batch size") + parser.add_argument("--imgsz", type=int, default=224, + help="Image size") + + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_args() + + example_inputs = torch.randn(args.bsz, 3, args.imgsz, args.imgsz) + + refine_pipline = Pipeline( + create_source(f"{args.onnx_path}", example_inputs=example_inputs), + create_pipeline(example_inputs), + create_target( + f"{args.dst_onnx_path}", + example_inputs=example_inputs, + ) + ) + refine_pipline.run() + + print(f"refine the model, input shape={example_inputs.shape}") diff 
--git a/models/cv/detection/detr/ixrt/refine_utils/__init__.py b/models/cv/detection/detr/ixrt/refine_utils/__init__.py new file mode 100644 index 00000000..162e24b4 --- /dev/null +++ b/models/cv/detection/detr/ixrt/refine_utils/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. \ No newline at end of file diff --git a/models/cv/detection/detr/ixrt/refine_utils/common.py b/models/cv/detection/detr/ixrt/refine_utils/common.py new file mode 100644 index 00000000..b19dccfc --- /dev/null +++ b/models/cv/detection/detr/ixrt/refine_utils/common.py @@ -0,0 +1,37 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +from typing import Union, Callable, List + +from tensorrt.deploy.api import * +from tensorrt.deploy.backend.onnx.converter import default_converter +from tensorrt.deploy.backend.torch.executor.operators._operators import to_py_type +from tensorrt.deploy.ir.operator_attr import BaseOperatorAttr, EmptyAttr +from tensorrt.deploy.ir.operator_type import OperatorType as OP +from tensorrt.deploy.ir import operator_attr as attr, Operator, generate_operator_name +from tensorrt.deploy.fusion import BasePass, PatternGraph, build_sequence_graph, GraphMatcher, PassSequence +from tensorrt.deploy.ir import Graph +from tensorrt.deploy.quantizer.quant_operator.base import quant_single_input_operator +from tensorrt.deploy.backend.onnx.converter import convert_onnx_operator + +def find_sequence_subgraph(graph, + pattern: Union[List[str], PatternGraph], + callback: Callable[[Graph, PatternGraph], None], + strict=True): + if isinstance(pattern, List): + pattern = build_sequence_graph(pattern) + + matcher = GraphMatcher(pattern, strict=strict) + return matcher.findall(graph, callback) \ No newline at end of file diff --git a/models/cv/detection/detr/ixrt/refine_utils/linear_pass.py b/models/cv/detection/detr/ixrt/refine_utils/linear_pass.py new file mode 100644 index 00000000..bab7e575 --- /dev/null +++ b/models/cv/detection/detr/ixrt/refine_utils/linear_pass.py @@ -0,0 +1,114 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import dataclasses
+
+from refine_utils.common import *
+
+# A x B = C, only applied when B is an initializer
+
+class FusedLinearPass(BasePass):
+
+    def process(self, graph: Graph) -> Graph:
+        self.transform = GraphTransform(graph)
+
+        find_sequence_subgraph(
+            graph, pattern=[OP.MATMUL, OP.ADD], callback=self.to_linear_with_bias, strict=True
+        )
+        find_sequence_subgraph(
+            graph, pattern=[OP.MATMUL], callback=self.to_linear, strict=True
+        )
+        return graph
+
+    def to_linear_with_bias(self, graph, pattern: PatternGraph):
+        matmul = pattern.nodes[0]
+        add = pattern.nodes[1]
+        if len(add.operator.inputs) != 2:
+            return
+
+        b_var = graph.get_variable(matmul.operator.inputs[1])
+        if not graph.is_leaf_variable(b_var) or b_var.value is None:
+            return
+
+        if b_var.value.ndim != 2:
+            return
+
+        bias_var = None
+        for input in add.operator.inputs:
+            if input not in matmul.operator.outputs:
+                bias_var = input
+
+        inputs = matmul.operator.inputs
+        inputs.append(bias_var)
+        outputs = add.operator.outputs
+
+        b_var.value = b_var.value.transpose(1, 0)
+        b_var.shape[0], b_var.shape[1] = b_var.shape[1], b_var.shape[0]
+
+        hidden_size = b_var.shape[1]
+        linear_dim = b_var.shape[0]
+
+        attributes = {
+            "hidden_size": hidden_size,
+            "linear_dim": linear_dim,
+            "has_bias": 1,
+            "act_type": "none"
+        }
+
+        self.transform.make_operator(
+            "LinearFP16",
+            inputs=inputs,
+            outputs=outputs,
+            **attributes
+        )
+
+        self.transform.delete_operator(add.operator)
+        self.transform.delete_operator(matmul.operator)
+
+    def to_linear(self, graph, pattern: PatternGraph):
+        matmul = pattern.nodes[0]
+        if len(matmul.operator.inputs) != 2:
+            return
+
+        b_var = graph.get_variable(matmul.operator.inputs[1])
+        if not graph.is_leaf_variable(b_var) or b_var.value is None:
+            return
+
+        if b_var.value.ndim != 2:
+            return
+
+        b_var.value = b_var.value.transpose(1, 0)
+        b_var.shape[0], b_var.shape[1] = b_var.shape[1], b_var.shape[0]
+
+        # Compute hidden_size/linear_dim from the transposed weight before they
+        # are referenced when building the operator attributes.
+        hidden_size = b_var.shape[1]
+        linear_dim = b_var.shape[0]
+
+        attributes = {
+            "hidden_size": hidden_size,
+            "linear_dim": linear_dim,
+            "has_bias": 0,
+            "act_type": "none"
+        }
+
+        op = self.transform.make_operator(
+            op_type="LinearFP16",
+            inputs=pattern.nodes[0].operator.inputs,
+            outputs=[pattern.nodes[-1].operator.outputs[0]],
+            **attributes
+        )
+
+        self.transform.add_operator(op)
+
+        self.transform.delete_operator(matmul.operator)
\ No newline at end of file
diff --git a/models/cv/detection/detr/ixrt/refine_utils/matmul_to_gemm_pass.py b/models/cv/detection/detr/ixrt/refine_utils/matmul_to_gemm_pass.py
new file mode 100644
index 00000000..5823c4a5
--- /dev/null
+++ b/models/cv/detection/detr/ixrt/refine_utils/matmul_to_gemm_pass.py
@@ -0,0 +1,55 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +from refine_utils.common import * + +# +# Common pattern Matmul to Gemm +# +class FusedGemmPass(BasePass): + + def process(self, graph: Graph) -> Graph: + self.transform = GraphTransform(graph) + + find_sequence_subgraph( + graph, pattern=[OP.MATMUL], callback=self.to_gemm, strict=True + ) + return graph + + def to_gemm(self, graph, pattern: PatternGraph): + matmul_op = pattern.nodes[0] + inputs = matmul_op.operator.inputs + outputs = matmul_op.operator.outputs + + if len(inputs)!=2 and len(outputs)!=1: + return + + for input in inputs: + if self.transform.is_leaf_variable(input): + return + + print(f"{self.transform.get_variable(inputs[0]).shape} {self.transform.get_variable(inputs[1]).shape}") + self.transform.delete_operator(matmul_op.operator) + + op = self.transform.make_operator( + op_type = "Gemm", + inputs = inputs, + outputs = outputs, + alpha = 1, + beta = 1, + transB = 1 + ) + + self.transform.add_operator(op) \ No newline at end of file diff --git a/models/cv/detection/detr/ixrt/scripts/infer_detr_fp16_accuracy.sh b/models/cv/detection/detr/ixrt/scripts/infer_detr_fp16_accuracy.sh new file mode 100755 index 00000000..f81312a1 --- /dev/null +++ b/models/cv/detection/detr/ixrt/scripts/infer_detr_fp16_accuracy.sh @@ -0,0 +1,142 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
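+
+# Pipeline overview (accuracy run): simplify the exported ONNX model, apply the
+# refine passes, rewrite the batch size, build the engine, then evaluate COCO
+# mAP with inference.py. Illustrative invocation (the environment variables
+# from the README must be exported first; the target value is an example):
+#
+#   bash scripts/infer_detr_fp16_accuracy.sh --bs 1 --tgt 0.37
+#
+# --bs sets BSZ and --tgt sets the mAP@0.5 target passed to inference.py.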
+ +#!/bin/bash + +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=1 +WARM_UP=-1 +TGT=-1 +LOOP_COUNT=-1 +RUN_MODE=MAP +PRECISION=float16 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +step=0 +faster=0 +CURRENT_MODEL=${ORIGINE_MODEL} +if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then + faster=1 +fi + +# Simplify Model +let step++ +echo; +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model Skipped, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi + +# Refine Model +let step++ +echo; +echo [STEP ${step}] : Refine Model +REFINE_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_refine.onnx +if [ -f ${REFINE_MODEL} ];then + echo " "Refine Model Skipped, ${REFINE_MODEL} has been existed +else + python3 ${RUN_DIR}/refine_model.py \ + --onnx_path ${SIM_MODEL} \ + --dst_onnx_path ${REFINE_MODEL} \ + --bsz ${BSZ} \ + --imgsz ${IMGSIZE} +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skipped, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py \ + --batch_size ${BSZ} \ + --origin_model ${REFINE_MODEL} \ + --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${FINAL_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --model_engine=${ENGINE_FILE} \ + --coco_gt=${COCO_GT} \ + --eval_dir=${EVAL_DIR} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --model_name ${MODEL_NAME} \ + --precision ${PRECISION} \ + --pred_dir ${CHECKPOINTS_DIR} \ + --map_target ${TGT} \ + --bsz ${BSZ}; check_status +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/detection/detr/ixrt/scripts/infer_detr_fp16_performance.sh b/models/cv/detection/detr/ixrt/scripts/infer_detr_fp16_performance.sh new file mode 100755 index 00000000..a3881a3c --- /dev/null +++ b/models/cv/detection/detr/ixrt/scripts/infer_detr_fp16_performance.sh @@ -0,0 +1,142 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +#!/bin/bash + +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=1 +WARM_UP=3 +TGT=-1 +LOOP_COUNT=10 +RUN_MODE=FPS +PRECISION=float16 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +step=0 +faster=0 +CURRENT_MODEL=${ORIGINE_MODEL} +if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then + faster=1 +fi + +# Simplify Model +let step++ +echo; +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model Skipped, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi + +# Refine Model +let step++ +echo; +echo [STEP ${step}] : Refine Model +REFINE_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_refine.onnx +if [ -f ${REFINE_MODEL} ];then + echo " "Refine Model Skipped, ${REFINE_MODEL} has been existed +else + python3 ${RUN_DIR}/refine_model.py \ + --onnx_path ${SIM_MODEL} \ + --dst_onnx_path ${REFINE_MODEL} \ + --bsz ${BSZ} \ + --imgsz ${IMGSIZE} +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skipped, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py \ + --batch_size ${BSZ} \ + --origin_model ${REFINE_MODEL} \ + --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${FINAL_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --model_engine=${ENGINE_FILE} \ + --coco_gt=${COCO_GT} \ + --eval_dir=${EVAL_DIR} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --model_name ${MODEL_NAME} \ + --precision ${PRECISION} \ + --pred_dir ${CHECKPOINTS_DIR} \ + --fps_target ${TGT} \ + --bsz ${BSZ}; check_status +exit ${EXIT_STATUS} \ 
No newline at end of file diff --git a/models/cv/detection/detr/ixrt/simplify_model.py b/models/cv/detection/detr/ixrt/simplify_model.py new file mode 100644 index 00000000..1400fd81 --- /dev/null +++ b/models/cv/detection/detr/ixrt/simplify_model.py @@ -0,0 +1,36 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import onnx +import argparse +from onnxsim import simplify + +# Simplify +def simplify_model(args): + onnx_model = onnx.load(args.origin_model) + model_simp, check = simplify(onnx_model) + model_simp = onnx.shape_inference.infer_shapes(model_simp) + onnx.save(model_simp, args.output_model) + print(" Simplify onnx Done.") + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--origin_model", type=str) + parser.add_argument("--output_model", type=str) + args = parser.parse_args() + return args + +args = parse_args() +simplify_model(args) \ No newline at end of file -- Gitee