diff --git a/models/cv/detection/yolov3/ixrt/README.md b/models/cv/detection/yolov3/ixrt/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..74a62e2eeda0a77db8051e24489e80ee533a85fa
--- /dev/null
+++ b/models/cv/detection/yolov3/ixrt/README.md
@@ -0,0 +1,74 @@
+# YOLOv3
+
+## Description
+
+YOLOv3 is an influential object detection algorithm. Its key innovation lies in efficiently detecting and classifying objects in real time with a single pass through the neural network. YOLOv3 divides an input image into a grid and predicts bounding boxes, class probabilities, and objectness scores for each grid cell.
+
+## Setup
+
+### Install
+```
+yum install mesa-libGL
+pip3 install tqdm
+pip3 install onnx
+pip3 install onnxsim
+pip3 install ultralytics
+pip3 install pycocotools
+pip3 install opencv-python==4.6.0.66
+```
+
+### Download
+
+Pretrained model:
+
+Dataset: to download the validation dataset.
+ - Images directory: Path/To/val2017/*.jpg
+ - Annotation file: Path/To/annotations/instances_val2017.json
+
+### Model Conversion
+```bash
+
+mkdir checkpoints
+git clone https://github.com/zldrobit/onnx_tflite_yolov3.git
+mv yolov3.weights onnx_tflite_yolov3/weights
+cd onnx_tflite_yolov3
+
+# Set opset_version=11 in the torch.onnx.export() call in detect.py; running it generates export.onnx under weights/
+python3 detect.py --cfg cfg/yolov3.cfg --weights weights/yolov3.weights
+
+mv weights/export.onnx /Path/to/checkpoints/yolov3.onnx
+```
+
+## Inference
+```bash
+export PROJ_DIR=/Path/to/yolov3/ixrt
+export DATASETS_DIR=/Path/to/coco2017/
+export CHECKPOINTS_DIR=./checkpoints
+export COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json
+export EVAL_DIR=${DATASETS_DIR}/val2017
+export RUN_DIR=/Path/to/yolov3/ixrt
+export CONFIG_DIR=config/YOLOV3_CONFIG
+```
+### FP16
+
+```bash
+# Accuracy
+bash scripts/infer_yolov3_fp16_accuracy.sh
+# Performance
+bash scripts/infer_yolov3_fp16_performance.sh
+```
+
+### INT8
+```bash
+# Accuracy
+bash scripts/infer_yolov3_int8_accuracy.sh
+# Performance
+bash scripts/infer_yolov3_int8_performance.sh
+```
+
+## Results
+
+Model   |BatchSize  |Precision |FPS      |MAP@0.5   |MAP@0.5:0.95 |
+--------|-----------|----------|---------|----------|-------------|
+YOLOv3  | 32        | FP16     | 757.11  | 0.663    | 0.381       |
+YOLOv3  | 32        | INT8     | 1778.34 | 0.659    | 0.356       |
\ No newline at end of file
diff --git a/models/cv/detection/yolov3/ixrt/build_engine.py b/models/cv/detection/yolov3/ixrt/build_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..a919bdd0183197ce125aa5492ec83e58e035675d
--- /dev/null
+++ b/models/cv/detection/yolov3/ixrt/build_engine.py
@@ -0,0 +1,58 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
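+
+# Usage sketch (paths are illustrative; the flags match parse_args below):
+#
+#   python3 build_engine.py \
+#       --model checkpoints/yolov3_bs32.onnx \
+#       --precision float16 \
+#       --engine checkpoints/yolov3_bs32.engine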
+ +import os +import cv2 +import argparse +import numpy as np + +import torch +import tensorrt + +from load_ixrt_plugin import load_ixrt_plugin +load_ixrt_plugin() + +def main(config): + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(config.model) + + precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 + # print("precision : ", precision) + build_config.set_flag(precision) + + plan = builder.build_serialized_network(network, build_config) + engine_file_path = config.engine + with open(engine_file_path, "wb") as f: + f.write(plan) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", + help="The precision of datatype") + # engine args + parser.add_argument("--engine", type=str, default=None) + + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = parse_args() + main(args) \ No newline at end of file diff --git a/models/cv/detection/yolov3/ixrt/build_nms_engine.py b/models/cv/detection/yolov3/ixrt/build_nms_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..3be0d83d0d966018f59b87d22f628b9b1ddf9b21 --- /dev/null +++ b/models/cv/detection/yolov3/ixrt/build_nms_engine.py @@ -0,0 +1,94 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
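+
+# Usage sketch (argument values mirror config/YOLOV3_CONFIG; the path is illustrative):
+#
+#   python3 build_nms_engine.py \
+#       --bsz 32 \
+#       --path checkpoints \
+#       --all_box_num 10647 \
+#       --max_box_pre_img 1000 \
+#       --iou_thresh 0.6 \
+#       --score_thresh 0.001
+#
+# This writes checkpoints/nms.onnx and builds it into checkpoints/nms.engine.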
+
+import os
+import argparse
+import torch
+import onnx
+from onnx import helper
+from onnx import TensorProto, numpy_helper
+import tensorrt
+
+def create_onnx(args):
+    nms = helper.make_node(
+        "NMS",
+        name="NMS",
+        inputs=["nms_input"],
+        outputs=["nms_output0", "nms_output1"],
+        nMaxKeep=args.max_box_pre_img,
+        fIoUThresh=args.iou_thresh,
+        fScoreThresh=args.score_thresh
+    )
+    graph = helper.make_graph(
+        nodes=[nms],
+        name="gpu_nms",
+        inputs=[
+            helper.make_tensor_value_info(
+                "nms_input", onnx.TensorProto.FLOAT, (args.bsz, args.all_box_num, 6)
+            )
+        ],
+        outputs=[
+            helper.make_tensor_value_info(
+                "nms_output0", onnx.TensorProto.FLOAT, (args.bsz, args.max_box_pre_img, 6)
+            ),
+            helper.make_tensor_value_info(
+                "nms_output1", onnx.TensorProto.INT32, (args.bsz,)
+            )
+        ],
+        initializer=[]
+    )
+
+    op = onnx.OperatorSetIdProto()
+    op.version = 13
+    # attach the opset explicitly so the custom NMS node resolves during parsing
+
+    model = onnx.helper.make_model(graph, opset_imports=[op])
+    onnx_path = args.path + "/nms.onnx"
+    onnx.save(model, onnx_path)
+
+def build_engine(args):
+    onnx_path = args.path + "/nms.onnx"
+    IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING)
+    builder = tensorrt.Builder(IXRT_LOGGER)
+    EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
+    network = builder.create_network(EXPLICIT_BATCH)
+    build_config = builder.create_builder_config()
+    parser = tensorrt.OnnxParser(network, IXRT_LOGGER)
+    parser.parse_from_file(onnx_path)
+    plan = builder.build_serialized_network(network, build_config)
+
+    engine_path = args.path + "/nms.engine"
+    with open(engine_path, "wb") as f:
+        f.write(plan)
+
+def main(args):
+    create_onnx(args)
+    build_engine(args)
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--bsz", type=int, default=1, help="batch size")
+    parser.add_argument("--path", type=str)
+    parser.add_argument("--all_box_num", type=int, default=25200)
+    parser.add_argument("--max_box_pre_img", type=int, default=1000)
+    parser.add_argument("--iou_thresh", type=float, default=0.6)
+    parser.add_argument("--score_thresh", type=float, default=0.001)
+
+    args = parser.parse_args()
+    return args
+
+if __name__ == "__main__":
+    args = parse_args()
+    main(args)
\ No newline at end of file
diff --git a/models/cv/detection/yolov3/ixrt/calibration_dataset.py b/models/cv/detection/yolov3/ixrt/calibration_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..de37775a0c617fdefca4342423a6a47bdc9b9c41
--- /dev/null
+++ b/models/cv/detection/yolov3/ixrt/calibration_dataset.py
@@ -0,0 +1,43 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
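+
+# Example call (illustrative COCO paths; img_sz and batch_size mirror the
+# YOLOv3 config's IMGSIZE/QUANT_BATCHSIZE):
+#
+#   loader = create_dataloaders(
+#       data_path="coco2017/val2017",
+#       annFile="coco2017/annotations/instances_val2017.json",
+#       img_sz=416,
+#       batch_size=1,
+#       step=32,
+#       data_process_type="yolov3",
+#   )
+#   images, origin_shapes, image_ids = next(iter(loader))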
+ +import os +import torch +import torchvision.datasets +from torch.utils.data import DataLoader +from datasets.coco import CocoDetection + +def create_dataloaders(data_path, annFile, img_sz=640, batch_size=32, step=32, workers=2, data_process_type="yolov5"): + dataset = CocoDetection( + root=data_path, + annFile=annFile, + img_size=img_sz, + data_process_type=data_process_type + ) + calibration_dataset = dataset + num_samples = min(5000, batch_size * step) + if num_samples > 0: + calibration_dataset = torch.utils.data.Subset( + dataset, indices=range(num_samples) + ) + + calibration_dataloader = DataLoader( + calibration_dataset, + shuffle=False, + batch_size=batch_size, + drop_last=False, + num_workers=workers, + ) + return calibration_dataloader \ No newline at end of file diff --git a/models/cv/detection/yolov3/ixrt/coco_labels.py b/models/cv/detection/yolov3/ixrt/coco_labels.py new file mode 100644 index 0000000000000000000000000000000000000000..43f5bd82cd257efdcab2bdba6bad64d9bb90416e --- /dev/null +++ b/models/cv/detection/yolov3/ixrt/coco_labels.py @@ -0,0 +1,104 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +labels = [ + "person", + "bicycle", + "car", + "motorcycle", + "airplane", + "bus", + "train", + "truck", + "boat", + "traffic light", + "fire hydrant", + "stop sign", + "parking meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "backpack", + "umbrella", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports ball", + "kite", + "baseball bat", + "baseball glove", + "skateboard", + "surfboard", + "tennis racket", + "bottle", + "wine glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot dog", + "pizza", + "donut", + "cake", + "chair", + "couch", + "potted plant", + "bed", + "dining table", + "toilet", + "tv", + "laptop", + "mouse", + "remote", + "keyboard", + "cell phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "book", + "clock", + "vase", + "scissors", + "teddy bear", + "hair drier", + "toothbrush", +] +def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper) + return [ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] + +__all__ = ["labels"] diff --git a/models/cv/detection/yolov3/ixrt/common.py b/models/cv/detection/yolov3/ixrt/common.py new file mode 100644 index 0000000000000000000000000000000000000000..aba2117c9942d6823abf73bf3ab94c291a7705e2 --- /dev/null +++ b/models/cv/detection/yolov3/ixrt/common.py @@ -0,0 +1,98 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX 
Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import numpy as np +from tqdm import tqdm + +import tensorrt +import pycuda.driver as cuda + +# input : [bsz, box_num, 5(cx, cy, w, h, conf) + class_num(prob[0], prob[1], ...)] +# output : [bsz, box_num, 6(left_top_x, left_top_y, right_bottom_x, right_bottom_y, class_id, max_prob*conf)] +def box_class85to6(input): + center_x_y = input[:, :2] + side = input[:, 2:4] + conf = input[:, 4:5] + class_id = np.argmax(input[:, 5:], axis = -1) + class_id = class_id.astype(np.float32).reshape(-1, 1) + 1 + max_prob = np.max(input[:, 5:], axis = -1).reshape(-1, 1) + x1_y1 = center_x_y - 0.5 * side + x2_y2 = center_x_y + 0.5 * side + nms_input = np.concatenate([x1_y1, x2_y2, class_id, max_prob*conf], axis = -1) + return nms_input + +def save2json(batch_img_id, pred_boxes, json_result, class_trans): + for i, boxes in enumerate(pred_boxes): + if boxes is not None: + image_id = int(batch_img_id[i]) + # have no target + if image_id == -1: + continue + for x, y, w, h, c, p in boxes: + x, y, w, h, p = float(x), float(y), float(w), float(h), float(p) + c = int(c) + json_result.append( + { + "image_id": image_id, + "category_id": class_trans[c - 1], + "bbox": [x, y, w, h], + "score": p, + } + ) + +def create_engine_context(engine_path, logger): + with open(engine_path, "rb") as f: + runtime = tensorrt.Runtime(logger) + assert runtime + engine = runtime.deserialize_cuda_engine(f.read()) + assert engine + context = engine.create_execution_context() + assert context + + return engine, context + +def get_io_bindings(engine): + # Setup I/O bindings + inputs = [] + outputs = [] + allocations = [] + + for i in range(engine.num_bindings): + is_input = False + if engine.binding_is_input(i): + is_input = True + name = engine.get_binding_name(i) + dtype = engine.get_binding_dtype(i) + shape = engine.get_binding_shape(i) + if is_input: + batch_size = shape[0] + size = np.dtype(tensorrt.nptype(dtype)).itemsize + for s in shape: + size *= s + allocation = cuda.mem_alloc(size) + binding = { + "index": i, + "name": name, + "dtype": np.dtype(tensorrt.nptype(dtype)), + "shape": list(shape), + "allocation": allocation, + } + print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") + allocations.append(allocation) + if engine.binding_is_input(i): + inputs.append(binding) + else: + outputs.append(binding) + return inputs, outputs, allocations \ No newline at end of file diff --git a/models/cv/detection/yolov3/ixrt/config/YOLOV3_CONFIG b/models/cv/detection/yolov3/ixrt/config/YOLOV3_CONFIG new file mode 100644 index 0000000000000000000000000000000000000000..8cbd0f490342f6a7f5e06fa1087bc65cc98afb55 --- /dev/null +++ b/models/cv/detection/yolov3/ixrt/config/YOLOV3_CONFIG @@ -0,0 +1,64 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+# BSZ : batch size used when building the engine and running inference
+# IMGSIZE : model input height/width
+# RUN_MODE : [FPS, MAP]
+# PRECISION : [float16, int8]
+# MODEL_NAME : basename of the generated onnx/engine files
+# ORIGINE_MODEL : original onnx file
+# COCO_GT : COCO evaluation annotation file
+# DATASET_DIR : dataset path for quantization/inference
+# CHECKPOINTS_DIR : directory where generated onnx/engine files are stored
+# LAYER_FUSION : implement the decoder with fused operators; 0 = no fusion, 1 = fusion
+# DECODER_FASTER : two fused implementations exist; the faster one can feed GPU NMS directly, while the other keeps its output identical to the onnx. 1 = faster
+IMGSIZE=416
+MODEL_NAME=yolov3
+ORIGINE_MODEL=yolov3.onnx
+DATA_PROCESS_TYPE=yolov3
+MODEL_INPUT_NAMES=(input.1)
+
+LAYER_FUSION=1
+DECODER_FASTER=1
+DECODER_NUM_CLASS=80
+DECODER_INPUT_NAMES=(/module_list.105/Conv2d/Conv_output_0 /module_list.93/Conv2d/Conv_output_0 /module_list.81/Conv2d/Conv_output_0)
+DECODER_8_ANCHOR=(10 13 16 30 33 23)
+DECODER_16_ANCHOR=(30 61 62 45 59 119)
+DECODER_32_ANCHOR=(116 90 156 198 373 326)
+
+# NMS CONFIG
+    # IOU_THRESH : IoU threshold
+    # SCORE_THRESH : bbox confidence threshold
+    # MAX_BOX_PRE_IMG : maximum number of predicted bboxes per image
+    # ALL_BOX_NUM : number of boxes NMS receives per image
+    # NMS_TYPE : GPU/CPU(TODO)
+IOU_THRESH=0.6
+SCORE_THRESH=0.001
+MAX_BOX_PRE_IMG=1000
+ALL_BOX_NUM=10647
+NMS_TYPE=GPU
+
+# QUANT CONFIG (only takes effect when PRECISION is int8)
+    # QUANT_OBSERVER : quantization strategy, one of [hist_percentile, percentile, minmax, entropy, ema]
+    # QUANT_BATCHSIZE : dataloader batch size for quantization; best kept equal to the onnx batch size, since some ops (e.g. Reshape) may otherwise infer shapes incorrectly
+    # QUANT_STEP : number of quantization steps
+    # QUANT_SEED : random seed, to keep quantization results reproducible
+    # QUANT_EXIST_ONNX : set this if a quantized model from another source should be used
+QUANT_OBSERVER=hist_percentile
+QUANT_BATCHSIZE=1
+QUANT_STEP=32
+QUANT_SEED=42
+DISABLE_QUANT_LIST=()
+QUANT_EXIST_ONNX=
\ No newline at end of file
diff --git a/models/cv/detection/yolov3/ixrt/cut_model.py b/models/cv/detection/yolov3/ixrt/cut_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..e9ee19aadf0809fe1b97e3225d09150fb54513f7
--- /dev/null
+++ b/models/cv/detection/yolov3/ixrt/cut_model.py
@@ -0,0 +1,31 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
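+
+# Usage sketch (node names taken from config/YOLOV3_CONFIG; paths are illustrative):
+#
+#   python3 cut_model.py \
+#       --input_model checkpoints/yolov3_sim.onnx \
+#       --output_model checkpoints/yolov3_without_decoder.onnx \
+#       --input_names input.1 \
+#       --output_names /module_list.105/Conv2d/Conv_output_0 \
+#                      /module_list.93/Conv2d/Conv_output_0 \
+#                      /module_list.81/Conv2d/Conv_output_0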
+ +import onnx +import argparse +from onnxsim import simplify + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--input_model", type=str) + parser.add_argument("--output_model", type=str) + parser.add_argument("--input_names", nargs='+', type=str) + parser.add_argument("--output_names", nargs='+', type=str) + args = parser.parse_args() + return args + +args = parse_args() +onnx.utils.extract_model(args.input_model, args.output_model, args.input_names, args.output_names) +print(" Cut Model Done.") \ No newline at end of file diff --git a/models/cv/detection/yolov3/ixrt/datasets/__init__.py b/models/cv/detection/yolov3/ixrt/datasets/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..162e24b462289dcee7b7a2888b93fad1115def81 --- /dev/null +++ b/models/cv/detection/yolov3/ixrt/datasets/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. \ No newline at end of file diff --git a/models/cv/detection/yolov3/ixrt/datasets/coco.py b/models/cv/detection/yolov3/ixrt/datasets/coco.py new file mode 100644 index 0000000000000000000000000000000000000000..73c5df54761b917ecd0127fb56b61d9bd34c1196 --- /dev/null +++ b/models/cv/detection/yolov3/ixrt/datasets/coco.py @@ -0,0 +1,131 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os.path +from typing import Any, Callable, List, Optional, Tuple + +import cv2 + +from .vision import VisionDataset +from .pre_process import get_post_process +class CocoDetection(VisionDataset): + """`MS Coco Detection `_ Dataset. + + It requires the `COCO API to be installed `_. + + Args: + root (string): Root directory where images are downloaded to. + annFile (string): Path to json annotation file. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.PILToTensor`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + transforms (callable, optional): A function/transform that takes input sample and its target as entry + and returns a transformed version. 
+ """ + + def __init__( + self, + root: str, + annFile: str, + img_size: int, + data_process_type: str, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + transforms: Optional[Callable] = None, + + ) -> None: + super().__init__(root, transforms, transform, target_transform) + from pycocotools.coco import COCO + + self.coco = COCO(annFile) + self.ids = list(sorted(self.coco.imgs.keys())) + self.img_size = img_size + + self.transforms = get_post_process(data_process_type) + + def _load_image(self, id: int): + path = self.coco.loadImgs(id)[0]["file_name"] + data = cv2.imread(os.path.join(self.root, path)) + return data + + def _load_target(self, id: int) -> List[Any]: + return self.coco.loadAnns(self.coco.getAnnIds(id)) + + def __getitem__(self, index: int) -> Tuple[Any, Any]: + id = self.ids[index] + image = self._load_image(id) + target = self._load_target(id) + origin_shape = image.shape[:2] + + if self.transforms is not None: + image = self.transforms(image, self.img_size) + + if len(target) > 0: + image_id = target[0]["image_id"] + else: + # have no target + image_id = -1 + return image, origin_shape, image_id + + def __len__(self) -> int: + return len(self.ids) + + +class CocoCaptions(CocoDetection): + """`MS Coco Captions `_ Dataset. + + It requires the `COCO API to be installed `_. + + Args: + root (string): Root directory where images are downloaded to. + annFile (string): Path to json annotation file. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.PILToTensor`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + transforms (callable, optional): A function/transform that takes input sample and its target as entry + and returns a transformed version. + + Example: + + .. code:: python + + import torchvision.datasets as dset + import torchvision.transforms as transforms + cap = dset.CocoCaptions(root = 'dir where images are', + annFile = 'json annotation file', + transform=transforms.PILToTensor()) + + print('Number of samples: ', len(cap)) + img, target = cap[3] # load 4th sample + + print("Image Size: ", img.size()) + print(target) + + Output: :: + + Number of samples: 82783 + Image Size: (3L, 427L, 640L) + [u'A plane emitting smoke stream flying over a mountain.', + u'A plane darts across a bright blue sky behind a mountain covered in snow', + u'A plane leaves a contrail above the snowy mountain top.', + u'A mountain that has a plane flying overheard in the distance.', + u'A mountain view with a plume of smoke in the background'] + + """ + + def _load_target(self, id: int) -> List[str]: + return [ann["caption"] for ann in super()._load_target(id)] diff --git a/models/cv/detection/yolov3/ixrt/datasets/common.py b/models/cv/detection/yolov3/ixrt/datasets/common.py new file mode 100644 index 0000000000000000000000000000000000000000..ef36eba394917bc05af46f33be48463df50f540d --- /dev/null +++ b/models/cv/detection/yolov3/ixrt/datasets/common.py @@ -0,0 +1,81 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import cv2 +import math +import numpy as np + +def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32): + # Resize and pad image while meeting stride-multiple constraints + shape = im.shape[:2] # current shape [height, width] + if isinstance(new_shape, int): + new_shape = (new_shape, new_shape) + + # Scale ratio (new / old) + r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) + if not scaleup: # only scale down, do not scale up (for better val mAP) + r = min(r, 1.0) + + # Compute padding + ratio = r, r # width, height ratios + new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) + dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding + if auto: # minimum rectangle + dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding + elif scaleFill: # stretch + dw, dh = 0.0, 0.0 + new_unpad = (new_shape[1], new_shape[0]) + ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios + + dw /= 2 # divide padding into 2 sides + dh /= 2 + + if shape[::-1] != new_unpad: # resize + im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR) + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border + return im, ratio, (dw, dh) + +def scale_boxes(net_shape, boxes, ori_shape, use_letterbox=False): + # Rescale boxes (xyxy) from net_shape to ori_shape + + if use_letterbox: + + gain = min( + net_shape[0] / ori_shape[0], net_shape[1] / ori_shape[1] + ) # gain = new / old + pad = (net_shape[1] - ori_shape[1] * gain) / 2, ( + net_shape[0] - ori_shape[0] * gain + ) / 2.0 + + boxes[:, [0, 2]] -= pad[0] # x padding + boxes[:, [1, 3]] -= pad[1] # y padding + boxes[:, :4] /= gain + else: + x_scale, y_scale = net_shape[1] / ori_shape[1], net_shape[0] / ori_shape[0] + + boxes[:, 0] /= x_scale + boxes[:, 1] /= y_scale + boxes[:, 2] /= x_scale + boxes[:, 3] /= y_scale + + clip_boxes(boxes, ori_shape) + return boxes + +def clip_boxes(boxes, shape): + + boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2 + boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2 \ No newline at end of file diff --git a/models/cv/detection/yolov3/ixrt/datasets/post_process.py b/models/cv/detection/yolov3/ixrt/datasets/post_process.py new file mode 100644 index 0000000000000000000000000000000000000000..8590816a0df18b6ef296ebe305b15b81240ab1d0 --- /dev/null +++ b/models/cv/detection/yolov3/ixrt/datasets/post_process.py @@ -0,0 +1,130 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import cv2 +import math +import numpy as np + +from .common import letterbox, scale_boxes, clip_boxes + +def get_post_process(data_process_type): + if data_process_type == "yolov5": + return Yolov5Postprocess + elif data_process_type == "yolov3": + return Yolov3Postprocess + elif data_process_type == "yolox": + return YoloxPostprocess + return None + +def Yolov3Postprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + cur_box = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + boxes = scale_boxes( + (imgsz[0], imgsz[1]), + cur_box, + (ori_img_shape[0][i], ori_img_shape[1][i]), + use_letterbox=False + ) + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box + +def Yolov5Postprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + cur_box = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + boxes = scale_boxes( + (imgsz[0], imgsz[1]), + cur_box, + (ori_img_shape[0][i], ori_img_shape[1][i]), + use_letterbox=True + ) + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box + +def YoloxPostprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + boxes = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + r = min(imgsz[0]/ori_img_shape[0][i], imgsz[1]/ori_img_shape[1][i]) + boxes[:, :4] /= r + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + clip_boxes(boxes, (ori_img_shape[0][i], ori_img_shape[1][i])) + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box \ No newline at end of file diff --git a/models/cv/detection/yolov3/ixrt/datasets/pre_process.py b/models/cv/detection/yolov3/ixrt/datasets/pre_process.py new file mode 100644 index 0000000000000000000000000000000000000000..c651f8adb7c8190c214fbbbb7769c7d0713e9619 --- /dev/null +++ b/models/cv/detection/yolov3/ixrt/datasets/pre_process.py @@ -0,0 +1,71 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import cv2 +import math +import numpy as np + +from .common import letterbox + +def get_post_process(data_process_type): + if data_process_type == "yolov5": + return Yolov5Preprocess + elif data_process_type == "yolov3": + return Yolov3Preprocess + elif data_process_type == "yolox": + return YoloxPreprocess + return None + +def Yolov3Preprocess(image, img_size): + + h0, w0 = image.shape[:2] # orig hw + r = img_size / max(h0, w0) # ratio + + image = cv2.resize(image, (img_size, img_size)) + image = image.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + image = np.ascontiguousarray(image).astype(np.float32) / 255.0 # 0~1 np array + return image + +def Yolov5Preprocess(image, img_size, augment=False): + + h0, w0 = image.shape[:2] # orig hw + r = img_size / max(h0, w0) # ratio + + if r != 1: # if sizes are not equal + interp = cv2.INTER_LINEAR if (augment or r > 1) else cv2.INTER_AREA + image = cv2.resize(image, (math.ceil(w0 * r), math.ceil(h0 * r)), interpolation=interp) + + # shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size rect == True + + image, ratio, dwdh = letterbox(image, new_shape=img_size, auto=False, scaleup=False) + image = image.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + image = np.ascontiguousarray(image).astype(np.float32) / 255.0 # 0~1 np array + return image + +def YoloxPreprocess(img, img_size, swap=(2,0,1)): + + padded_img = np.ones((img_size, img_size, 3), dtype=np.uint8) * 114 + r = min(img_size / img.shape[0], img_size / img.shape[1]) + resized_img = cv2.resize( + img, + (int(img.shape[1] * r), int(img.shape[0] * r)), + interpolation=cv2.INTER_LINEAR, + ).astype(np.uint8) + + padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img + padded_img = padded_img.transpose(swap) + padded_img = np.ascontiguousarray(padded_img, dtype=np.float32) + + return padded_img \ No newline at end of file diff --git a/models/cv/detection/yolov3/ixrt/datasets/vision.py b/models/cv/detection/yolov3/ixrt/datasets/vision.py new file mode 100644 index 0000000000000000000000000000000000000000..eadefb2c5b35abd0a11fa85c65891461a210aef8 --- /dev/null +++ b/models/cv/detection/yolov3/ixrt/datasets/vision.py @@ -0,0 +1,151 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +from typing import Any, Callable, List, Optional, Tuple + +import torch +import torch.utils.data as data + +from types import FunctionType + +def _log_api_usage_once(obj: Any) -> None: + + """ + Logs API usage(module and name) within an organization. 
+ In a large ecosystem, it's often useful to track the PyTorch and + TorchVision APIs usage. This API provides the similar functionality to the + logging module in the Python stdlib. It can be used for debugging purpose + to log which methods are used and by default it is inactive, unless the user + manually subscribes a logger via the `SetAPIUsageLogger method `_. + Please note it is triggered only once for the same API call within a process. + It does not collect any data from open-source users since it is no-op by default. + For more information, please refer to + * PyTorch note: https://pytorch.org/docs/stable/notes/large_scale_deployments.html#api-usage-logging; + * Logging policy: https://github.com/pytorch/vision/issues/5052; + + Args: + obj (class instance or method): an object to extract info from. + """ + module = obj.__module__ + if not module.startswith("torchvision"): + module = f"torchvision.internal.{module}" + name = obj.__class__.__name__ + if isinstance(obj, FunctionType): + name = obj.__name__ + torch._C._log_api_usage_once(f"{module}.{name}") + +class VisionDataset(data.Dataset): + """ + Base Class For making datasets which are compatible with torchvision. + It is necessary to override the ``__getitem__`` and ``__len__`` method. + + Args: + root (string): Root directory of dataset. + transforms (callable, optional): A function/transforms that takes in + an image and a label and returns the transformed versions of both. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + + .. note:: + + :attr:`transforms` and the combination of :attr:`transform` and :attr:`target_transform` are mutually exclusive. + """ + + _repr_indent = 4 + + def __init__( + self, + root: str, + transforms: Optional[Callable] = None, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + ) -> None: + _log_api_usage_once(self) + if isinstance(root, str): + root = os.path.expanduser(root) + self.root = root + + has_transforms = transforms is not None + has_separate_transform = transform is not None or target_transform is not None + if has_transforms and has_separate_transform: + raise ValueError("Only transforms or transform/target_transform can be passed as argument") + + # for backwards-compatibility + self.transform = transform + self.target_transform = target_transform + + if has_separate_transform: + transforms = StandardTransform(transform, target_transform) + self.transforms = transforms + + def __getitem__(self, index: int) -> Any: + """ + Args: + index (int): Index + + Returns: + (Any): Sample and meta data, optionally transformed by the respective transforms. 
+ """ + raise NotImplementedError + + def __len__(self) -> int: + raise NotImplementedError + + def __repr__(self) -> str: + head = "Dataset " + self.__class__.__name__ + body = [f"Number of datapoints: {self.__len__()}"] + if self.root is not None: + body.append(f"Root location: {self.root}") + body += self.extra_repr().splitlines() + if hasattr(self, "transforms") and self.transforms is not None: + body += [repr(self.transforms)] + lines = [head] + [" " * self._repr_indent + line for line in body] + return "\n".join(lines) + + def _format_transform_repr(self, transform: Callable, head: str) -> List[str]: + lines = transform.__repr__().splitlines() + return [f"{head}{lines[0]}"] + ["{}{}".format(" " * len(head), line) for line in lines[1:]] + + def extra_repr(self) -> str: + return "" + + +class StandardTransform: + def __init__(self, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None) -> None: + self.transform = transform + self.target_transform = target_transform + + def __call__(self, input: Any, target: Any) -> Tuple[Any, Any]: + if self.transform is not None: + input = self.transform(input) + if self.target_transform is not None: + target = self.target_transform(target) + return input, target + + def _format_transform_repr(self, transform: Callable, head: str) -> List[str]: + lines = transform.__repr__().splitlines() + return [f"{head}{lines[0]}"] + ["{}{}".format(" " * len(head), line) for line in lines[1:]] + + def __repr__(self) -> str: + body = [self.__class__.__name__] + if self.transform is not None: + body += self._format_transform_repr(self.transform, "Transform: ") + if self.target_transform is not None: + body += self._format_transform_repr(self.target_transform, "Target transform: ") + + return "\n".join(body) diff --git a/models/cv/detection/yolov3/ixrt/deploy.py b/models/cv/detection/yolov3/ixrt/deploy.py new file mode 100644 index 0000000000000000000000000000000000000000..8c2cc424f699e01bc88dab98a29dc4c83e4d9b9e --- /dev/null +++ b/models/cv/detection/yolov3/ixrt/deploy.py @@ -0,0 +1,150 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +# !/usr/bin/env python +# -*- coding: utf-8 -*- +import argparse +from tensorrt.deploy.api import GraphTransform, create_source, create_target + +class Transform: + def __init__(self, graph): + self.t = GraphTransform(graph) + self.graph = graph + + def ReplaceFocus(self, input_edge, outputs, to_op): + input_var = self.graph.get_variable(input_edge) + op = self.graph.get_operator(to_op) + self.t.delete_operators_between_var_op( + from_var=input_var, to_op=op + ) + self.t.make_operator( + "Focus", inputs=input_edge, outputs=outputs + ) + return self.graph + + def AddYoloDecoderOp(self, inputs: list, outputs: list, op_type, **attributes): + if attributes["anchor"] is None: + del attributes["anchor"] + self.t.make_operator( + op_type, inputs=inputs, outputs=outputs, **attributes + ) + return self.graph + + def AddConcatOp(self, inputs: list, outputs, **attributes): + self.t.make_operator( + "Concat", inputs=inputs, outputs=outputs, **attributes + ) + return self.graph + +def customize_ops(graph, args): + t = Transform(graph) + fuse_focus = args.focus_input is not None and args.focus_output is not None and args.focus_last_node is not None + if fuse_focus: + graph = t.ReplaceFocus( + input_edge=args.focus_input, + outputs=args.focus_output, + to_op=args.focus_last_node + ) + decoder_input = args.decoder_input_names + num = len(decoder_input) // 3 + graph = t.AddYoloDecoderOp( + inputs=decoder_input[:num], + outputs=["decoder_8"], + op_type=args.decoder_type, + anchor=args.decoder8_anchor, + num_class=args.num_class, + stride=8, + faster_impl=args.faster + ) + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num:num*2], + outputs=["decoder_16"], + op_type=args.decoder_type, + anchor=args.decoder16_anchor, + num_class=args.num_class, + stride=16, + faster_impl=args.faster + ) + + if args.decoder64_anchor is not None: + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num*2:num*2+1], + outputs=["decoder_32"], + op_type=args.decoder_type, + anchor=args.decoder32_anchor, + num_class=args.num_class, + stride=32, + faster_impl=args.faster + ) + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num*2+1:], + outputs=["decoder_64"], + op_type=args.decoder_type, + anchor=args.decoder64_anchor, + num_class=args.num_class, + stride=64, + faster_impl=args.faster + ) + graph = t.AddConcatOp( + inputs=["decoder_8", "decoder_16", "decoder_32", "decoder_64"], + outputs=["output"], + axis=1 + ) + else: + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num*2:], + outputs=["decoder_32"], + op_type=args.decoder_type, + anchor=args.decoder32_anchor, + num_class=args.num_class, + stride=32, + faster_impl=args.faster + ) + graph = t.AddConcatOp( + inputs=["decoder_32", "decoder_16", "decoder_8"], + outputs=["output"], + axis=1 + ) + + graph.outputs.clear() + graph.add_output("output") + graph.outputs["output"].dtype = "FLOAT" + return graph + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--src", type=str) + parser.add_argument("--dst", type=str) + parser.add_argument("--decoder_type", type=str, choices=["YoloV3Decoder", "YoloV5Decoder", "YoloV7Decoder", "YoloxDecoder"]) + parser.add_argument("--decoder_input_names", nargs='+', type=str) + parser.add_argument("--decoder8_anchor", nargs='*', type=int) + parser.add_argument("--decoder16_anchor", nargs='*', type=int) + parser.add_argument("--decoder32_anchor", nargs='*', type=int) + parser.add_argument("--decoder64_anchor", nargs='*', type=int, default=None) + parser.add_argument("--num_class", type=int, default=80) + 
parser.add_argument("--faster", type=int, default=1) + parser.add_argument("--focus_input", type=str, default=None) + parser.add_argument("--focus_output", type=str, default=None) + parser.add_argument("--focus_last_node", type=str, default=None) + args = parser.parse_args() + return args + +if __name__ == "__main__": + + args = parse_args() + graph = create_source(args.src)() + graph = customize_ops(graph, args) + create_target(saved_path=args.dst).export(graph) + print("Surged onnx lies on", args.dst) \ No newline at end of file diff --git a/models/cv/detection/yolov3/ixrt/inference.py b/models/cv/detection/yolov3/ixrt/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..a7a60c878df96d294cdd56efe87a973a0e1f8765 --- /dev/null +++ b/models/cv/detection/yolov3/ixrt/inference.py @@ -0,0 +1,264 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import argparse +import glob +import json +import os +import time +import sys + +import torch +import numpy as np +import pycuda.autoinit +import pycuda.driver as cuda + +from coco_labels import coco80_to_coco91_class, labels +from common import save2json, box_class85to6 +from common import create_engine_context, get_io_bindings +from calibration_dataset import create_dataloaders +from datasets.post_process import get_post_process + +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval +from tqdm import tqdm +from tqdm.contrib import tzip + +import tensorrt + +from load_ixrt_plugin import load_ixrt_plugin +load_ixrt_plugin() + +def main(config): + + # Load dataloader + dataloader = create_dataloaders( + data_path=config.eval_dir, + annFile=config.coco_gt, + img_sz=config.imgsz, + batch_size=config.bsz, + step=config.loop_count, + data_process_type=config.data_process_type + ) + + # Load post process func + if config.test_mode == "MAP": + post_process_func = get_post_process(config.data_process_type) + + bsz = config.bsz + num_samples = 5000 + if config.loop_count > 0 and config.loop_count < num_samples/bsz : + num_samples = bsz * config.loop_count + num_batch = len(dataloader) + print("=" * 30) + print(f"Test Mode : {'Asynchronous' if config.use_async else 'Synchronous'}") + print(f"Total sample : {num_samples}\nBatch_size : {bsz}\nRun Batch : {num_batch}") + print("=" * 30) + + json_result = [] + forward_time = 0.0 + class_map = coco80_to_coco91_class() + + host_mem = tensorrt.IHostMemory + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + + # Load Engine + engine, context = create_engine_context(config.model_engine, logger) + inputs, outputs, allocations = get_io_bindings(engine) + + # Load nms_engine + if config.test_mode == "MAP" and config.nms_type == "GPU": + nms_engine, nms_context = create_engine_context(config.nms_engine, logger) + nms_inputs, nms_outputs, nms_allocations = get_io_bindings(nms_engine) + nms_output0 = np.zeros(nms_outputs[0]["shape"], 
nms_outputs[0]["dtype"]) + nms_output1 = np.zeros(nms_outputs[1]["shape"], nms_outputs[1]["dtype"]) + print(f"nms_output0 shape : {nms_output0.shape} nms_output0 type : {nms_output0.dtype}") + print(f"nms_output1 shape : {nms_output1.shape} nms_output1 type : {nms_output1.dtype}") + + # Warm up + if config.warm_up > 0: + print("\nWarm Start.") + for i in range(config.warm_up): + context.execute_v2(allocations) + print("Warm Done.") + + # Prepare the output data + output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) + print(f"output shape : {output.shape} output type : {output.dtype}") + + for batch_data, batch_img_shape, batch_img_id in tqdm(dataloader): + batch_data = batch_data.numpy() + batch_img_shape = [batch_img_shape[0].numpy(), batch_img_shape[1].numpy()] + # batch_img_id = batch_img_id.numpy() + + cur_bsz_sample = batch_data.shape[0] + + # Set input + cuda.memcpy_htod(inputs[0]["allocation"], batch_data) + + # Forward + start_time = time.time() + context.execute_v2(allocations) + end_time = time.time() + forward_time += end_time - start_time + + if config.test_mode == "MAP": + # Fetch output + cuda.memcpy_dtoh(output, outputs[0]["allocation"]) + + # Step 1 : prepare data to nms + _, box_num, box_unit = output.shape + if config.debug: + print(f"[Debug] box_num(25200) : {box_num}, box_unit(6) : {box_unit}") + + if config.decoder_faster == 0: + nms_input = box_class85to6(output.reshape(-1, box_unit)) + else: + nms_input = output + + # Step 2 : nms + # cpu nms(TODO) + + # gpu nms + if config.nms_type == "GPU": + + # Set nms input + cuda.memcpy_htod(nms_inputs[0]["allocation"], nms_input) + nms_context.execute_v2(nms_allocations) + cuda.memcpy_dtoh(nms_output0, nms_outputs[0]["allocation"]) + cuda.memcpy_dtoh(nms_output1, nms_outputs[1]["allocation"]) + + # Step 3 : post process + save + pred_boxes = post_process_func( + ori_img_shape=batch_img_shape, + imgsz=(config.imgsz, config.imgsz), + box_datas=nms_output0, + box_nums=nms_output1, + sample_num=cur_bsz_sample, + max_det=config.max_det + ) + save2json(batch_img_id, pred_boxes, json_result, class_map) + + fps = num_samples / forward_time + + if config.test_mode == "FPS": + print("FPS : ", fps) + print(f"Performance Check : Test {fps} >= target {config.fps_target}") + if fps >= config.fps_target: + print("pass!") + exit() + else: + print("failed!") + exit(1) + + if config.test_mode == "MAP": + if len(json_result) == 0: + print("Predict zero box!") + exit(1) + + if not os.path.exists(config.pred_dir): + os.makedirs(config.pred_dir) + + pred_json = os.path.join( + config.pred_dir, f"{config.model_name}_{config.precision}_preds.json" + ) + with open(pred_json, "w") as f: + json.dump(json_result, f) + + anno_json = config.coco_gt + anno = COCO(anno_json) # init annotations api + pred = anno.loadRes(pred_json) # init predictions api + eval = COCOeval(anno, pred, "bbox") + + eval.evaluate() + eval.accumulate() + print( + f"==============================eval {config.model_name} {config.precision} coco map ==============================" + ) + eval.summarize() + + map, map50 = eval.stats[:2] + print("MAP@0.5 : ", map50) + print(f"Accuracy Check : Test {map50} >= target {config.map_target}") + if map50 >= config.map_target: + print("pass!") + exit() + else: + print("failed!") + exit(1) + +def parse_config(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--model_name", type=str, default="YOLOV5s", help="YOLOV3 YOLOV5 YOLOV7 YOLOX" + ) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], 
default="int8", + help="The precision of datatype") + parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") + parser.add_argument( + "--model_engine", + type=str, + default="", + help="model engine path", + ) + parser.add_argument( + "--nms_engine", + type=str, + default="", + help="nms engine path", + ) + parser.add_argument( + "--coco_gt", + type=str, + default="data/datasets/cv/coco2017/annotations/instances_val2017.json", + help="coco instances_val2017.json", + ) + parser.add_argument("--warm_up", type=int, default=3, help="warm_up count") + parser.add_argument("--loop_count", type=int, default=-1, help="loop count") + parser.add_argument( + "--eval_dir", + type=str, + default="data/datasets/cv/coco2017/val2017", + help="coco image dir", + ) + parser.add_argument("--bsz", type=int, default=32, help="test batch size") + parser.add_argument( + "--imgsz", + "--img", + "--img-size", + type=int, + default=640, + help="inference size h,w", + ) + parser.add_argument("--max_det", type=int, default=1000, help="maximum detections per image") + parser.add_argument("--data_process_type", type=str, default="none") + parser.add_argument("--use_async", action="store_true") + parser.add_argument("--debug", action="store_true") + parser.add_argument("--pred_dir", type=str, default=".", help="pred save json dirs") + parser.add_argument("--map_target", type=float, default=0.56, help="target mAP") + parser.add_argument("--fps_target", type=float, default=-1.0, help="target fps") + parser.add_argument("--decoder_faster", type=int, default=0, help="decoder faster can use gpu nms directly") + parser.add_argument("--nms_type", type=str, default="GPU", help="GPU/CPU") + + config = parser.parse_args() + print("config:", config) + return config + +if __name__ == "__main__": + config = parse_config() + main(config) \ No newline at end of file diff --git a/models/cv/detection/yolov3/ixrt/load_ixrt_plugin.py b/models/cv/detection/yolov3/ixrt/load_ixrt_plugin.py new file mode 100644 index 0000000000000000000000000000000000000000..ae47dc8e854b6bea1f768e65c4dd481048bfebce --- /dev/null +++ b/models/cv/detection/yolov3/ixrt/load_ixrt_plugin.py @@ -0,0 +1,27 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +import ctypes +import tensorrt +from os.path import join, dirname, exists +def load_ixrt_plugin(logger=tensorrt.Logger(tensorrt.Logger.INFO), namespace="", dynamic_path=""): + if not dynamic_path: + dynamic_path = join(dirname(tensorrt.__file__), "lib", "libixrt_plugin.so") + if not exists(dynamic_path): + raise FileNotFoundError( + f"The ixrt_plugin lib {dynamic_path} is not existed, please provided effective plugin path!") + ctypes.CDLL(dynamic_path) + tensorrt.init_libnvinfer_plugins(logger, namespace) + print(f"Loaded plugin from {dynamic_path}") \ No newline at end of file diff --git a/models/cv/detection/yolov3/ixrt/modify_batchsize.py b/models/cv/detection/yolov3/ixrt/modify_batchsize.py new file mode 100644 index 0000000000000000000000000000000000000000..3a88c1603bd6f457fd4965257627dc29edcda4d1 --- /dev/null +++ b/models/cv/detection/yolov3/ixrt/modify_batchsize.py @@ -0,0 +1,52 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import onnx +import argparse + +def change_input_dim(model, bsz): + batch_size = bsz + + # The following code changes the first dimension of every input to be batch_size + # Modify as appropriate ... note that this requires all inputs to + # have the same batch_size + inputs = model.graph.input + for input in inputs: + # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. + # Add checks as needed. + dim1 = input.type.tensor_type.shape.dim[0] + # update dim to be a symbolic value + if isinstance(batch_size, str): + # set dynamic batch size + dim1.dim_param = batch_size + elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): + # set given batch size + dim1.dim_value = int(batch_size) + else: + # set batch size of 1 + dim1.dim_value = 1 + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--batch_size", type=int) + parser.add_argument("--origin_model", type=str) + parser.add_argument("--output_model", type=str) + args = parser.parse_args() + return args + +args = parse_args() +model = onnx.load(args.origin_model) +change_input_dim(model, args.batch_size) +onnx.save(model, args.output_model) \ No newline at end of file diff --git a/models/cv/detection/yolov3/ixrt/quant.py b/models/cv/detection/yolov3/ixrt/quant.py new file mode 100644 index 0000000000000000000000000000000000000000..36fd39a13c2e1e40f4dc0098f042e66e4bd0d26a --- /dev/null +++ b/models/cv/detection/yolov3/ixrt/quant.py @@ -0,0 +1,67 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import random +import argparse +import numpy as np +from tensorrt.deploy import static_quantize + +import torch +from calibration_dataset import create_dataloaders + +def setseed(seed=42): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model_name", type=str) + parser.add_argument("--model", type=str, default="yolov5s_with_decoder.onnx") + parser.add_argument("--data_process_type", type=str, default="none") + parser.add_argument("--dataset_dir", type=str, default="./coco2017/val2017") + parser.add_argument("--ann_file", type=str, default="./coco2017/annotations/instances_val2017.json") + parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], default="hist_percentile") + parser.add_argument("--disable_quant_names", nargs='*', type=str) + parser.add_argument("--save_dir", type=str, help="save path", default=None) + parser.add_argument("--bsz", type=int, default=32) + parser.add_argument("--step", type=int, default=20) + parser.add_argument("--seed", type=int, default=42) + parser.add_argument("--imgsz", type=int, default=640) + args = parser.parse_args() + return args + +args = parse_args() +setseed(args.seed) +model_name = args.model_name + +out_dir = args.save_dir +dataloader = create_dataloaders( + data_path=args.dataset_dir, + annFile=args.ann_file, + img_sz=args.imgsz, + batch_size=args.bsz, + step=args.step, + data_process_type=args.data_process_type +) +# print("disable_quant_names : ", args.disable_quant_names) +static_quantize(args.model, + calibration_dataloader=dataloader, + save_quant_onnx_path=os.path.join(out_dir, f"quantized_{model_name}.onnx"), + observer=args.observer, + data_preprocess=lambda x: x[0].to("cuda"), + quant_format="qdq", + disable_quant_names=args.disable_quant_names) \ No newline at end of file diff --git a/models/cv/detection/yolov3/ixrt/scripts/infer_yolov3_fp16_accuracy.sh b/models/cv/detection/yolov3/ixrt/scripts/infer_yolov3_fp16_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..7d6a609e03064b8334da1d3e91b958ead081686f --- /dev/null +++ b/models/cv/detection/yolov3/ixrt/scripts/infer_yolov3_fp16_accuracy.sh @@ -0,0 +1,210 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
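+
+# Usage sketch (assumes CONFIG_DIR, CHECKPOINTS_DIR, DATASETS_DIR, EVAL_DIR,
+# COCO_GT and RUN_DIR have been exported as described in the README):
+#
+#   bash scripts/infer_yolov3_fp16_accuracy.sh --bs 32 --tgt 0.65
+#
+# --bs overrides the batch size; --tgt (illustrative value above) is forwarded
+# to inference.py as the target mAP (--map_target) the run is checked against.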
+
+EXIT_STATUS=0
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then
+        EXIT_STATUS=1
+    fi
+}
+
+# Run parameters
+BSZ=32
+WARM_UP=-1
+TGT=-1
+LOOP_COUNT=-1
+RUN_MODE=MAP
+PRECISION=float16
+
+# Update arguments
+index=0
+options=$@
+arguments=($options)
+for argument in $options
+do
+    index=`expr $index + 1`
+    case $argument in
+      --bs) BSZ=${arguments[index]};;
+      --tgt) TGT=${arguments[index]};;
+    esac
+done
+
+source ${CONFIG_DIR}
+ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo CONFIG_DIR : ${CONFIG_DIR}
+echo ====================== Model Info ======================
+echo Model Name : ${MODEL_NAME}
+echo Onnx Path : ${ORIGINE_MODEL}
+
+step=0
+faster=0
+CURRENT_MODEL=${ORIGINE_MODEL}
+if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then
+    faster=1
+fi
+
+# Simplify Model
+let step++
+echo [STEP ${step}] : Simplify Model
+SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
+if [ -f ${SIM_MODEL} ];then
+    echo "  "Simplify Model skip, ${SIM_MODEL} already exists
+else
+    python3 ${RUN_DIR}/simplify_model.py \
+        --origin_model ${CURRENT_MODEL} \
+        --output_model ${SIM_MODEL}
+    echo "  "Generate ${SIM_MODEL}
+fi
+CURRENT_MODEL=${SIM_MODEL}
+
+# Cut Decoder
+let step++
+echo [STEP ${step}] : Cut Decoder
+NO_DECODER_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_without_decoder.onnx
+if [ -f ${NO_DECODER_MODEL} ];then
+    echo "  "Cut Decoder skip, ${NO_DECODER_MODEL} already exists
+else
+    python3 ${RUN_DIR}/cut_model.py \
+        --input_model ${CURRENT_MODEL} \
+        --output_model ${NO_DECODER_MODEL} \
+        --input_names ${MODEL_INPUT_NAMES[@]} \
+        --output_names ${DECODER_INPUT_NAMES[@]}
+fi
+CURRENT_MODEL=${NO_DECODER_MODEL}
+
+# Quant Model
+if [ $PRECISION == "int8" ];then
+    let step++
+    echo;
+    echo [STEP ${step}] : Quant Model
+    if [[ -z ${QUANT_EXIST_ONNX} ]];then
+        QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx
+    fi
+    if [[ -f ${QUANT_EXIST_ONNX} ]];then
+        CURRENT_MODEL=${QUANT_EXIST_ONNX}
+        echo "  "Quant Model Skip, ${QUANT_EXIST_ONNX} already exists
+    else
+        python3 ${RUN_DIR}/quant.py \
+            --model ${CURRENT_MODEL} \
+            --model_name ${MODEL_NAME} \
+            --dataset_dir ${EVAL_DIR} \
+            --ann_file ${COCO_GT} \
+            --data_process_type ${DATA_PROCESS_TYPE} \
+            --observer ${QUANT_OBSERVER} \
+            --disable_quant_names ${DISABLE_QUANT_LIST[@]} \
+            --save_dir $CHECKPOINTS_DIR \
+            --bsz ${QUANT_BATCHSIZE} \
+            --step ${QUANT_STEP} \
+            --seed ${QUANT_SEED} \
+            --imgsz ${IMGSIZE}
+        echo "  "Generate ${QUANT_EXIST_ONNX}
+    fi
+    CURRENT_MODEL=${QUANT_EXIST_ONNX}
+fi
+
+# Add Decoder
+if [ $LAYER_FUSION == "1" ]; then
+    let step++
+    echo;
+    echo [STEP ${step}] : Add Decoder
+    FUSION_ONNX=${CHECKPOINTS_DIR}/${MODEL_NAME}_fusion.onnx
+    if [ -f $FUSION_ONNX ];then
+        echo "  "Add Decoder Skip, $FUSION_ONNX already exists
+    else
+        python3 ${RUN_DIR}/deploy.py \
+            --src ${CURRENT_MODEL} \
+            --dst ${FUSION_ONNX} \
+            --decoder_type YoloV3Decoder \
+            --decoder_input_names ${DECODER_INPUT_NAMES[@]} \
+            --decoder8_anchor ${DECODER_8_ANCHOR[@]} \
+            --decoder16_anchor ${DECODER_16_ANCHOR[@]} \
+            --decoder32_anchor ${DECODER_32_ANCHOR[@]} \
+            --num_class ${DECODER_NUM_CLASS} \
+            --faster ${faster}
+    fi
+    CURRENT_MODEL=${FUSION_ONNX}
+fi
+
+# Change Batchsize
+let step++
+echo;
+echo [STEP ${step}] : Change Batchsize
+FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_bs${BSZ}.onnx
+if [ -f $FINAL_MODEL ];then
+    echo "  "Change Batchsize Skip, $FINAL_MODEL already exists
+else
+    python3
${RUN_DIR}/modify_batchsize.py \
+        --batch_size ${BSZ} \
+        --origin_model ${CURRENT_MODEL} \
+        --output_model ${FINAL_MODEL}
+    echo "  "Generate ${FINAL_MODEL}
+fi
+CURRENT_MODEL=${FINAL_MODEL}
+
+# Build Engine
+let step++
+echo;
+echo [STEP ${step}] : Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
+if [ -f $ENGINE_FILE ];then
+    echo "  "Build Engine Skip, $ENGINE_FILE already exists
+else
+    python3 ${RUN_DIR}/build_engine.py \
+        --precision ${PRECISION} \
+        --model ${CURRENT_MODEL} \
+        --engine ${ENGINE_FILE}
+    echo "  "Generate Engine ${ENGINE_FILE}
+fi
+if [[ ${RUN_MODE} == "MAP" && ${NMS_TYPE} == "GPU" ]];then
+    NMS_ENGINE=${CHECKPOINTS_DIR}/nms.engine
+    # Build NMS Engine
+    python3 ${RUN_DIR}/build_nms_engine.py \
+        --bsz ${BSZ} \
+        --path ${CHECKPOINTS_DIR} \
+        --all_box_num ${ALL_BOX_NUM} \
+        --max_box_pre_img ${MAX_BOX_PRE_IMG} \
+        --iou_thresh ${IOU_THRESH} \
+        --score_thresh ${SCORE_THRESH}
+fi
+
+# Inference
+let step++
+echo;
+echo [STEP ${step}] : Inference
+python3 ${RUN_DIR}/inference.py \
+    --model_engine=${ENGINE_FILE} \
+    --nms_engine=${NMS_ENGINE} \
+    --coco_gt=${COCO_GT} \
+    --eval_dir=${EVAL_DIR} \
+    --data_process_type ${DATA_PROCESS_TYPE} \
+    --decoder_faster=${faster} \
+    --imgsz=${IMGSIZE} \
+    --warm_up=${WARM_UP} \
+    --loop_count ${LOOP_COUNT} \
+    --test_mode ${RUN_MODE} \
+    --model_name ${MODEL_NAME} \
+    --precision ${PRECISION} \
+    --pred_dir ${CHECKPOINTS_DIR} \
+    --map_target ${TGT} \
+    --max_det ${MAX_BOX_PRE_IMG} \
+    --nms_type ${NMS_TYPE} \
+    --bsz ${BSZ}; check_status
+exit ${EXIT_STATUS}
\ No newline at end of file
diff --git a/models/cv/detection/yolov3/ixrt/scripts/infer_yolov3_fp16_performance.sh b/models/cv/detection/yolov3/ixrt/scripts/infer_yolov3_fp16_performance.sh
new file mode 100644
index 0000000000000000000000000000000000000000..4fdf2ada533a5ff82d0b68bf8c41648cca6d1eec
--- /dev/null
+++ b/models/cv/detection/yolov3/ixrt/scripts/infer_yolov3_fp16_performance.sh
@@ -0,0 +1,211 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
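+
+# Usage sketch (same environment variables as the accuracy script):
+#
+#   bash scripts/infer_yolov3_fp16_performance.sh --bs 32 --tgt 700
+#
+# Here RUN_MODE=FPS, so --tgt (illustrative value above) is forwarded to
+# inference.py as a throughput target (--fps_target) rather than a mAP target.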
+
+EXIT_STATUS=0
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then
+        EXIT_STATUS=1
+    fi
+}
+
+# Run parameters
+BSZ=32
+WARM_UP=3
+TGT=-1
+LOOP_COUNT=10
+RUN_MODE=FPS
+PRECISION=float16
+
+# Update arguments
+index=0
+options=$@
+arguments=($options)
+for argument in $options
+do
+    index=`expr $index + 1`
+    case $argument in
+      --bs) BSZ=${arguments[index]};;
+      --tgt) TGT=${arguments[index]};;
+    esac
+done
+
+source ${CONFIG_DIR}
+ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}
+
+echo PROJ_DIR : ${PROJ_DIR}
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo CONFIG_DIR : ${CONFIG_DIR}
+echo ====================== Model Info ======================
+echo Model Name : ${MODEL_NAME}
+echo Onnx Path : ${ORIGINE_MODEL}
+
+step=0
+faster=0
+CURRENT_MODEL=${ORIGINE_MODEL}
+if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then
+    faster=1
+fi
+
+# Simplify Model
+let step++
+echo [STEP ${step}] : Simplify Model
+SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
+if [ -f ${SIM_MODEL} ];then
+    echo "  "Simplify Model skip, ${SIM_MODEL} already exists
+else
+    python3 ${RUN_DIR}/simplify_model.py \
+        --origin_model ${CURRENT_MODEL} \
+        --output_model ${SIM_MODEL}
+    echo "  "Generate ${SIM_MODEL}
+fi
+CURRENT_MODEL=${SIM_MODEL}
+
+# Cut Decoder
+let step++
+echo [STEP ${step}] : Cut Decoder
+NO_DECODER_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_without_decoder.onnx
+if [ -f ${NO_DECODER_MODEL} ];then
+    echo "  "Cut Decoder skip, ${NO_DECODER_MODEL} already exists
+else
+    python3 ${RUN_DIR}/cut_model.py \
+        --input_model ${CURRENT_MODEL} \
+        --output_model ${NO_DECODER_MODEL} \
+        --input_names ${MODEL_INPUT_NAMES[@]} \
+        --output_names ${DECODER_INPUT_NAMES[@]}
+fi
+CURRENT_MODEL=${NO_DECODER_MODEL}
+
+# Quant Model
+if [ $PRECISION == "int8" ];then
+    let step++
+    echo;
+    echo [STEP ${step}] : Quant Model
+    if [[ -z ${QUANT_EXIST_ONNX} ]];then
+        QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx
+    fi
+    if [[ -f ${QUANT_EXIST_ONNX} ]];then
+        CURRENT_MODEL=${QUANT_EXIST_ONNX}
+        echo "  "Quant Model Skip, ${QUANT_EXIST_ONNX} already exists
+    else
+        python3 ${RUN_DIR}/quant.py \
+            --model ${CURRENT_MODEL} \
+            --model_name ${MODEL_NAME} \
+            --dataset_dir ${EVAL_DIR} \
+            --ann_file ${COCO_GT} \
+            --data_process_type ${DATA_PROCESS_TYPE} \
+            --observer ${QUANT_OBSERVER} \
+            --disable_quant_names ${DISABLE_QUANT_LIST[@]} \
+            --save_dir $CHECKPOINTS_DIR \
+            --bsz ${QUANT_BATCHSIZE} \
+            --step ${QUANT_STEP} \
+            --seed ${QUANT_SEED} \
+            --imgsz ${IMGSIZE}
+        echo "  "Generate ${QUANT_EXIST_ONNX}
+    fi
+    CURRENT_MODEL=${QUANT_EXIST_ONNX}
+fi
+
+# Add Decoder
+if [ $LAYER_FUSION == "1" ]; then
+    let step++
+    echo;
+    echo [STEP ${step}] : Add Decoder
+    FUSION_ONNX=${CHECKPOINTS_DIR}/${MODEL_NAME}_fusion.onnx
+    if [ -f $FUSION_ONNX ];then
+        echo "  "Add Decoder Skip, $FUSION_ONNX already exists
+    else
+        python3 ${RUN_DIR}/deploy.py \
+            --src ${CURRENT_MODEL} \
+            --dst ${FUSION_ONNX} \
+            --decoder_type YoloV3Decoder \
+            --decoder_input_names ${DECODER_INPUT_NAMES[@]} \
+            --decoder8_anchor ${DECODER_8_ANCHOR[@]} \
+            --decoder16_anchor ${DECODER_16_ANCHOR[@]} \
+            --decoder32_anchor ${DECODER_32_ANCHOR[@]} \
+            --num_class ${DECODER_NUM_CLASS} \
+            --faster ${faster}
+    fi
+    CURRENT_MODEL=${FUSION_ONNX}
+fi
+
+# Change Batchsize
+let step++
+echo;
+echo [STEP ${step}] : Change Batchsize
+FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_bs${BSZ}.onnx
+if [ -f $FINAL_MODEL ];then
+    echo "  "Change Batchsize Skip, $FINAL_MODEL already exists
+else
+    python3 ${RUN_DIR}/modify_batchsize.py \
+        --batch_size ${BSZ} \
+        --origin_model ${CURRENT_MODEL} \
+        --output_model ${FINAL_MODEL}
+    echo "  "Generate ${FINAL_MODEL}
+fi
+CURRENT_MODEL=${FINAL_MODEL}
+
+# Build Engine
+let step++
+echo;
+echo [STEP ${step}] : Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
+if [ -f $ENGINE_FILE ];then
+    echo "  "Build Engine Skip, $ENGINE_FILE already exists
+else
+    python3 ${RUN_DIR}/build_engine.py \
+        --precision ${PRECISION} \
+        --model ${CURRENT_MODEL} \
+        --engine ${ENGINE_FILE}
+    echo "  "Generate Engine ${ENGINE_FILE}
+fi
+if [[ ${RUN_MODE} == "MAP" && ${NMS_TYPE} == "GPU" ]];then
+    NMS_ENGINE=${CHECKPOINTS_DIR}/nms.engine
+    # Build NMS Engine
+    python3 ${RUN_DIR}/build_nms_engine.py \
+        --bsz ${BSZ} \
+        --path ${CHECKPOINTS_DIR} \
+        --all_box_num ${ALL_BOX_NUM} \
+        --max_box_pre_img ${MAX_BOX_PRE_IMG} \
+        --iou_thresh ${IOU_THRESH} \
+        --score_thresh ${SCORE_THRESH}
+fi
+
+# Inference
+let step++
+echo;
+echo [STEP ${step}] : Inference
+python3 ${RUN_DIR}/inference.py \
+    --model_engine=${ENGINE_FILE} \
+    --nms_engine=${NMS_ENGINE} \
+    --coco_gt=${COCO_GT} \
+    --eval_dir=${EVAL_DIR} \
+    --data_process_type ${DATA_PROCESS_TYPE} \
+    --decoder_faster=${faster} \
+    --imgsz=${IMGSIZE} \
+    --warm_up=${WARM_UP} \
+    --loop_count ${LOOP_COUNT} \
+    --test_mode ${RUN_MODE} \
+    --model_name ${MODEL_NAME} \
+    --precision ${PRECISION} \
+    --pred_dir ${CHECKPOINTS_DIR} \
+    --fps_target ${TGT} \
+    --max_det ${MAX_BOX_PRE_IMG} \
+    --nms_type ${NMS_TYPE} \
+    --bsz ${BSZ}; check_status
+exit ${EXIT_STATUS}
\ No newline at end of file
diff --git a/models/cv/detection/yolov3/ixrt/scripts/infer_yolov3_int8_accuracy.sh b/models/cv/detection/yolov3/ixrt/scripts/infer_yolov3_int8_accuracy.sh
new file mode 100644
index 0000000000000000000000000000000000000000..e216261271056f05c59ee49373c5389726c9097b
--- /dev/null
+++ b/models/cv/detection/yolov3/ixrt/scripts/infer_yolov3_int8_accuracy.sh
@@ -0,0 +1,210 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
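+
+# Note: with PRECISION=int8 the quantization step below is active. quant.py
+# calibrates on images from the COCO val set and caches the result as
+# quantized_${MODEL_NAME}.onnx in ${CHECKPOINTS_DIR}; later runs reuse that
+# file. Delete it, or point QUANT_EXIST_ONNX at another quantized model, to
+# recalibrate.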
+
+EXIT_STATUS=0
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then
+        EXIT_STATUS=1
+    fi
+}
+
+# Run parameters
+BSZ=32
+WARM_UP=-1
+TGT=-1
+LOOP_COUNT=-1
+RUN_MODE=MAP
+PRECISION=int8
+
+# Update arguments
+index=0
+options=$@
+arguments=($options)
+for argument in $options
+do
+    index=`expr $index + 1`
+    case $argument in
+      --bs) BSZ=${arguments[index]};;
+      --tgt) TGT=${arguments[index]};;
+    esac
+done
+
+source ${CONFIG_DIR}
+ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo CONFIG_DIR : ${CONFIG_DIR}
+echo ====================== Model Info ======================
+echo Model Name : ${MODEL_NAME}
+echo Onnx Path : ${ORIGINE_MODEL}
+
+step=0
+faster=0
+CURRENT_MODEL=${ORIGINE_MODEL}
+if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then
+    faster=1
+fi
+
+# Simplify Model
+let step++
+echo [STEP ${step}] : Simplify Model
+SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
+if [ -f ${SIM_MODEL} ];then
+    echo "  "Simplify Model skip, ${SIM_MODEL} already exists
+else
+    python3 ${RUN_DIR}/simplify_model.py \
+        --origin_model ${CURRENT_MODEL} \
+        --output_model ${SIM_MODEL}
+    echo "  "Generate ${SIM_MODEL}
+fi
+CURRENT_MODEL=${SIM_MODEL}
+
+# Cut Decoder
+let step++
+echo [STEP ${step}] : Cut Decoder
+NO_DECODER_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_without_decoder.onnx
+if [ -f ${NO_DECODER_MODEL} ];then
+    echo "  "Cut Decoder skip, ${NO_DECODER_MODEL} already exists
+else
+    python3 ${RUN_DIR}/cut_model.py \
+        --input_model ${CURRENT_MODEL} \
+        --output_model ${NO_DECODER_MODEL} \
+        --input_names ${MODEL_INPUT_NAMES[@]} \
+        --output_names ${DECODER_INPUT_NAMES[@]}
+fi
+CURRENT_MODEL=${NO_DECODER_MODEL}
+
+# Quant Model
+if [ $PRECISION == "int8" ];then
+    let step++
+    echo;
+    echo [STEP ${step}] : Quant Model
+    if [[ -z ${QUANT_EXIST_ONNX} ]];then
+        QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx
+    fi
+    if [[ -f ${QUANT_EXIST_ONNX} ]];then
+        CURRENT_MODEL=${QUANT_EXIST_ONNX}
+        echo "  "Quant Model Skip, ${QUANT_EXIST_ONNX} already exists
+    else
+        python3 ${RUN_DIR}/quant.py \
+            --model ${CURRENT_MODEL} \
+            --model_name ${MODEL_NAME} \
+            --dataset_dir ${EVAL_DIR} \
+            --ann_file ${COCO_GT} \
+            --data_process_type ${DATA_PROCESS_TYPE} \
+            --observer ${QUANT_OBSERVER} \
+            --disable_quant_names ${DISABLE_QUANT_LIST[@]} \
+            --save_dir $CHECKPOINTS_DIR \
+            --bsz ${QUANT_BATCHSIZE} \
+            --step ${QUANT_STEP} \
+            --seed ${QUANT_SEED} \
+            --imgsz ${IMGSIZE}
+        echo "  "Generate ${QUANT_EXIST_ONNX}
+    fi
+    CURRENT_MODEL=${QUANT_EXIST_ONNX}
+fi
+
+# Add Decoder
+if [ $LAYER_FUSION == "1" ]; then
+    let step++
+    echo;
+    echo [STEP ${step}] : Add Decoder
+    FUSION_ONNX=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_fusion.onnx
+    if [ -f $FUSION_ONNX ];then
+        echo "  "Add Decoder Skip, $FUSION_ONNX already exists
+    else
+        python3 ${RUN_DIR}/deploy.py \
+            --src ${CURRENT_MODEL} \
+            --dst ${FUSION_ONNX} \
+            --decoder_type YoloV3Decoder \
+            --decoder_input_names ${DECODER_INPUT_NAMES[@]} \
+            --decoder8_anchor ${DECODER_8_ANCHOR[@]} \
+            --decoder16_anchor ${DECODER_16_ANCHOR[@]} \
+            --decoder32_anchor ${DECODER_32_ANCHOR[@]} \
+            --num_class ${DECODER_NUM_CLASS} \
+            --faster ${faster}
+    fi
+    CURRENT_MODEL=${FUSION_ONNX}
+fi
+
+# Change Batchsize
+let step++
+echo;
+echo [STEP ${step}] : Change Batchsize
+FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_bs${BSZ}.onnx
+if [ -f $FINAL_MODEL ];then
+    echo "  "Change Batchsize Skip, $FINAL_MODEL already exists
+else
+    python3
${RUN_DIR}/modify_batchsize.py \
+        --batch_size ${BSZ} \
+        --origin_model ${CURRENT_MODEL} \
+        --output_model ${FINAL_MODEL}
+    echo "  "Generate ${FINAL_MODEL}
+fi
+CURRENT_MODEL=${FINAL_MODEL}
+
+# Build Engine
+let step++
+echo;
+echo [STEP ${step}] : Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
+if [ -f $ENGINE_FILE ];then
+    echo "  "Build Engine Skip, $ENGINE_FILE already exists
+else
+    python3 ${RUN_DIR}/build_engine.py \
+        --precision ${PRECISION} \
+        --model ${CURRENT_MODEL} \
+        --engine ${ENGINE_FILE}
+    echo "  "Generate Engine ${ENGINE_FILE}
+fi
+if [[ ${RUN_MODE} == "MAP" && ${NMS_TYPE} == "GPU" ]];then
+    NMS_ENGINE=${CHECKPOINTS_DIR}/nms.engine
+    # Build NMS Engine
+    python3 ${RUN_DIR}/build_nms_engine.py \
+        --bsz ${BSZ} \
+        --path ${CHECKPOINTS_DIR} \
+        --all_box_num ${ALL_BOX_NUM} \
+        --max_box_pre_img ${MAX_BOX_PRE_IMG} \
+        --iou_thresh ${IOU_THRESH} \
+        --score_thresh ${SCORE_THRESH}
+fi
+
+# Inference
+let step++
+echo;
+echo [STEP ${step}] : Inference
+python3 ${RUN_DIR}/inference.py \
+    --model_engine=${ENGINE_FILE} \
+    --nms_engine=${NMS_ENGINE} \
+    --coco_gt=${COCO_GT} \
+    --eval_dir=${EVAL_DIR} \
+    --data_process_type ${DATA_PROCESS_TYPE} \
+    --decoder_faster=${faster} \
+    --imgsz=${IMGSIZE} \
+    --warm_up=${WARM_UP} \
+    --loop_count ${LOOP_COUNT} \
+    --test_mode ${RUN_MODE} \
+    --model_name ${MODEL_NAME} \
+    --precision ${PRECISION} \
+    --pred_dir ${CHECKPOINTS_DIR} \
+    --map_target ${TGT} \
+    --max_det ${MAX_BOX_PRE_IMG} \
+    --nms_type ${NMS_TYPE} \
+    --bsz ${BSZ}; check_status
+exit ${EXIT_STATUS}
\ No newline at end of file
diff --git a/models/cv/detection/yolov3/ixrt/scripts/infer_yolov3_int8_performance.sh b/models/cv/detection/yolov3/ixrt/scripts/infer_yolov3_int8_performance.sh
new file mode 100644
index 0000000000000000000000000000000000000000..7faed28cd3cea40673078c1bbb0eac4311e2c9a6
--- /dev/null
+++ b/models/cv/detection/yolov3/ixrt/scripts/infer_yolov3_int8_performance.sh
@@ -0,0 +1,211 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
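+
+# Note: this is the throughput variant (RUN_MODE=FPS, WARM_UP=3, LOOP_COUNT=10
+# by default). The NMS engine is only built when RUN_MODE is MAP with
+# NMS_TYPE=GPU, so this script does not produce nms.engine.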
+
+EXIT_STATUS=0
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then
+        EXIT_STATUS=1
+    fi
+}
+
+# Run parameters
+BSZ=32
+WARM_UP=3
+TGT=-1
+LOOP_COUNT=10
+RUN_MODE=FPS
+PRECISION=int8
+
+# Update arguments
+index=0
+options=$@
+arguments=($options)
+for argument in $options
+do
+    index=`expr $index + 1`
+    case $argument in
+      --bs) BSZ=${arguments[index]};;
+      --tgt) TGT=${arguments[index]};;
+    esac
+done
+
+source ${CONFIG_DIR}
+ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo CONFIG_DIR : ${CONFIG_DIR}
+echo ====================== Model Info ======================
+echo Model Name : ${MODEL_NAME}
+echo Onnx Path : ${ORIGINE_MODEL}
+
+step=0
+faster=0
+CURRENT_MODEL=${ORIGINE_MODEL}
+if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then
+    faster=1
+fi
+
+# Simplify Model
+let step++
+echo [STEP ${step}] : Simplify Model
+SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
+if [ -f ${SIM_MODEL} ];then
+    echo "  "Simplify Model skip, ${SIM_MODEL} already exists
+else
+    python3 ${RUN_DIR}/simplify_model.py \
+        --origin_model ${CURRENT_MODEL} \
+        --output_model ${SIM_MODEL}
+    echo "  "Generate ${SIM_MODEL}
+fi
+CURRENT_MODEL=${SIM_MODEL}
+
+# Cut Decoder
+let step++
+echo [STEP ${step}] : Cut Decoder
+NO_DECODER_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_without_decoder.onnx
+if [ -f ${NO_DECODER_MODEL} ];then
+    echo "  "Cut Decoder skip, ${NO_DECODER_MODEL} already exists
+else
+    python3 ${RUN_DIR}/cut_model.py \
+        --input_model ${CURRENT_MODEL} \
+        --output_model ${NO_DECODER_MODEL} \
+        --input_names ${MODEL_INPUT_NAMES[@]} \
+        --output_names ${DECODER_INPUT_NAMES[@]}
+fi
+CURRENT_MODEL=${NO_DECODER_MODEL}
+
+
+# Quant Model
+if [ $PRECISION == "int8" ];then
+    let step++
+    echo;
+    echo [STEP ${step}] : Quant Model
+    if [[ -z ${QUANT_EXIST_ONNX} ]];then
+        QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx
+    fi
+    if [[ -f ${QUANT_EXIST_ONNX} ]];then
+        CURRENT_MODEL=${QUANT_EXIST_ONNX}
+        echo "  "Quant Model Skip, ${QUANT_EXIST_ONNX} already exists
+    else
+        python3 ${RUN_DIR}/quant.py \
+            --model ${CURRENT_MODEL} \
+            --model_name ${MODEL_NAME} \
+            --dataset_dir ${EVAL_DIR} \
+            --ann_file ${COCO_GT} \
+            --data_process_type ${DATA_PROCESS_TYPE} \
+            --observer ${QUANT_OBSERVER} \
+            --disable_quant_names ${DISABLE_QUANT_LIST[@]} \
+            --save_dir $CHECKPOINTS_DIR \
+            --bsz ${QUANT_BATCHSIZE} \
+            --step ${QUANT_STEP} \
+            --seed ${QUANT_SEED} \
+            --imgsz ${IMGSIZE}
+        echo "  "Generate ${QUANT_EXIST_ONNX}
+    fi
+    CURRENT_MODEL=${QUANT_EXIST_ONNX}
+fi
+
+# Add Decoder
+if [ $LAYER_FUSION == "1" ]; then
+    let step++
+    echo;
+    echo [STEP ${step}] : Add Decoder
+    FUSION_ONNX=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_fusion.onnx
+    if [ -f $FUSION_ONNX ];then
+        echo "  "Add Decoder Skip, $FUSION_ONNX already exists
+    else
+        python3 ${RUN_DIR}/deploy.py \
+            --src ${CURRENT_MODEL} \
+            --dst ${FUSION_ONNX} \
+            --decoder_type YoloV3Decoder \
+            --decoder_input_names ${DECODER_INPUT_NAMES[@]} \
+            --decoder8_anchor ${DECODER_8_ANCHOR[@]} \
+            --decoder16_anchor ${DECODER_16_ANCHOR[@]} \
+            --decoder32_anchor ${DECODER_32_ANCHOR[@]} \
+            --num_class ${DECODER_NUM_CLASS} \
+            --faster ${faster}
+    fi
+    CURRENT_MODEL=${FUSION_ONNX}
+fi
+
+# Change Batchsize
+let step++
+echo;
+echo [STEP ${step}] : Change Batchsize
+FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_bs${BSZ}.onnx
+if [ -f $FINAL_MODEL ];then
+    echo "  "Change Batchsize Skip, $FINAL_MODEL already exists
+else
+    python3
${RUN_DIR}/modify_batchsize.py \
+        --batch_size ${BSZ} \
+        --origin_model ${CURRENT_MODEL} \
+        --output_model ${FINAL_MODEL}
+    echo "  "Generate ${FINAL_MODEL}
+fi
+CURRENT_MODEL=${FINAL_MODEL}
+
+# Build Engine
+let step++
+echo;
+echo [STEP ${step}] : Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
+if [ -f $ENGINE_FILE ];then
+    echo "  "Build Engine Skip, $ENGINE_FILE already exists
+else
+    python3 ${RUN_DIR}/build_engine.py \
+        --precision ${PRECISION} \
+        --model ${CURRENT_MODEL} \
+        --engine ${ENGINE_FILE}
+    echo "  "Generate Engine ${ENGINE_FILE}
+fi
+if [[ ${RUN_MODE} == "MAP" && ${NMS_TYPE} == "GPU" ]];then
+    NMS_ENGINE=${CHECKPOINTS_DIR}/nms.engine
+    # Build NMS Engine
+    python3 ${RUN_DIR}/build_nms_engine.py \
+        --bsz ${BSZ} \
+        --path ${CHECKPOINTS_DIR} \
+        --all_box_num ${ALL_BOX_NUM} \
+        --max_box_pre_img ${MAX_BOX_PRE_IMG} \
+        --iou_thresh ${IOU_THRESH} \
+        --score_thresh ${SCORE_THRESH}
+fi
+
+# Inference
+let step++
+echo;
+echo [STEP ${step}] : Inference
+python3 ${RUN_DIR}/inference.py \
+    --model_engine=${ENGINE_FILE} \
+    --nms_engine=${NMS_ENGINE} \
+    --coco_gt=${COCO_GT} \
+    --eval_dir=${EVAL_DIR} \
+    --data_process_type ${DATA_PROCESS_TYPE} \
+    --decoder_faster=${faster} \
+    --imgsz=${IMGSIZE} \
+    --warm_up=${WARM_UP} \
+    --loop_count ${LOOP_COUNT} \
+    --test_mode ${RUN_MODE} \
+    --model_name ${MODEL_NAME} \
+    --precision ${PRECISION} \
+    --pred_dir ${CHECKPOINTS_DIR} \
+    --fps_target ${TGT} \
+    --max_det ${MAX_BOX_PRE_IMG} \
+    --nms_type ${NMS_TYPE} \
+    --bsz ${BSZ}; check_status
+exit ${EXIT_STATUS}
\ No newline at end of file
diff --git a/models/cv/detection/yolov3/ixrt/simplify_model.py b/models/cv/detection/yolov3/ixrt/simplify_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..1400fd81ddb4b3fae1b20d0fd35082a692f5d292
--- /dev/null
+++ b/models/cv/detection/yolov3/ixrt/simplify_model.py
@@ -0,0 +1,36 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
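+
+# Standalone usage (paths are illustrative):
+#
+#   python3 simplify_model.py --origin_model checkpoints/yolov3.onnx \
+#       --output_model checkpoints/yolov3_sim.onnx
+#
+# onnxsim folds constants and removes redundant nodes; shape inference is then
+# re-run so downstream tools see fully annotated tensor shapes.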
+
+import onnx
+import argparse
+from onnxsim import simplify
+
+# Simplify
+def simplify_model(args):
+    onnx_model = onnx.load(args.origin_model)
+    model_simp, check = simplify(onnx_model)
+    model_simp = onnx.shape_inference.infer_shapes(model_simp)
+    onnx.save(model_simp, args.output_model)
+    print("  Simplify onnx Done.")
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--origin_model", type=str)
+    parser.add_argument("--output_model", type=str)
+    args = parser.parse_args()
+    return args
+
+args = parse_args()
+simplify_model(args)
\ No newline at end of file
diff --git a/models/cv/detection/yolov5/ixrt/README.md b/models/cv/detection/yolov5/ixrt/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..94d7ec15e0a91efae4845825796a31307c7d9539
--- /dev/null
+++ b/models/cv/detection/yolov5/ixrt/README.md
@@ -0,0 +1,78 @@
+# YOLOv5-m
+
+## Description
+
+The YOLOv5 architecture is designed for efficient and accurate object detection in real-time scenarios. It employs a single convolutional neural network to simultaneously predict bounding boxes and class probabilities for multiple objects within an image. YOLOv5-m is the medium-sized model in the family.
+
+## Setup
+
+### Install
+```
+yum install mesa-libGL
+pip3 install tqdm
+pip3 install onnx
+pip3 install onnxsim
+pip3 install ultralytics
+pip3 install pycocotools
+pip3 install opencv-python==4.6.0.66
+```
+
+### Download
+
+Pretrained model: 
+
+Dataset: to download the validation dataset.
+ - Images directory: Path/To/val2017/*.jpg
+ - Annotations file: Path/To/annotations/instances_val2017.json
+
+### Model Conversion
+```bash
+
+mkdir checkpoints
+git clone https://github.com/ultralytics/yolov5
+cd yolov5
+# Switch to the required version branch
+git checkout v6.1
+
+# Some environment assets need to be installed
+wget https://ultralytics.com/assets/Arial.ttf
+cp Arial.ttf /root/.config/Ultralytics/Arial.ttf
+
+# Export to ONNX (see the export_onnx function in export.py for the implementation)
+python3 export.py --weights yolov5m.pt --include onnx --opset 11 --batch-size 32
+mv yolov5m.onnx /Path/to/checkpoints
+```
+
+## Inference
+```bash
+export PROJ_DIR=/Path/to/yolov5m/ixrt
+export DATASETS_DIR=/Path/to/coco2017/
+export CHECKPOINTS_DIR=./checkpoints
+export COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json
+export EVAL_DIR=${DATASETS_DIR}/val2017
+export RUN_DIR=/Path/to/yolov5m/ixrt
+export CONFIG_DIR=config/YOLOV5M_CONFIG
+```
+### FP16
+
+```bash
+# Accuracy
+bash scripts/infer_yolov5m_fp16_accuracy.sh
+# Performance
+bash scripts/infer_yolov5m_fp16_performance.sh
+```
+
+### INT8
+```bash
+# Accuracy
+bash scripts/infer_yolov5m_int8_accuracy.sh
+# Performance
+bash scripts/infer_yolov5m_int8_performance.sh
+```
+
+## Results
+
+Model   |BatchSize  |Precision |FPS      |MAP@0.5   |MAP@0.5:0.95 |
+--------|-----------|----------|---------|----------|-------------|
+YOLOv5m | 32        | FP16     | 680.93  | 0.637    | 0.447       |
+YOLOv5m | 32        | INT8     | 1328.50 | 0.627    | 0.425       |
\ No newline at end of file
diff --git a/models/cv/detection/yolov5/ixrt/build_engine.py b/models/cv/detection/yolov5/ixrt/build_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..a919bdd0183197ce125aa5492ec83e58e035675d
--- /dev/null
+++ b/models/cv/detection/yolov5/ixrt/build_engine.py
@@ -0,0 +1,58 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import cv2 +import argparse +import numpy as np + +import torch +import tensorrt + +from load_ixrt_plugin import load_ixrt_plugin +load_ixrt_plugin() + +def main(config): + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(config.model) + + precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 + # print("precision : ", precision) + build_config.set_flag(precision) + + plan = builder.build_serialized_network(network, build_config) + engine_file_path = config.engine + with open(engine_file_path, "wb") as f: + f.write(plan) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", + help="The precision of datatype") + # engine args + parser.add_argument("--engine", type=str, default=None) + + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = parse_args() + main(args) \ No newline at end of file diff --git a/models/cv/detection/yolov5/ixrt/build_nms_engine.py b/models/cv/detection/yolov5/ixrt/build_nms_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..3be0d83d0d966018f59b87d22f628b9b1ddf9b21 --- /dev/null +++ b/models/cv/detection/yolov5/ixrt/build_nms_engine.py @@ -0,0 +1,94 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
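+
+# This script wraps the ixrt "NMS" plugin op in a single-node ONNX graph and
+# serializes it into a standalone engine. The op consumes decoded boxes of
+# shape (bsz, all_box_num, 6), laid out as (x1, y1, x2, y2, class_id, score),
+# and emits up to max_box_pre_img kept boxes per image plus a per-image count
+# of kept boxes.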
+
+import os
+import argparse
+import torch
+import onnx
+from onnx import helper
+from onnx import TensorProto, numpy_helper
+import tensorrt
+
+def create_onnx(args):
+    nms = helper.make_node(
+        "NMS",
+        name="NMS",
+        inputs=["nms_input"],
+        outputs=["nms_output0", "nms_output1"],
+        nMaxKeep=args.max_box_pre_img,
+        fIoUThresh=args.iou_thresh,
+        fScoreThresh=args.score_thresh
+    )
+    graph = helper.make_graph(
+        nodes=[nms],
+        name="gpu_nms",
+        inputs=[
+            helper.make_tensor_value_info(
+                "nms_input", onnx.TensorProto.FLOAT, (args.bsz, args.all_box_num, 6)
+            )
+        ],
+        outputs=[
+            helper.make_tensor_value_info(
+                "nms_output0", onnx.TensorProto.FLOAT, (args.bsz, args.max_box_pre_img, 6)
+            ),
+            helper.make_tensor_value_info(
+                "nms_output1", onnx.TensorProto.INT32, (args.bsz,)
+            )
+        ],
+        initializer=[]
+    )
+
+    op = onnx.OperatorSetIdProto()
+    op.version = 13
+    model = onnx.helper.make_model(graph, opset_imports=[op])
+    onnx_path = args.path + "/nms.onnx"
+    onnx.save(model, onnx_path)
+
+def build_engine(args):
+    onnx_path = args.path + "/nms.onnx"
+    IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING)
+    builder = tensorrt.Builder(IXRT_LOGGER)
+    EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
+    network = builder.create_network(EXPLICIT_BATCH)
+    build_config = builder.create_builder_config()
+    parser = tensorrt.OnnxParser(network, IXRT_LOGGER)
+    parser.parse_from_file(onnx_path)
+    plan = builder.build_serialized_network(network, build_config)
+
+    engine_path = args.path + "/nms.engine"
+    with open(engine_path, "wb") as f:
+        f.write(plan)
+
+def main(args):
+    create_onnx(args)
+    build_engine(args)
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--bsz", type=int, default=1, help="batch size")
+    parser.add_argument("--path", type=str)
+    parser.add_argument("--all_box_num", type=int, default=25200)
+    parser.add_argument("--max_box_pre_img", type=int, default=1000)
+    parser.add_argument("--iou_thresh", type=float, default=0.6)
+    parser.add_argument("--score_thresh", type=float, default=0.001)
+
+    args = parser.parse_args()
+    return args
+
+if __name__ == "__main__":
+    args = parse_args()
+    main(args)
\ No newline at end of file
diff --git a/models/cv/detection/yolov5/ixrt/calibration_dataset.py b/models/cv/detection/yolov5/ixrt/calibration_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..de37775a0c617fdefca4342423a6a47bdc9b9c41
--- /dev/null
+++ b/models/cv/detection/yolov5/ixrt/calibration_dataset.py
@@ -0,0 +1,43 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
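+
+# The calibration loader below reuses the COCO val wrapper and caps the number
+# of calibration images at min(5000, batch_size * step); e.g. the defaults in
+# config/YOLOV5M_CONFIG (QUANT_BATCHSIZE=1, QUANT_STEP=32) calibrate on 32
+# images.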
+ +import os +import torch +import torchvision.datasets +from torch.utils.data import DataLoader +from datasets.coco import CocoDetection + +def create_dataloaders(data_path, annFile, img_sz=640, batch_size=32, step=32, workers=2, data_process_type="yolov5"): + dataset = CocoDetection( + root=data_path, + annFile=annFile, + img_size=img_sz, + data_process_type=data_process_type + ) + calibration_dataset = dataset + num_samples = min(5000, batch_size * step) + if num_samples > 0: + calibration_dataset = torch.utils.data.Subset( + dataset, indices=range(num_samples) + ) + + calibration_dataloader = DataLoader( + calibration_dataset, + shuffle=False, + batch_size=batch_size, + drop_last=False, + num_workers=workers, + ) + return calibration_dataloader \ No newline at end of file diff --git a/models/cv/detection/yolov5/ixrt/coco_labels.py b/models/cv/detection/yolov5/ixrt/coco_labels.py new file mode 100644 index 0000000000000000000000000000000000000000..43f5bd82cd257efdcab2bdba6bad64d9bb90416e --- /dev/null +++ b/models/cv/detection/yolov5/ixrt/coco_labels.py @@ -0,0 +1,104 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +labels = [ + "person", + "bicycle", + "car", + "motorcycle", + "airplane", + "bus", + "train", + "truck", + "boat", + "traffic light", + "fire hydrant", + "stop sign", + "parking meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "backpack", + "umbrella", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports ball", + "kite", + "baseball bat", + "baseball glove", + "skateboard", + "surfboard", + "tennis racket", + "bottle", + "wine glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot dog", + "pizza", + "donut", + "cake", + "chair", + "couch", + "potted plant", + "bed", + "dining table", + "toilet", + "tv", + "laptop", + "mouse", + "remote", + "keyboard", + "cell phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "book", + "clock", + "vase", + "scissors", + "teddy bear", + "hair drier", + "toothbrush", +] +def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper) + return [ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] + +__all__ = ["labels"] diff --git a/models/cv/detection/yolov5/ixrt/common.py b/models/cv/detection/yolov5/ixrt/common.py new file mode 100644 index 0000000000000000000000000000000000000000..aba2117c9942d6823abf73bf3ab94c291a7705e2 --- /dev/null +++ b/models/cv/detection/yolov5/ixrt/common.py @@ -0,0 +1,98 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX 
Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import numpy as np +from tqdm import tqdm + +import tensorrt +import pycuda.driver as cuda + +# input : [bsz, box_num, 5(cx, cy, w, h, conf) + class_num(prob[0], prob[1], ...)] +# output : [bsz, box_num, 6(left_top_x, left_top_y, right_bottom_x, right_bottom_y, class_id, max_prob*conf)] +def box_class85to6(input): + center_x_y = input[:, :2] + side = input[:, 2:4] + conf = input[:, 4:5] + class_id = np.argmax(input[:, 5:], axis = -1) + class_id = class_id.astype(np.float32).reshape(-1, 1) + 1 + max_prob = np.max(input[:, 5:], axis = -1).reshape(-1, 1) + x1_y1 = center_x_y - 0.5 * side + x2_y2 = center_x_y + 0.5 * side + nms_input = np.concatenate([x1_y1, x2_y2, class_id, max_prob*conf], axis = -1) + return nms_input + +def save2json(batch_img_id, pred_boxes, json_result, class_trans): + for i, boxes in enumerate(pred_boxes): + if boxes is not None: + image_id = int(batch_img_id[i]) + # have no target + if image_id == -1: + continue + for x, y, w, h, c, p in boxes: + x, y, w, h, p = float(x), float(y), float(w), float(h), float(p) + c = int(c) + json_result.append( + { + "image_id": image_id, + "category_id": class_trans[c - 1], + "bbox": [x, y, w, h], + "score": p, + } + ) + +def create_engine_context(engine_path, logger): + with open(engine_path, "rb") as f: + runtime = tensorrt.Runtime(logger) + assert runtime + engine = runtime.deserialize_cuda_engine(f.read()) + assert engine + context = engine.create_execution_context() + assert context + + return engine, context + +def get_io_bindings(engine): + # Setup I/O bindings + inputs = [] + outputs = [] + allocations = [] + + for i in range(engine.num_bindings): + is_input = False + if engine.binding_is_input(i): + is_input = True + name = engine.get_binding_name(i) + dtype = engine.get_binding_dtype(i) + shape = engine.get_binding_shape(i) + if is_input: + batch_size = shape[0] + size = np.dtype(tensorrt.nptype(dtype)).itemsize + for s in shape: + size *= s + allocation = cuda.mem_alloc(size) + binding = { + "index": i, + "name": name, + "dtype": np.dtype(tensorrt.nptype(dtype)), + "shape": list(shape), + "allocation": allocation, + } + print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") + allocations.append(allocation) + if engine.binding_is_input(i): + inputs.append(binding) + else: + outputs.append(binding) + return inputs, outputs, allocations \ No newline at end of file diff --git a/models/cv/detection/yolov5/ixrt/config/YOLOV5M_CONFIG b/models/cv/detection/yolov5/ixrt/config/YOLOV5M_CONFIG new file mode 100644 index 0000000000000000000000000000000000000000..d6342be3715685c6c76bd63fbcc09ea8cf57209a --- /dev/null +++ b/models/cv/detection/yolov5/ixrt/config/YOLOV5M_CONFIG @@ -0,0 +1,64 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+# BSZ : batch size used when building the engine and at inference time
+# IMGSIZE : model input height/width
+# RUN_MODE : [FPS, MAP]
+# PRECISION : [float16, int8]
+# MODEL_NAME : basename for the generated onnx/engine files
+# ORIGINE_MODEL : original onnx file
+# COCO_GT : COCO ground-truth annotation file for COCOeval
+# DATASET_DIR : dataset path used for quantization/inference
+# CHECKPOINTS_DIR : directory holding the generated onnx/engine files
+# LAYER_FUSION : implement the decoder with fused ops; 0 = no fusion, 1 = fused
+# DECODER_FASTER : two fused implementations exist; the faster one is quicker and can feed GPU NMS directly, while the other keeps its output identical to the onnx. 1 = faster
+IMGSIZE=640
+MODEL_NAME=yolov5m
+ORIGINE_MODEL=yolov5m.onnx
+DATA_PROCESS_TYPE=yolov5
+MODEL_INPUT_NAMES=(images)
+
+LAYER_FUSION=1
+DECODER_FASTER=1
+DECODER_NUM_CLASS=80
+DECODER_INPUT_NAMES=(/model.24/m.0/Conv_output_0 /model.24/m.1/Conv_output_0 /model.24/m.2/Conv_output_0)
+DECODER_8_ANCHOR=(10 13 16 30 33 23)
+DECODER_16_ANCHOR=(30 61 62 45 59 119)
+DECODER_32_ANCHOR=(116 90 156 198 373 326)
+
+# NMS CONFIG
+    # IOU_THRESH : IoU threshold
+    # SCORE_THRESH : bbox confidence threshold
+    # MAX_BOX_PRE_IMG : upper bound on predicted bboxes per image
+    # ALL_BOX_NUM : number of boxes per image fed into NMS
+    # NMS_TYPE : GPU/CPU(TODO)
+IOU_THRESH=0.6
+SCORE_THRESH=0.001
+MAX_BOX_PRE_IMG=1000
+ALL_BOX_NUM=25200
+NMS_TYPE=GPU
+
+# QUANT CONFIG (only takes effect when PRECISION is int8)
+    # QUANT_OBSERVER : quantization observer, one of [hist_percentile, percentile, minmax, entropy, ema]
+    # QUANT_BATCHSIZE : dataloader batch size for calibration; best kept equal to the batch size baked into the onnx, otherwise some ops (e.g. Reshape) may infer wrong shapes
+    # QUANT_STEP : number of calibration steps
+    # QUANT_SEED : random seed, to make quantization reproducible
+    # QUANT_EXIST_ONNX : set this if a quantized model from another source should be used
+QUANT_OBSERVER=hist_percentile
+QUANT_BATCHSIZE=1
+QUANT_STEP=32
+QUANT_SEED=42
+DISABLE_QUANT_LIST=()
+QUANT_EXIST_ONNX=
\ No newline at end of file
diff --git a/models/cv/detection/yolov5/ixrt/cut_model.py b/models/cv/detection/yolov5/ixrt/cut_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..e9ee19aadf0809fe1b97e3225d09150fb54513f7
--- /dev/null
+++ b/models/cv/detection/yolov5/ixrt/cut_model.py
@@ -0,0 +1,31 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
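+
+# Standalone usage (the tensor names below are the YOLOv5m defaults from
+# config/YOLOV5M_CONFIG; adjust them for other models):
+#
+#   python3 cut_model.py --input_model yolov5m_sim.onnx \
+#       --output_model yolov5m_without_decoder.onnx \
+#       --input_names images \
+#       --output_names /model.24/m.0/Conv_output_0 /model.24/m.1/Conv_output_0 /model.24/m.2/Conv_output_0
+#
+# onnx.utils.extract_model slices the graph between the named tensors, which
+# drops the decoder head so it can be re-attached later as a fused plugin op.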
+ +import onnx +import argparse +from onnxsim import simplify + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--input_model", type=str) + parser.add_argument("--output_model", type=str) + parser.add_argument("--input_names", nargs='+', type=str) + parser.add_argument("--output_names", nargs='+', type=str) + args = parser.parse_args() + return args + +args = parse_args() +onnx.utils.extract_model(args.input_model, args.output_model, args.input_names, args.output_names) +print(" Cut Model Done.") \ No newline at end of file diff --git a/models/cv/detection/yolov5/ixrt/datasets/__init__.py b/models/cv/detection/yolov5/ixrt/datasets/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..162e24b462289dcee7b7a2888b93fad1115def81 --- /dev/null +++ b/models/cv/detection/yolov5/ixrt/datasets/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. \ No newline at end of file diff --git a/models/cv/detection/yolov5/ixrt/datasets/coco.py b/models/cv/detection/yolov5/ixrt/datasets/coco.py new file mode 100644 index 0000000000000000000000000000000000000000..73c5df54761b917ecd0127fb56b61d9bd34c1196 --- /dev/null +++ b/models/cv/detection/yolov5/ixrt/datasets/coco.py @@ -0,0 +1,131 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os.path +from typing import Any, Callable, List, Optional, Tuple + +import cv2 + +from .vision import VisionDataset +from .pre_process import get_post_process +class CocoDetection(VisionDataset): + """`MS Coco Detection `_ Dataset. + + It requires the `COCO API to be installed `_. + + Args: + root (string): Root directory where images are downloaded to. + annFile (string): Path to json annotation file. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.PILToTensor`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + transforms (callable, optional): A function/transform that takes input sample and its target as entry + and returns a transformed version. 
+ """ + + def __init__( + self, + root: str, + annFile: str, + img_size: int, + data_process_type: str, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + transforms: Optional[Callable] = None, + + ) -> None: + super().__init__(root, transforms, transform, target_transform) + from pycocotools.coco import COCO + + self.coco = COCO(annFile) + self.ids = list(sorted(self.coco.imgs.keys())) + self.img_size = img_size + + self.transforms = get_post_process(data_process_type) + + def _load_image(self, id: int): + path = self.coco.loadImgs(id)[0]["file_name"] + data = cv2.imread(os.path.join(self.root, path)) + return data + + def _load_target(self, id: int) -> List[Any]: + return self.coco.loadAnns(self.coco.getAnnIds(id)) + + def __getitem__(self, index: int) -> Tuple[Any, Any]: + id = self.ids[index] + image = self._load_image(id) + target = self._load_target(id) + origin_shape = image.shape[:2] + + if self.transforms is not None: + image = self.transforms(image, self.img_size) + + if len(target) > 0: + image_id = target[0]["image_id"] + else: + # have no target + image_id = -1 + return image, origin_shape, image_id + + def __len__(self) -> int: + return len(self.ids) + + +class CocoCaptions(CocoDetection): + """`MS Coco Captions `_ Dataset. + + It requires the `COCO API to be installed `_. + + Args: + root (string): Root directory where images are downloaded to. + annFile (string): Path to json annotation file. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.PILToTensor`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + transforms (callable, optional): A function/transform that takes input sample and its target as entry + and returns a transformed version. + + Example: + + .. code:: python + + import torchvision.datasets as dset + import torchvision.transforms as transforms + cap = dset.CocoCaptions(root = 'dir where images are', + annFile = 'json annotation file', + transform=transforms.PILToTensor()) + + print('Number of samples: ', len(cap)) + img, target = cap[3] # load 4th sample + + print("Image Size: ", img.size()) + print(target) + + Output: :: + + Number of samples: 82783 + Image Size: (3L, 427L, 640L) + [u'A plane emitting smoke stream flying over a mountain.', + u'A plane darts across a bright blue sky behind a mountain covered in snow', + u'A plane leaves a contrail above the snowy mountain top.', + u'A mountain that has a plane flying overheard in the distance.', + u'A mountain view with a plume of smoke in the background'] + + """ + + def _load_target(self, id: int) -> List[str]: + return [ann["caption"] for ann in super()._load_target(id)] diff --git a/models/cv/detection/yolov5/ixrt/datasets/common.py b/models/cv/detection/yolov5/ixrt/datasets/common.py new file mode 100644 index 0000000000000000000000000000000000000000..ef36eba394917bc05af46f33be48463df50f540d --- /dev/null +++ b/models/cv/detection/yolov5/ixrt/datasets/common.py @@ -0,0 +1,81 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import cv2 +import math +import numpy as np + +def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32): + # Resize and pad image while meeting stride-multiple constraints + shape = im.shape[:2] # current shape [height, width] + if isinstance(new_shape, int): + new_shape = (new_shape, new_shape) + + # Scale ratio (new / old) + r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) + if not scaleup: # only scale down, do not scale up (for better val mAP) + r = min(r, 1.0) + + # Compute padding + ratio = r, r # width, height ratios + new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) + dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding + if auto: # minimum rectangle + dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding + elif scaleFill: # stretch + dw, dh = 0.0, 0.0 + new_unpad = (new_shape[1], new_shape[0]) + ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios + + dw /= 2 # divide padding into 2 sides + dh /= 2 + + if shape[::-1] != new_unpad: # resize + im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR) + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border + return im, ratio, (dw, dh) + +def scale_boxes(net_shape, boxes, ori_shape, use_letterbox=False): + # Rescale boxes (xyxy) from net_shape to ori_shape + + if use_letterbox: + + gain = min( + net_shape[0] / ori_shape[0], net_shape[1] / ori_shape[1] + ) # gain = new / old + pad = (net_shape[1] - ori_shape[1] * gain) / 2, ( + net_shape[0] - ori_shape[0] * gain + ) / 2.0 + + boxes[:, [0, 2]] -= pad[0] # x padding + boxes[:, [1, 3]] -= pad[1] # y padding + boxes[:, :4] /= gain + else: + x_scale, y_scale = net_shape[1] / ori_shape[1], net_shape[0] / ori_shape[0] + + boxes[:, 0] /= x_scale + boxes[:, 1] /= y_scale + boxes[:, 2] /= x_scale + boxes[:, 3] /= y_scale + + clip_boxes(boxes, ori_shape) + return boxes + +def clip_boxes(boxes, shape): + + boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2 + boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2 \ No newline at end of file diff --git a/models/cv/detection/yolov5/ixrt/datasets/post_process.py b/models/cv/detection/yolov5/ixrt/datasets/post_process.py new file mode 100644 index 0000000000000000000000000000000000000000..8590816a0df18b6ef296ebe305b15b81240ab1d0 --- /dev/null +++ b/models/cv/detection/yolov5/ixrt/datasets/post_process.py @@ -0,0 +1,130 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import cv2 +import math +import numpy as np + +from .common import letterbox, scale_boxes, clip_boxes + +def get_post_process(data_process_type): + if data_process_type == "yolov5": + return Yolov5Postprocess + elif data_process_type == "yolov3": + return Yolov3Postprocess + elif data_process_type == "yolox": + return YoloxPostprocess + return None + +def Yolov3Postprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + cur_box = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + boxes = scale_boxes( + (imgsz[0], imgsz[1]), + cur_box, + (ori_img_shape[0][i], ori_img_shape[1][i]), + use_letterbox=False + ) + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box + +def Yolov5Postprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + cur_box = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + boxes = scale_boxes( + (imgsz[0], imgsz[1]), + cur_box, + (ori_img_shape[0][i], ori_img_shape[1][i]), + use_letterbox=True + ) + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box + +def YoloxPostprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + boxes = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + r = min(imgsz[0]/ori_img_shape[0][i], imgsz[1]/ori_img_shape[1][i]) + boxes[:, :4] /= r + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + clip_boxes(boxes, (ori_img_shape[0][i], ori_img_shape[1][i])) + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box \ No newline at end of file diff --git a/models/cv/detection/yolov5/ixrt/datasets/pre_process.py b/models/cv/detection/yolov5/ixrt/datasets/pre_process.py new file mode 100644 index 0000000000000000000000000000000000000000..c651f8adb7c8190c214fbbbb7769c7d0713e9619 --- /dev/null +++ b/models/cv/detection/yolov5/ixrt/datasets/pre_process.py @@ -0,0 +1,71 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import cv2 +import math +import numpy as np + +from .common import letterbox + +def get_post_process(data_process_type): + if data_process_type == "yolov5": + return Yolov5Preprocess + elif data_process_type == "yolov3": + return Yolov3Preprocess + elif data_process_type == "yolox": + return YoloxPreprocess + return None + +def Yolov3Preprocess(image, img_size): + + h0, w0 = image.shape[:2] # orig hw + r = img_size / max(h0, w0) # ratio + + image = cv2.resize(image, (img_size, img_size)) + image = image.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + image = np.ascontiguousarray(image).astype(np.float32) / 255.0 # 0~1 np array + return image + +def Yolov5Preprocess(image, img_size, augment=False): + + h0, w0 = image.shape[:2] # orig hw + r = img_size / max(h0, w0) # ratio + + if r != 1: # if sizes are not equal + interp = cv2.INTER_LINEAR if (augment or r > 1) else cv2.INTER_AREA + image = cv2.resize(image, (math.ceil(w0 * r), math.ceil(h0 * r)), interpolation=interp) + + # shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size rect == True + + image, ratio, dwdh = letterbox(image, new_shape=img_size, auto=False, scaleup=False) + image = image.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + image = np.ascontiguousarray(image).astype(np.float32) / 255.0 # 0~1 np array + return image + +def YoloxPreprocess(img, img_size, swap=(2,0,1)): + + padded_img = np.ones((img_size, img_size, 3), dtype=np.uint8) * 114 + r = min(img_size / img.shape[0], img_size / img.shape[1]) + resized_img = cv2.resize( + img, + (int(img.shape[1] * r), int(img.shape[0] * r)), + interpolation=cv2.INTER_LINEAR, + ).astype(np.uint8) + + padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img + padded_img = padded_img.transpose(swap) + padded_img = np.ascontiguousarray(padded_img, dtype=np.float32) + + return padded_img \ No newline at end of file diff --git a/models/cv/detection/yolov5/ixrt/datasets/vision.py b/models/cv/detection/yolov5/ixrt/datasets/vision.py new file mode 100644 index 0000000000000000000000000000000000000000..eadefb2c5b35abd0a11fa85c65891461a210aef8 --- /dev/null +++ b/models/cv/detection/yolov5/ixrt/datasets/vision.py @@ -0,0 +1,151 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +from typing import Any, Callable, List, Optional, Tuple + +import torch +import torch.utils.data as data + +from types import FunctionType + +def _log_api_usage_once(obj: Any) -> None: + + """ + Logs API usage(module and name) within an organization. 
+ In a large ecosystem, it's often useful to track the PyTorch and + TorchVision APIs usage. This API provides the similar functionality to the + logging module in the Python stdlib. It can be used for debugging purpose + to log which methods are used and by default it is inactive, unless the user + manually subscribes a logger via the `SetAPIUsageLogger method `_. + Please note it is triggered only once for the same API call within a process. + It does not collect any data from open-source users since it is no-op by default. + For more information, please refer to + * PyTorch note: https://pytorch.org/docs/stable/notes/large_scale_deployments.html#api-usage-logging; + * Logging policy: https://github.com/pytorch/vision/issues/5052; + + Args: + obj (class instance or method): an object to extract info from. + """ + module = obj.__module__ + if not module.startswith("torchvision"): + module = f"torchvision.internal.{module}" + name = obj.__class__.__name__ + if isinstance(obj, FunctionType): + name = obj.__name__ + torch._C._log_api_usage_once(f"{module}.{name}") + +class VisionDataset(data.Dataset): + """ + Base Class For making datasets which are compatible with torchvision. + It is necessary to override the ``__getitem__`` and ``__len__`` method. + + Args: + root (string): Root directory of dataset. + transforms (callable, optional): A function/transforms that takes in + an image and a label and returns the transformed versions of both. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + + .. note:: + + :attr:`transforms` and the combination of :attr:`transform` and :attr:`target_transform` are mutually exclusive. + """ + + _repr_indent = 4 + + def __init__( + self, + root: str, + transforms: Optional[Callable] = None, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + ) -> None: + _log_api_usage_once(self) + if isinstance(root, str): + root = os.path.expanduser(root) + self.root = root + + has_transforms = transforms is not None + has_separate_transform = transform is not None or target_transform is not None + if has_transforms and has_separate_transform: + raise ValueError("Only transforms or transform/target_transform can be passed as argument") + + # for backwards-compatibility + self.transform = transform + self.target_transform = target_transform + + if has_separate_transform: + transforms = StandardTransform(transform, target_transform) + self.transforms = transforms + + def __getitem__(self, index: int) -> Any: + """ + Args: + index (int): Index + + Returns: + (Any): Sample and meta data, optionally transformed by the respective transforms. 
+ """ + raise NotImplementedError + + def __len__(self) -> int: + raise NotImplementedError + + def __repr__(self) -> str: + head = "Dataset " + self.__class__.__name__ + body = [f"Number of datapoints: {self.__len__()}"] + if self.root is not None: + body.append(f"Root location: {self.root}") + body += self.extra_repr().splitlines() + if hasattr(self, "transforms") and self.transforms is not None: + body += [repr(self.transforms)] + lines = [head] + [" " * self._repr_indent + line for line in body] + return "\n".join(lines) + + def _format_transform_repr(self, transform: Callable, head: str) -> List[str]: + lines = transform.__repr__().splitlines() + return [f"{head}{lines[0]}"] + ["{}{}".format(" " * len(head), line) for line in lines[1:]] + + def extra_repr(self) -> str: + return "" + + +class StandardTransform: + def __init__(self, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None) -> None: + self.transform = transform + self.target_transform = target_transform + + def __call__(self, input: Any, target: Any) -> Tuple[Any, Any]: + if self.transform is not None: + input = self.transform(input) + if self.target_transform is not None: + target = self.target_transform(target) + return input, target + + def _format_transform_repr(self, transform: Callable, head: str) -> List[str]: + lines = transform.__repr__().splitlines() + return [f"{head}{lines[0]}"] + ["{}{}".format(" " * len(head), line) for line in lines[1:]] + + def __repr__(self) -> str: + body = [self.__class__.__name__] + if self.transform is not None: + body += self._format_transform_repr(self.transform, "Transform: ") + if self.target_transform is not None: + body += self._format_transform_repr(self.target_transform, "Target transform: ") + + return "\n".join(body) diff --git a/models/cv/detection/yolov5/ixrt/deploy.py b/models/cv/detection/yolov5/ixrt/deploy.py new file mode 100644 index 0000000000000000000000000000000000000000..8c2cc424f699e01bc88dab98a29dc4c83e4d9b9e --- /dev/null +++ b/models/cv/detection/yolov5/ixrt/deploy.py @@ -0,0 +1,150 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +# !/usr/bin/env python +# -*- coding: utf-8 -*- +import argparse +from tensorrt.deploy.api import GraphTransform, create_source, create_target + +class Transform: + def __init__(self, graph): + self.t = GraphTransform(graph) + self.graph = graph + + def ReplaceFocus(self, input_edge, outputs, to_op): + input_var = self.graph.get_variable(input_edge) + op = self.graph.get_operator(to_op) + self.t.delete_operators_between_var_op( + from_var=input_var, to_op=op + ) + self.t.make_operator( + "Focus", inputs=input_edge, outputs=outputs + ) + return self.graph + + def AddYoloDecoderOp(self, inputs: list, outputs: list, op_type, **attributes): + if attributes["anchor"] is None: + del attributes["anchor"] + self.t.make_operator( + op_type, inputs=inputs, outputs=outputs, **attributes + ) + return self.graph + + def AddConcatOp(self, inputs: list, outputs, **attributes): + self.t.make_operator( + "Concat", inputs=inputs, outputs=outputs, **attributes + ) + return self.graph + +def customize_ops(graph, args): + t = Transform(graph) + fuse_focus = args.focus_input is not None and args.focus_output is not None and args.focus_last_node is not None + if fuse_focus: + graph = t.ReplaceFocus( + input_edge=args.focus_input, + outputs=args.focus_output, + to_op=args.focus_last_node + ) + decoder_input = args.decoder_input_names + num = len(decoder_input) // 3 + graph = t.AddYoloDecoderOp( + inputs=decoder_input[:num], + outputs=["decoder_8"], + op_type=args.decoder_type, + anchor=args.decoder8_anchor, + num_class=args.num_class, + stride=8, + faster_impl=args.faster + ) + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num:num*2], + outputs=["decoder_16"], + op_type=args.decoder_type, + anchor=args.decoder16_anchor, + num_class=args.num_class, + stride=16, + faster_impl=args.faster + ) + + if args.decoder64_anchor is not None: + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num*2:num*2+1], + outputs=["decoder_32"], + op_type=args.decoder_type, + anchor=args.decoder32_anchor, + num_class=args.num_class, + stride=32, + faster_impl=args.faster + ) + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num*2+1:], + outputs=["decoder_64"], + op_type=args.decoder_type, + anchor=args.decoder64_anchor, + num_class=args.num_class, + stride=64, + faster_impl=args.faster + ) + graph = t.AddConcatOp( + inputs=["decoder_8", "decoder_16", "decoder_32", "decoder_64"], + outputs=["output"], + axis=1 + ) + else: + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num*2:], + outputs=["decoder_32"], + op_type=args.decoder_type, + anchor=args.decoder32_anchor, + num_class=args.num_class, + stride=32, + faster_impl=args.faster + ) + graph = t.AddConcatOp( + inputs=["decoder_32", "decoder_16", "decoder_8"], + outputs=["output"], + axis=1 + ) + + graph.outputs.clear() + graph.add_output("output") + graph.outputs["output"].dtype = "FLOAT" + return graph + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--src", type=str) + parser.add_argument("--dst", type=str) + parser.add_argument("--decoder_type", type=str, choices=["YoloV3Decoder", "YoloV5Decoder", "YoloV7Decoder", "YoloxDecoder"]) + parser.add_argument("--decoder_input_names", nargs='+', type=str) + parser.add_argument("--decoder8_anchor", nargs='*', type=int) + parser.add_argument("--decoder16_anchor", nargs='*', type=int) + parser.add_argument("--decoder32_anchor", nargs='*', type=int) + parser.add_argument("--decoder64_anchor", nargs='*', type=int, default=None) + parser.add_argument("--num_class", type=int, default=80) + 
parser.add_argument("--faster", type=int, default=1) + parser.add_argument("--focus_input", type=str, default=None) + parser.add_argument("--focus_output", type=str, default=None) + parser.add_argument("--focus_last_node", type=str, default=None) + args = parser.parse_args() + return args + +if __name__ == "__main__": + + args = parse_args() + graph = create_source(args.src)() + graph = customize_ops(graph, args) + create_target(saved_path=args.dst).export(graph) + print("Surged onnx lies on", args.dst) \ No newline at end of file diff --git a/models/cv/detection/yolov5/ixrt/inference.py b/models/cv/detection/yolov5/ixrt/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..a7a60c878df96d294cdd56efe87a973a0e1f8765 --- /dev/null +++ b/models/cv/detection/yolov5/ixrt/inference.py @@ -0,0 +1,264 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import argparse +import glob +import json +import os +import time +import sys + +import torch +import numpy as np +import pycuda.autoinit +import pycuda.driver as cuda + +from coco_labels import coco80_to_coco91_class, labels +from common import save2json, box_class85to6 +from common import create_engine_context, get_io_bindings +from calibration_dataset import create_dataloaders +from datasets.post_process import get_post_process + +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval +from tqdm import tqdm +from tqdm.contrib import tzip + +import tensorrt + +from load_ixrt_plugin import load_ixrt_plugin +load_ixrt_plugin() + +def main(config): + + # Load dataloader + dataloader = create_dataloaders( + data_path=config.eval_dir, + annFile=config.coco_gt, + img_sz=config.imgsz, + batch_size=config.bsz, + step=config.loop_count, + data_process_type=config.data_process_type + ) + + # Load post process func + if config.test_mode == "MAP": + post_process_func = get_post_process(config.data_process_type) + + bsz = config.bsz + num_samples = 5000 + if config.loop_count > 0 and config.loop_count < num_samples/bsz : + num_samples = bsz * config.loop_count + num_batch = len(dataloader) + print("=" * 30) + print(f"Test Mode : {'Asynchronous' if config.use_async else 'Synchronous'}") + print(f"Total sample : {num_samples}\nBatch_size : {bsz}\nRun Batch : {num_batch}") + print("=" * 30) + + json_result = [] + forward_time = 0.0 + class_map = coco80_to_coco91_class() + + host_mem = tensorrt.IHostMemory + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + + # Load Engine + engine, context = create_engine_context(config.model_engine, logger) + inputs, outputs, allocations = get_io_bindings(engine) + + # Load nms_engine + if config.test_mode == "MAP" and config.nms_type == "GPU": + nms_engine, nms_context = create_engine_context(config.nms_engine, logger) + nms_inputs, nms_outputs, nms_allocations = get_io_bindings(nms_engine) + nms_output0 = np.zeros(nms_outputs[0]["shape"], 
nms_outputs[0]["dtype"]) + nms_output1 = np.zeros(nms_outputs[1]["shape"], nms_outputs[1]["dtype"]) + print(f"nms_output0 shape : {nms_output0.shape} nms_output0 type : {nms_output0.dtype}") + print(f"nms_output1 shape : {nms_output1.shape} nms_output1 type : {nms_output1.dtype}") + + # Warm up + if config.warm_up > 0: + print("\nWarm Start.") + for i in range(config.warm_up): + context.execute_v2(allocations) + print("Warm Done.") + + # Prepare the output data + output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) + print(f"output shape : {output.shape} output type : {output.dtype}") + + for batch_data, batch_img_shape, batch_img_id in tqdm(dataloader): + batch_data = batch_data.numpy() + batch_img_shape = [batch_img_shape[0].numpy(), batch_img_shape[1].numpy()] + # batch_img_id = batch_img_id.numpy() + + cur_bsz_sample = batch_data.shape[0] + + # Set input + cuda.memcpy_htod(inputs[0]["allocation"], batch_data) + + # Forward + start_time = time.time() + context.execute_v2(allocations) + end_time = time.time() + forward_time += end_time - start_time + + if config.test_mode == "MAP": + # Fetch output + cuda.memcpy_dtoh(output, outputs[0]["allocation"]) + + # Step 1 : prepare data to nms + _, box_num, box_unit = output.shape + if config.debug: + print(f"[Debug] box_num(25200) : {box_num}, box_unit(6) : {box_unit}") + + if config.decoder_faster == 0: + nms_input = box_class85to6(output.reshape(-1, box_unit)) + else: + nms_input = output + + # Step 2 : nms + # cpu nms(TODO) + + # gpu nms + if config.nms_type == "GPU": + + # Set nms input + cuda.memcpy_htod(nms_inputs[0]["allocation"], nms_input) + nms_context.execute_v2(nms_allocations) + cuda.memcpy_dtoh(nms_output0, nms_outputs[0]["allocation"]) + cuda.memcpy_dtoh(nms_output1, nms_outputs[1]["allocation"]) + + # Step 3 : post process + save + pred_boxes = post_process_func( + ori_img_shape=batch_img_shape, + imgsz=(config.imgsz, config.imgsz), + box_datas=nms_output0, + box_nums=nms_output1, + sample_num=cur_bsz_sample, + max_det=config.max_det + ) + save2json(batch_img_id, pred_boxes, json_result, class_map) + + fps = num_samples / forward_time + + if config.test_mode == "FPS": + print("FPS : ", fps) + print(f"Performance Check : Test {fps} >= target {config.fps_target}") + if fps >= config.fps_target: + print("pass!") + exit() + else: + print("failed!") + exit(1) + + if config.test_mode == "MAP": + if len(json_result) == 0: + print("Predict zero box!") + exit(1) + + if not os.path.exists(config.pred_dir): + os.makedirs(config.pred_dir) + + pred_json = os.path.join( + config.pred_dir, f"{config.model_name}_{config.precision}_preds.json" + ) + with open(pred_json, "w") as f: + json.dump(json_result, f) + + anno_json = config.coco_gt + anno = COCO(anno_json) # init annotations api + pred = anno.loadRes(pred_json) # init predictions api + eval = COCOeval(anno, pred, "bbox") + + eval.evaluate() + eval.accumulate() + print( + f"==============================eval {config.model_name} {config.precision} coco map ==============================" + ) + eval.summarize() + + map, map50 = eval.stats[:2] + print("MAP@0.5 : ", map50) + print(f"Accuracy Check : Test {map50} >= target {config.map_target}") + if map50 >= config.map_target: + print("pass!") + exit() + else: + print("failed!") + exit(1) + +def parse_config(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--model_name", type=str, default="YOLOV5s", help="YOLOV3 YOLOV5 YOLOV7 YOLOX" + ) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], 
default="int8", + help="The precision of datatype") + parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") + parser.add_argument( + "--model_engine", + type=str, + default="", + help="model engine path", + ) + parser.add_argument( + "--nms_engine", + type=str, + default="", + help="nms engine path", + ) + parser.add_argument( + "--coco_gt", + type=str, + default="data/datasets/cv/coco2017/annotations/instances_val2017.json", + help="coco instances_val2017.json", + ) + parser.add_argument("--warm_up", type=int, default=3, help="warm_up count") + parser.add_argument("--loop_count", type=int, default=-1, help="loop count") + parser.add_argument( + "--eval_dir", + type=str, + default="data/datasets/cv/coco2017/val2017", + help="coco image dir", + ) + parser.add_argument("--bsz", type=int, default=32, help="test batch size") + parser.add_argument( + "--imgsz", + "--img", + "--img-size", + type=int, + default=640, + help="inference size h,w", + ) + parser.add_argument("--max_det", type=int, default=1000, help="maximum detections per image") + parser.add_argument("--data_process_type", type=str, default="none") + parser.add_argument("--use_async", action="store_true") + parser.add_argument("--debug", action="store_true") + parser.add_argument("--pred_dir", type=str, default=".", help="pred save json dirs") + parser.add_argument("--map_target", type=float, default=0.56, help="target mAP") + parser.add_argument("--fps_target", type=float, default=-1.0, help="target fps") + parser.add_argument("--decoder_faster", type=int, default=0, help="decoder faster can use gpu nms directly") + parser.add_argument("--nms_type", type=str, default="GPU", help="GPU/CPU") + + config = parser.parse_args() + print("config:", config) + return config + +if __name__ == "__main__": + config = parse_config() + main(config) \ No newline at end of file diff --git a/models/cv/detection/yolov5/ixrt/load_ixrt_plugin.py b/models/cv/detection/yolov5/ixrt/load_ixrt_plugin.py new file mode 100644 index 0000000000000000000000000000000000000000..ae47dc8e854b6bea1f768e65c4dd481048bfebce --- /dev/null +++ b/models/cv/detection/yolov5/ixrt/load_ixrt_plugin.py @@ -0,0 +1,27 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +import ctypes +import tensorrt +from os.path import join, dirname, exists +def load_ixrt_plugin(logger=tensorrt.Logger(tensorrt.Logger.INFO), namespace="", dynamic_path=""): + if not dynamic_path: + dynamic_path = join(dirname(tensorrt.__file__), "lib", "libixrt_plugin.so") + if not exists(dynamic_path): + raise FileNotFoundError( + f"The ixrt_plugin lib {dynamic_path} does not exist; please provide a valid plugin path!") + ctypes.CDLL(dynamic_path) + tensorrt.init_libnvinfer_plugins(logger, namespace) + print(f"Loaded plugin from {dynamic_path}") \ No newline at end of file diff --git a/models/cv/detection/yolov5/ixrt/modify_batchsize.py b/models/cv/detection/yolov5/ixrt/modify_batchsize.py new file mode 100644 index 0000000000000000000000000000000000000000..3a88c1603bd6f457fd4965257627dc29edcda4d1 --- /dev/null +++ b/models/cv/detection/yolov5/ixrt/modify_batchsize.py @@ -0,0 +1,52 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import onnx +import argparse + +def change_input_dim(model, bsz): + batch_size = bsz + + # The following code changes the first dimension of every input to be batch_size + # Modify as appropriate ... note that this requires all inputs to + # have the same batch_size + inputs = model.graph.input + for input in inputs: + # Checks omitted. This assumes that all inputs are tensors and have a shape with first dim. + # Add checks as needed. + dim1 = input.type.tensor_type.shape.dim[0] + if isinstance(batch_size, str): + # a string (even a digit string) becomes a symbolic/dynamic batch dimension + dim1.dim_param = batch_size + elif isinstance(batch_size, int): + # set the given fixed batch size + dim1.dim_value = batch_size + else: + # fall back to batch size 1 + dim1.dim_value = 1 + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--batch_size", type=int) + parser.add_argument("--origin_model", type=str) + parser.add_argument("--output_model", type=str) + args = parser.parse_args() + return args + +args = parse_args() +model = onnx.load(args.origin_model) +change_input_dim(model, args.batch_size) +onnx.save(model, args.output_model) \ No newline at end of file diff --git a/models/cv/detection/yolov5/ixrt/quant.py b/models/cv/detection/yolov5/ixrt/quant.py new file mode 100644 index 0000000000000000000000000000000000000000..36fd39a13c2e1e40f4dc0098f042e66e4bd0d26a --- /dev/null +++ b/models/cv/detection/yolov5/ixrt/quant.py @@ -0,0 +1,67 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import random +import argparse +import numpy as np +from tensorrt.deploy import static_quantize + +import torch +from calibration_dataset import create_dataloaders + +def setseed(seed=42): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model_name", type=str) + parser.add_argument("--model", type=str, default="yolov5s_with_decoder.onnx") + parser.add_argument("--data_process_type", type=str, default="none") + parser.add_argument("--dataset_dir", type=str, default="./coco2017/val2017") + parser.add_argument("--ann_file", type=str, default="./coco2017/annotations/instances_val2017.json") + parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], default="hist_percentile") + parser.add_argument("--disable_quant_names", nargs='*', type=str) + parser.add_argument("--save_dir", type=str, help="save path", default=None) + parser.add_argument("--bsz", type=int, default=32) + parser.add_argument("--step", type=int, default=20) + parser.add_argument("--seed", type=int, default=42) + parser.add_argument("--imgsz", type=int, default=640) + args = parser.parse_args() + return args + +args = parse_args() +setseed(args.seed) +model_name = args.model_name + +out_dir = args.save_dir +dataloader = create_dataloaders( + data_path=args.dataset_dir, + annFile=args.ann_file, + img_sz=args.imgsz, + batch_size=args.bsz, + step=args.step, + data_process_type=args.data_process_type +) +# print("disable_quant_names : ", args.disable_quant_names) +static_quantize(args.model, + calibration_dataloader=dataloader, + save_quant_onnx_path=os.path.join(out_dir, f"quantized_{model_name}.onnx"), + observer=args.observer, + data_preprocess=lambda x: x[0].to("cuda"), + quant_format="qdq", + disable_quant_names=args.disable_quant_names) \ No newline at end of file diff --git a/models/cv/detection/yolov5/ixrt/scripts/infer_yolov5m_fp16_accuracy.sh b/models/cv/detection/yolov5/ixrt/scripts/infer_yolov5m_fp16_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..140ab8ace521610303cbfc0582e0c5eaf6188c62 --- /dev/null +++ b/models/cv/detection/yolov5/ixrt/scripts/infer_yolov5m_fp16_accuracy.sh @@ -0,0 +1,210 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
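# Usage sketch for the scripts that follow (values illustrative): each script reads
# CONFIG_DIR / CHECKPOINTS_DIR / DATASETS_DIR from the environment (see the README)
# and accepts --bs / --tgt overrides, e.g. (the config name here is an assumption):
#
#   export CONFIG_DIR=config/YOLOV5M_CONFIG
#   bash scripts/infer_yolov5m_fp16_accuracy.sh --bs 32 --tgt 0.56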
+ +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +# Run parameters +BSZ=32 +WARM_UP=-1 +TGT=-1 +LOOP_COUNT=-1 +RUN_MODE=MAP +PRECISION=float16 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +step=0 +faster=0 +CURRENT_MODEL=${ORIGINE_MODEL} +if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then + faster=1 +fi + +# Simplify Model +let step++ +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model skip, ${SIM_MODEL} already exists +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi +CURRENT_MODEL=${SIM_MODEL} + +# Cut Decoder +let step++ +echo [STEP ${step}] : Cut Decoder +NO_DECODER_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_without_decoder.onnx +if [ -f ${NO_DECODER_MODEL} ];then + echo " "Cut Decoder skip, ${NO_DECODER_MODEL} already exists +else + python3 ${RUN_DIR}/cut_model.py \ + --input_model ${CURRENT_MODEL} \ + --output_model ${NO_DECODER_MODEL} \ + --input_names ${MODEL_INPUT_NAMES[@]} \ + --output_names ${DECODER_INPUT_NAMES[@]} +fi +CURRENT_MODEL=${NO_DECODER_MODEL} + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + CURRENT_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} already exists + else + python3 ${RUN_DIR}/quant.py \ + --model ${CURRENT_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${EVAL_DIR} \ + --ann_file ${COCO_GT} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + echo " "Generate ${QUANT_EXIST_ONNX} + fi + CURRENT_MODEL=${QUANT_EXIST_ONNX} +fi + +# Add Decoder +if [ $LAYER_FUSION == "1" ]; then + let step++ + echo; + echo [STEP ${step}] : Add Decoder + FUSION_ONNX=${CHECKPOINTS_DIR}/${MODEL_NAME}_fusion.onnx + if [ -f $FUSION_ONNX ];then + echo " "Add Decoder Skip, $FUSION_ONNX already exists + else + python3 ${RUN_DIR}/deploy.py \ + --src ${CURRENT_MODEL} \ + --dst ${FUSION_ONNX} \ + --decoder_type YoloV5Decoder \ + --decoder_input_names ${DECODER_INPUT_NAMES[@]} \ + --decoder8_anchor ${DECODER_8_ANCHOR[@]} \ + --decoder16_anchor ${DECODER_16_ANCHOR[@]} \ + --decoder32_anchor ${DECODER_32_ANCHOR[@]} \ + --num_class ${DECODER_NUM_CLASS} \ + --faster ${faster} + fi + CURRENT_MODEL=${FUSION_ONNX} +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_bs${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL already exists +else + python3 
${RUN_DIR}/modify_batchsize.py \ + --batch_size ${BSZ} \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi +CURRENT_MODEL=${FINAL_MODEL} + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi +if [[ ${RUN_MODE} == "MAP" && ${NMS_TYPE} == "GPU" ]];then + NMS_ENGINE=${CHECKPOINTS_DIR}/nms.engine + # Build NMS Engine + python3 ${RUN_DIR}/build_nms_engine.py \ + --bsz ${BSZ} \ + --path ${CHECKPOINTS_DIR} \ + --all_box_num ${ALL_BOX_NUM} \ + --max_box_pre_img ${MAX_BOX_PRE_IMG} \ + --iou_thresh ${IOU_THRESH} \ + --score_thresh ${SCORE_THRESH} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --model_engine=${ENGINE_FILE} \ + --nms_engine=${NMS_ENGINE} \ + --coco_gt=${COCO_GT} \ + --eval_dir=${EVAL_DIR} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --decoder_faster=${faster} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --model_name ${MODEL_NAME} \ + --precision ${PRECISION} \ + --pred_dir ${CHECKPOINTS_DIR} \ + --map_target ${TGT} \ + --max_det ${MAX_BOX_PRE_IMG} \ + --nms_type ${NMS_TYPE} \ + --bsz ${BSZ}; check_status +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/detection/yolov5/ixrt/scripts/infer_yolov5m_fp16_performance.sh b/models/cv/detection/yolov5/ixrt/scripts/infer_yolov5m_fp16_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..01542134796eda4a0d46c33e3d28c23120e690ba --- /dev/null +++ b/models/cv/detection/yolov5/ixrt/scripts/infer_yolov5m_fp16_performance.sh @@ -0,0 +1,210 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
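# Note on the check_status helper used by these scripts: ${PIPESTATUS[0]} holds the
# exit code of the first command in the most recent pipeline, so a failing python3
# step is caught even if it is piped through another command, e.g.:
#
#   false | tee /dev/null; check_status   # EXIT_STATUS becomes 1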
+ +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +# Run parameters +BSZ=32 +WARM_UP=3 +TGT=-1 +LOOP_COUNT=10 +RUN_MODE=FPS +PRECISION=float16 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +step=0 +faster=0 +CURRENT_MODEL=${ORIGINE_MODEL} +if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then + faster=1 +fi + +# Simplify Model +let step++ +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model skip, ${SIM_MODEL} already exists +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi +CURRENT_MODEL=${SIM_MODEL} + +# Cut Decoder +let step++ +echo [STEP ${step}] : Cut Decoder +NO_DECODER_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_without_decoder.onnx +if [ -f ${NO_DECODER_MODEL} ];then + echo " "Cut Decoder skip, ${NO_DECODER_MODEL} already exists +else + python3 ${RUN_DIR}/cut_model.py \ + --input_model ${CURRENT_MODEL} \ + --output_model ${NO_DECODER_MODEL} \ + --input_names ${MODEL_INPUT_NAMES[@]} \ + --output_names ${DECODER_INPUT_NAMES[@]} +fi +CURRENT_MODEL=${NO_DECODER_MODEL} + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + CURRENT_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} already exists + else + python3 ${RUN_DIR}/quant.py \ + --model ${CURRENT_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${EVAL_DIR} \ + --ann_file ${COCO_GT} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + echo " "Generate ${QUANT_EXIST_ONNX} + fi + CURRENT_MODEL=${QUANT_EXIST_ONNX} +fi + +# Add Decoder +if [ $LAYER_FUSION == "1" ]; then + let step++ + echo; + echo [STEP ${step}] : Add Decoder + FUSION_ONNX=${CHECKPOINTS_DIR}/${MODEL_NAME}_fusion.onnx + if [ -f $FUSION_ONNX ];then + echo " "Add Decoder Skip, $FUSION_ONNX already exists + else + python3 ${RUN_DIR}/deploy.py \ + --src ${CURRENT_MODEL} \ + --dst ${FUSION_ONNX} \ + --decoder_type YoloV5Decoder \ + --decoder_input_names ${DECODER_INPUT_NAMES[@]} \ + --decoder8_anchor ${DECODER_8_ANCHOR[@]} \ + --decoder16_anchor ${DECODER_16_ANCHOR[@]} \ + --decoder32_anchor ${DECODER_32_ANCHOR[@]} \ + --num_class ${DECODER_NUM_CLASS} \ + --faster ${faster} + fi + CURRENT_MODEL=${FUSION_ONNX} +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_bs${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL already exists +else + python3 
${RUN_DIR}/modify_batchsize.py \ + --batch_size ${BSZ} \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi +CURRENT_MODEL=${FINAL_MODEL} + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi +if [[ ${RUN_MODE} == "MAP" && ${NMS_TYPE} == "GPU" ]];then + NMS_ENGINE=${CHECKPOINTS_DIR}/nms.engine + # Build NMS Engine + python3 ${RUN_DIR}/build_nms_engine.py \ + --bsz ${BSZ} \ + --path ${CHECKPOINTS_DIR} \ + --all_box_num ${ALL_BOX_NUM} \ + --max_box_pre_img ${MAX_BOX_PRE_IMG} \ + --iou_thresh ${IOU_THRESH} \ + --score_thresh ${SCORE_THRESH} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --model_engine=${ENGINE_FILE} \ + --nms_engine=${NMS_ENGINE} \ + --coco_gt=${COCO_GT} \ + --eval_dir=${EVAL_DIR} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --decoder_faster=${faster} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --model_name ${MODEL_NAME} \ + --precision ${PRECISION} \ + --pred_dir ${CHECKPOINTS_DIR} \ + --fps_target ${TGT} \ + --max_det ${MAX_BOX_PRE_IMG} \ + --nms_type ${NMS_TYPE} \ + --bsz ${BSZ}; check_status +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/detection/yolov5/ixrt/scripts/infer_yolov5m_int8_accuracy.sh b/models/cv/detection/yolov5/ixrt/scripts/infer_yolov5m_int8_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..18d11eff42fc5ecf60ae3338ae1c4688ff252127 --- /dev/null +++ b/models/cv/detection/yolov5/ixrt/scripts/infer_yolov5m_int8_accuracy.sh @@ -0,0 +1,210 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
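# The int8 scripts below reuse the cached quantized model once quant.py has produced
# it (the -f test on QUANT_EXIST_ONNX skips re-calibration); to force a fresh
# calibration pass, delete the cache first (illustrative):
#
#   rm -f ${CHECKPOINTS_DIR}/quantized_${MODEL_NAME}.onnx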
+ +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +# Run parameters +BSZ=32 +WARM_UP=-1 +TGT=-1 +LOOP_COUNT=-1 +RUN_MODE=MAP +PRECISION=int8 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +step=0 +faster=0 +CURRENT_MODEL=${ORIGINE_MODEL} +if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then + faster=1 +fi + +# Simplify Model +let step++ +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model skip, ${SIM_MODEL} already exists +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi +CURRENT_MODEL=${SIM_MODEL} + +# Cut Decoder +let step++ +echo [STEP ${step}] : Cut Decoder +NO_DECODER_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_without_decoder.onnx +if [ -f ${NO_DECODER_MODEL} ];then + echo " "Cut Decoder skip, ${NO_DECODER_MODEL} already exists +else + python3 ${RUN_DIR}/cut_model.py \ + --input_model ${CURRENT_MODEL} \ + --output_model ${NO_DECODER_MODEL} \ + --input_names ${MODEL_INPUT_NAMES[@]} \ + --output_names ${DECODER_INPUT_NAMES[@]} +fi +CURRENT_MODEL=${NO_DECODER_MODEL} + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + CURRENT_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} already exists + else + python3 ${RUN_DIR}/quant.py \ + --model ${CURRENT_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${EVAL_DIR} \ + --ann_file ${COCO_GT} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + echo " "Generate ${QUANT_EXIST_ONNX} + fi + CURRENT_MODEL=${QUANT_EXIST_ONNX} +fi + +# Add Decoder +if [ $LAYER_FUSION == "1" ]; then + let step++ + echo; + echo [STEP ${step}] : Add Decoder + FUSION_ONNX=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_fusion.onnx + if [ -f $FUSION_ONNX ];then + echo " "Add Decoder Skip, $FUSION_ONNX already exists + else + python3 ${RUN_DIR}/deploy.py \ + --src ${CURRENT_MODEL} \ + --dst ${FUSION_ONNX} \ + --decoder_type YoloV5Decoder \ + --decoder_input_names ${DECODER_INPUT_NAMES[@]} \ + --decoder8_anchor ${DECODER_8_ANCHOR[@]} \ + --decoder16_anchor ${DECODER_16_ANCHOR[@]} \ + --decoder32_anchor ${DECODER_32_ANCHOR[@]} \ + --num_class ${DECODER_NUM_CLASS} \ + --faster ${faster} + fi + CURRENT_MODEL=${FUSION_ONNX} +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_bs${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL already exists +else + python3 
${RUN_DIR}/modify_batchsize.py \ + --batch_size ${BSZ} \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi +CURRENT_MODEL=${FINAL_MODEL} + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi +if [[ ${RUN_MODE} == "MAP" && ${NMS_TYPE} == "GPU" ]];then + NMS_ENGINE=${CHECKPOINTS_DIR}/nms.engine + # Build NMS Engine + python3 ${RUN_DIR}/build_nms_engine.py \ + --bsz ${BSZ} \ + --path ${CHECKPOINTS_DIR} \ + --all_box_num ${ALL_BOX_NUM} \ + --max_box_pre_img ${MAX_BOX_PRE_IMG} \ + --iou_thresh ${IOU_THRESH} \ + --score_thresh ${SCORE_THRESH} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --model_engine=${ENGINE_FILE} \ + --nms_engine=${NMS_ENGINE} \ + --coco_gt=${COCO_GT} \ + --eval_dir=${EVAL_DIR} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --decoder_faster=${faster} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --model_name ${MODEL_NAME} \ + --precision ${PRECISION} \ + --pred_dir ${CHECKPOINTS_DIR} \ + --map_target ${TGT} \ + --max_det ${MAX_BOX_PRE_IMG} \ + --nms_type ${NMS_TYPE} \ + --bsz ${BSZ}; check_status +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/detection/yolov5/ixrt/scripts/infer_yolov5m_int8_performance.sh b/models/cv/detection/yolov5/ixrt/scripts/infer_yolov5m_int8_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..08525d287a6e850b7ee253bb13dc165c2253f045 --- /dev/null +++ b/models/cv/detection/yolov5/ixrt/scripts/infer_yolov5m_int8_performance.sh @@ -0,0 +1,211 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
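# In FPS mode (the performance scripts) inference.py times only the
# context.execute_v2 calls over LOOP_COUNT batches, so with the defaults below
# (BSZ=32, LOOP_COUNT=10) the reported figure works out to:
#
#   FPS = (32 * 10) images / cumulative forward time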
+ +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +# Run parameters +BSZ=32 +WARM_UP=3 +TGT=-1 +LOOP_COUNT=10 +RUN_MODE=FPS +PRECISION=int8 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +step=0 +faster=0 +CURRENT_MODEL=${ORIGINE_MODEL} +if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then + faster=1 +fi + +# Simplify Model +let step++ +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model skip, ${SIM_MODEL} already exists +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi +CURRENT_MODEL=${SIM_MODEL} + +# Cut Decoder +let step++ +echo [STEP ${step}] : Cut Decoder +NO_DECODER_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_without_decoder.onnx +if [ -f ${NO_DECODER_MODEL} ];then + echo " "Cut Decoder skip, ${NO_DECODER_MODEL} already exists +else + python3 ${RUN_DIR}/cut_model.py \ + --input_model ${CURRENT_MODEL} \ + --output_model ${NO_DECODER_MODEL} \ + --input_names ${MODEL_INPUT_NAMES[@]} \ + --output_names ${DECODER_INPUT_NAMES[@]} +fi +CURRENT_MODEL=${NO_DECODER_MODEL} + + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + CURRENT_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} already exists + else + python3 ${RUN_DIR}/quant.py \ + --model ${CURRENT_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${EVAL_DIR} \ + --ann_file ${COCO_GT} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + echo " "Generate ${QUANT_EXIST_ONNX} + fi + CURRENT_MODEL=${QUANT_EXIST_ONNX} +fi + +# Add Decoder +if [ $LAYER_FUSION == "1" ]; then + let step++ + echo; + echo [STEP ${step}] : Add Decoder + FUSION_ONNX=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_fusion.onnx + if [ -f $FUSION_ONNX ];then + echo " "Add Decoder Skip, $FUSION_ONNX already exists + else + python3 ${RUN_DIR}/deploy.py \ + --src ${CURRENT_MODEL} \ + --dst ${FUSION_ONNX} \ + --decoder_type YoloV5Decoder \ + --decoder_input_names ${DECODER_INPUT_NAMES[@]} \ + --decoder8_anchor ${DECODER_8_ANCHOR[@]} \ + --decoder16_anchor ${DECODER_16_ANCHOR[@]} \ + --decoder32_anchor ${DECODER_32_ANCHOR[@]} \ + --num_class ${DECODER_NUM_CLASS} \ + --faster ${faster} + fi + CURRENT_MODEL=${FUSION_ONNX} +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_bs${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL already exists +else + python3 
${RUN_DIR}/modify_batchsize.py \ + --batch_size ${BSZ} \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi +CURRENT_MODEL=${FINAL_MODEL} + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi +if [[ ${RUN_MODE} == "MAP" && ${NMS_TYPE} == "GPU" ]];then + NMS_ENGINE=${CHECKPOINTS_DIR}/nms.engine + # Build NMS Engine + python3 ${RUN_DIR}/build_nms_engine.py \ + --bsz ${BSZ} \ + --path ${CHECKPOINTS_DIR} \ + --all_box_num ${ALL_BOX_NUM} \ + --max_box_pre_img ${MAX_BOX_PRE_IMG} \ + --iou_thresh ${IOU_THRESH} \ + --score_thresh ${SCORE_THRESH} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --model_engine=${ENGINE_FILE} \ + --nms_engine=${NMS_ENGINE} \ + --coco_gt=${COCO_GT} \ + --eval_dir=${EVAL_DIR} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --decoder_faster=${faster} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --model_name ${MODEL_NAME} \ + --precision ${PRECISION} \ + --pred_dir ${CHECKPOINTS_DIR} \ + --fps_target ${TGT} \ + --max_det ${MAX_BOX_PRE_IMG} \ + --nms_type ${NMS_TYPE} \ + --bsz ${BSZ}; check_status +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/detection/yolov5/ixrt/simplify_model.py b/models/cv/detection/yolov5/ixrt/simplify_model.py new file mode 100644 index 0000000000000000000000000000000000000000..1400fd81ddb4b3fae1b20d0fd35082a692f5d292 --- /dev/null +++ b/models/cv/detection/yolov5/ixrt/simplify_model.py @@ -0,0 +1,36 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
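# Standalone use of the helper below (paths are placeholders):
#
#   python3 simplify_model.py --origin_model yolov5m.onnx --output_model yolov5m_sim.onnx
#
# onnxsim folds constants and strips redundant nodes; shape inference is run afterwards
# so the later graph surgery (deploy.py) can see concrete tensor shapes.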
+
+import onnx
+import argparse
+from onnxsim import simplify
+
+# Simplify
+def simplify_model(args):
+    onnx_model = onnx.load(args.origin_model)
+    model_simp, check = simplify(onnx_model)
+    assert check, "Simplified ONNX model could not be validated"
+    model_simp = onnx.shape_inference.infer_shapes(model_simp)
+    onnx.save(model_simp, args.output_model)
+    print(" Simplify onnx Done.")
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--origin_model", type=str)
+    parser.add_argument("--output_model", type=str)
+    args = parser.parse_args()
+    return args
+
+args = parse_args()
+simplify_model(args)
\ No newline at end of file
diff --git a/models/cv/detection/yolov5s/ixrt/README.md b/models/cv/detection/yolov5s/ixrt/README.md
new file mode 100755
index 0000000000000000000000000000000000000000..eb218415f01afadfcb43c05f12df0e6b09e929bb
--- /dev/null
+++ b/models/cv/detection/yolov5s/ixrt/README.md
@@ -0,0 +1,74 @@
+# YOLOv5-s
+
+## Description
+
+The YOLOv5 architecture is designed for efficient and accurate object detection tasks in real-time scenarios. It employs a single convolutional neural network to simultaneously predict bounding boxes and class probabilities for multiple objects within an image. YOLOv5s is the smallest variant of the family, trading some accuracy for speed and a compact footprint.
+
+## Setup
+
+### Install
+```
+yum install mesa-libGL
+pip3 install tqdm
+pip3 install onnx
+pip3 install onnxsim
+pip3 install ultralytics
+pip3 install pycocotools
+```
+
+### Download
+
+Pretrained model:
+
+Dataset: to download the validation dataset.
+
+### Model Conversion
+```bash
+
+mkdir checkpoints
+git clone https://github.com/ultralytics/yolov5
+cd yolov5
+# Switch to the required release branch
+git checkout v6.1
+
+# A few extra assets are needed
+wget https://ultralytics.com/assets/Arial.ttf
+cp Arial.ttf /root/.config/Ultralytics/Arial.ttf
+
+# Export to ONNX (see the export_onnx function in export.py for the details)
+python3 export.py --weights yolov5s.pt --include onnx --opset 11 --batch-size 32
+mv yolov5s.onnx /Path/to/checkpoints
+```
+
+## Inference
+```bash
+export PROJ_DIR=/Path/to/yolov5s/ixrt
+export DATASETS_DIR=/Path/to/coco2017/
+export CHECKPOINTS_DIR=./checkpoints
+export COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json
+export EVAL_DIR=${DATASETS_DIR}/val2017
+export RUN_DIR=${PROJ_DIR}/
+export CONFIG_DIR=config/YOLOV5S_CONFIG
+```
+### FP16
+
+```bash
+# Accuracy
+bash scripts/infer_yolov5s_fp16_accuracy.sh
+# Performance
+bash scripts/infer_yolov5s_fp16_performance.sh
+```
+
+### INT8
+```bash
+# Accuracy
+bash scripts/infer_yolov5s_int8_accuracy.sh
+# Performance
+bash scripts/infer_yolov5s_int8_performance.sh
+```
+
+## Results
+
+Model   |BatchSize  |Precision |FPS      |MAP@0.5   |MAP@0.5:0.95 |
+--------|-----------|----------|---------|----------|-------------|
+YOLOv5s | 32        | FP16     | 1112.66 | 0.565    | 0.370       |
+YOLOv5s | 32        | INT8     | 2440.54 | 0.557    | 0.351       |
diff --git a/models/cv/detection/yolov5s/ixrt/build_engine.py b/models/cv/detection/yolov5s/ixrt/build_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..a919bdd0183197ce125aa5492ec83e58e035675d
--- /dev/null
+++ b/models/cv/detection/yolov5s/ixrt/build_engine.py
@@ -0,0 +1,58 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import cv2 +import argparse +import numpy as np + +import torch +import tensorrt + +from load_ixrt_plugin import load_ixrt_plugin +load_ixrt_plugin() + +def main(config): + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(config.model) + + precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 + # print("precision : ", precision) + build_config.set_flag(precision) + + plan = builder.build_serialized_network(network, build_config) + engine_file_path = config.engine + with open(engine_file_path, "wb") as f: + f.write(plan) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", + help="The precision of datatype") + # engine args + parser.add_argument("--engine", type=str, default=None) + + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = parse_args() + main(args) \ No newline at end of file diff --git a/models/cv/detection/yolov5s/ixrt/build_nms_engine.py b/models/cv/detection/yolov5s/ixrt/build_nms_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..3be0d83d0d966018f59b87d22f628b9b1ddf9b21 --- /dev/null +++ b/models/cv/detection/yolov5s/ixrt/build_nms_engine.py @@ -0,0 +1,94 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
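The file below emits a one-node ONNX graph holding a custom `NMS` op that takes `[bsz, all_box_num, 6]` candidates as rows of `(x1, y1, x2, y2, class_id, score)` and returns the kept boxes plus per-image counts. For reference, here is a plain-NumPy sketch of what a greedy, per-class IoU suppression over that layout computes; treating the plugin as equivalent to standard greedy NMS is an assumption about its semantics, not a description of its kernel.

```python
import numpy as np

def greedy_nms_single_image(boxes, iou_thresh=0.6, score_thresh=0.001, max_keep=1000):
    """boxes: [N, 6] rows of (x1, y1, x2, y2, class_id, score)."""
    boxes = boxes[boxes[:, 5] >= score_thresh]
    boxes = boxes[np.argsort(-boxes[:, 5])]          # highest score first
    keep = []
    while len(boxes) and len(keep) < max_keep:
        best, boxes = boxes[0], boxes[1:]
        keep.append(best)
        same_cls = boxes[:, 4] == best[4]
        # Intersection-over-union of the remaining boxes against the kept one.
        xx1 = np.maximum(boxes[:, 0], best[0]); yy1 = np.maximum(boxes[:, 1], best[1])
        xx2 = np.minimum(boxes[:, 2], best[2]); yy2 = np.minimum(boxes[:, 3], best[3])
        inter = np.clip(xx2 - xx1, 0, None) * np.clip(yy2 - yy1, 0, None)
        area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
        area_best = (best[2] - best[0]) * (best[3] - best[1])
        iou = inter / (area + area_best - inter + 1e-9)
        boxes = boxes[~(same_cls & (iou > iou_thresh))]
    return np.stack(keep) if keep else np.zeros((0, 6), np.float32)
```

The default thresholds mirror `IOU_THRESH`, `SCORE_THRESH`, and `MAX_BOX_PRE_IMG` from the config files in this diff.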
+
+import os
+import argparse
+import torch
+import onnx
+from onnx import helper
+from onnx import TensorProto, numpy_helper
+import tensorrt
+
+def create_onnx(args):
+    nms = helper.make_node(
+        "NMS",
+        name="NMS",
+        inputs=["nms_input"],
+        outputs=["nms_output0", "nms_output1"],
+        nMaxKeep=args.max_box_pre_img,
+        fIoUThresh=args.iou_thresh,
+        fScoreThresh=args.score_thresh
+    )
+    graph = helper.make_graph(
+        nodes=[nms],
+        name="gpu_nms",
+        inputs=[
+            helper.make_tensor_value_info(
+                "nms_input", onnx.TensorProto.FLOAT, (args.bsz, args.all_box_num, 6)
+            )
+        ],
+        outputs=[
+            helper.make_tensor_value_info(
+                "nms_output0", onnx.TensorProto.FLOAT, (args.bsz, args.max_box_pre_img, 6)
+            ),
+            helper.make_tensor_value_info(
+                "nms_output1", onnx.TensorProto.INT32, (args.bsz,)
+            )
+        ],
+        initializer=[]
+    )
+
+    op = onnx.OperatorSetIdProto()
+    op.version = 13
+    model = onnx.helper.make_model(graph, opset_imports=[op])
+    onnx_path = args.path + "/nms.onnx"
+    onnx.save(model, onnx_path)
+
+def build_engine(args):
+    onnx_path = args.path + "/nms.onnx"
+    IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING)
+    builder = tensorrt.Builder(IXRT_LOGGER)
+    EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
+    network = builder.create_network(EXPLICIT_BATCH)
+    build_config = builder.create_builder_config()
+    parser = tensorrt.OnnxParser(network, IXRT_LOGGER)
+    parser.parse_from_file(onnx_path)
+    plan = builder.build_serialized_network(network, build_config)
+
+    engine_path = args.path + "/nms.engine"
+    with open(engine_path, "wb") as f:
+        f.write(plan)
+
+def main(args):
+    create_onnx(args)
+    build_engine(args)
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--bsz", type=int, default=1, help="batch size")
+    parser.add_argument("--path", type=str)
+    parser.add_argument("--all_box_num", type=int, default=25200)
+    parser.add_argument("--max_box_pre_img", type=int, default=1000)
+    parser.add_argument("--iou_thresh", type=float, default=0.6)
+    parser.add_argument("--score_thresh", type=float, default=0.001)
+
+    args = parser.parse_args()
+    return args
+
+if __name__ == "__main__":
+    args = parse_args()
+    main(args)
\ No newline at end of file
diff --git a/models/cv/detection/yolov5s/ixrt/calibration_dataset.py b/models/cv/detection/yolov5s/ixrt/calibration_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..de37775a0c617fdefca4342423a6a47bdc9b9c41
--- /dev/null
+++ b/models/cv/detection/yolov5s/ixrt/calibration_dataset.py
@@ -0,0 +1,43 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
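A hypothetical smoke test for the calibration dataloader defined below; the dataset paths are placeholders in the same `/Path/to` convention the READMEs use, and the expected batch shape follows from the yolov5 preprocessing in this diff.

```python
from calibration_dataset import create_dataloaders

loader = create_dataloaders(
    data_path="/Path/to/coco2017/val2017",
    annFile="/Path/to/coco2017/annotations/instances_val2017.json",
    img_sz=640, batch_size=1, step=32, data_process_type="yolov5",
)
image, origin_shape, image_id = next(iter(loader))
print(image.shape)  # expected: torch.Size([1, 3, 640, 640])
```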
+ +import os +import torch +import torchvision.datasets +from torch.utils.data import DataLoader +from datasets.coco import CocoDetection + +def create_dataloaders(data_path, annFile, img_sz=640, batch_size=32, step=32, workers=2, data_process_type="yolov5"): + dataset = CocoDetection( + root=data_path, + annFile=annFile, + img_size=img_sz, + data_process_type=data_process_type + ) + calibration_dataset = dataset + num_samples = min(5000, batch_size * step) + if num_samples > 0: + calibration_dataset = torch.utils.data.Subset( + dataset, indices=range(num_samples) + ) + + calibration_dataloader = DataLoader( + calibration_dataset, + shuffle=False, + batch_size=batch_size, + drop_last=False, + num_workers=workers, + ) + return calibration_dataloader \ No newline at end of file diff --git a/models/cv/detection/yolov5s/ixrt/coco_labels.py b/models/cv/detection/yolov5s/ixrt/coco_labels.py new file mode 100644 index 0000000000000000000000000000000000000000..43f5bd82cd257efdcab2bdba6bad64d9bb90416e --- /dev/null +++ b/models/cv/detection/yolov5s/ixrt/coco_labels.py @@ -0,0 +1,104 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +labels = [ + "person", + "bicycle", + "car", + "motorcycle", + "airplane", + "bus", + "train", + "truck", + "boat", + "traffic light", + "fire hydrant", + "stop sign", + "parking meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "backpack", + "umbrella", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports ball", + "kite", + "baseball bat", + "baseball glove", + "skateboard", + "surfboard", + "tennis racket", + "bottle", + "wine glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot dog", + "pizza", + "donut", + "cake", + "chair", + "couch", + "potted plant", + "bed", + "dining table", + "toilet", + "tv", + "laptop", + "mouse", + "remote", + "keyboard", + "cell phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "book", + "clock", + "vase", + "scissors", + "teddy bear", + "hair drier", + "toothbrush", +] +def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper) + return [ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] + +__all__ = ["labels"] diff --git a/models/cv/detection/yolov5s/ixrt/common.py b/models/cv/detection/yolov5s/ixrt/common.py new file mode 100644 index 0000000000000000000000000000000000000000..695e05ba9605a8b251d74567830ecbeb86387b4c --- /dev/null +++ b/models/cv/detection/yolov5s/ixrt/common.py @@ -0,0 +1,97 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX 
Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +import numpy as np +from tqdm import tqdm + +import tensorrt +import pycuda.driver as cuda + +# input : [bsz, box_num, 5(cx, cy, w, h, conf) + class_num(prob[0], prob[1], ...)] +# output : [bsz, box_num, 6(left_top_x, left_top_y, right_bottom_x, right_bottom_y, class_id, max_prob*conf)] +def box_class85to6(input): + center_x_y = input[:, :2] + side = input[:, 2:4] + conf = input[:, 4:5] + class_id = np.argmax(input[:, 5:], axis = -1) + class_id = class_id.astype(np.float32).reshape(-1, 1) + 1 + max_prob = np.max(input[:, 5:], axis = -1).reshape(-1, 1) + x1_y1 = center_x_y - 0.5 * side + x2_y2 = center_x_y + 0.5 * side + nms_input = np.concatenate([x1_y1, x2_y2, class_id, max_prob*conf], axis = -1) + return nms_input + +def save2json(batch_img_id, pred_boxes, json_result, class_trans): + for i, boxes in enumerate(pred_boxes): + if boxes is not None: + image_id = int(batch_img_id[i]) + # have no target + if image_id == -1: + continue + for x, y, w, h, c, p in boxes: + x, y, w, h, p = float(x), float(y), float(w), float(h), float(p) + c = int(c) + json_result.append( + { + "image_id": image_id, + "category_id": class_trans[c - 1], + "bbox": [x, y, w, h], + "score": p, + } + ) + +def create_engine_context(engine_path, logger): + with open(engine_path, "rb") as f: + runtime = tensorrt.Runtime(logger) + assert runtime + engine = runtime.deserialize_cuda_engine(f.read()) + assert engine + context = engine.create_execution_context() + assert context + + return engine, context + +def get_io_bindings(engine): + # Setup I/O bindings + inputs = [] + outputs = [] + allocations = [] + + for i in range(engine.num_bindings): + is_input = False + if engine.binding_is_input(i): + is_input = True + name = engine.get_binding_name(i) + dtype = engine.get_binding_dtype(i) + shape = engine.get_binding_shape(i) + if is_input: + batch_size = shape[0] + size = np.dtype(tensorrt.nptype(dtype)).itemsize + for s in shape: + size *= s + allocation = cuda.mem_alloc(size) + binding = { + "index": i, + "name": name, + "dtype": np.dtype(tensorrt.nptype(dtype)), + "shape": list(shape), + "allocation": allocation, + } + print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") + allocations.append(allocation) + if engine.binding_is_input(i): + inputs.append(binding) + else: + outputs.append(binding) + return inputs, outputs, allocations \ No newline at end of file diff --git a/models/cv/detection/yolov5s/ixrt/config/YOLOV5S_CONFIG b/models/cv/detection/yolov5s/ixrt/config/YOLOV5S_CONFIG new file mode 100755 index 0000000000000000000000000000000000000000..8aa23b8e276963cf63247b252962ddf521b66dfd --- /dev/null +++ b/models/cv/detection/yolov5s/ixrt/config/YOLOV5S_CONFIG @@ -0,0 +1,64 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+# BSZ             : batch size used when building the engine and at inference time
+# IMGSIZE         : model input height/width
+# RUN_MODE        : [FPS, MAP]
+# PRECISION       : [float16, int8]
+# MODEL_NAME      : basename for the generated onnx/engine files
+# ORIGINE_MODEL   : the original onnx file
+# COCO_GT         : annotation file for COCO evaluation
+# DATASET_DIR     : dataset path used for quantization/inference
+# CHECKPOINTS_DIR : directory that holds the generated onnx/engine files
+# LAYER_FUSION    : implement the decoder with fused ops; 0 = no fusion, 1 = fusion
+# DECODER_FASTER  : there are two fused implementations; the faster one can feed GPU NMS
+#                   directly, the other keeps its output identical to the onnx. 1 = faster
+IMGSIZE=640
+MODEL_NAME=yolov5s
+ORIGINE_MODEL=yolov5s.onnx
+DATA_PROCESS_TYPE=yolov5
+MODEL_INPUT_NAMES=(images)
+
+LAYER_FUSION=1
+DECODER_FASTER=1
+DECODER_NUM_CLASS=80
+DECODER_INPUT_NAMES=(/model.24/m.0/Conv_output_0 /model.24/m.1/Conv_output_0 /model.24/m.2/Conv_output_0)
+DECODER_8_ANCHOR=(10 13 16 30 33 23)
+DECODER_16_ANCHOR=(30 61 62 45 59 119)
+DECODER_32_ANCHOR=(116 90 156 198 373 326)
+
+# NMS CONFIG
+    # IOU_THRESH      : IoU threshold
+    # SCORE_THRESH    : bbox confidence threshold
+    # MAX_BOX_PRE_IMG : upper limit on predicted bboxes per image
+    # ALL_BOX_NUM     : number of boxes NMS receives per image
+    # NMS_TYPE        : GPU/CPU(TODO)
+IOU_THRESH=0.6
+SCORE_THRESH=0.001
+MAX_BOX_PRE_IMG=1000
+ALL_BOX_NUM=25200
+NMS_TYPE=GPU
+
+# QUANT CONFIG (only takes effect when PRECISION is int8)
+    # QUANT_OBSERVER   : quantization strategy, one of [hist_percentile, percentile, minmax, entropy, ema]
+    # QUANT_BATCHSIZE  : dataloader batch size during quantization; best kept equal to the onnx
+    #                    batch size, otherwise some ops (e.g. Reshape) may infer wrong shapes
+    # QUANT_STEP       : number of quantization steps
+    # QUANT_SEED       : random seed, to make the quantization result reproducible
+    # QUANT_EXIST_ONNX : set this if a quantized model from another source should be used
+QUANT_OBSERVER=hist_percentile
+QUANT_BATCHSIZE=1
+QUANT_STEP=32
+QUANT_SEED=42
+DISABLE_QUANT_LIST=()
+QUANT_EXIST_ONNX=
\ No newline at end of file
diff --git a/models/cv/detection/yolov5s/ixrt/cut_model.py b/models/cv/detection/yolov5s/ixrt/cut_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..e9ee19aadf0809fe1b97e3225d09150fb54513f7
--- /dev/null
+++ b/models/cv/detection/yolov5s/ixrt/cut_model.py
@@ -0,0 +1,31 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
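cut_model.py (below) is a thin wrapper around `onnx.utils.extract_model`. Spelled out with the tensor names from YOLOV5S_CONFIG above, the cut step amounts to the following; the file names assume `CHECKPOINTS_DIR=./checkpoints` and the `MODEL_NAME`-derived names the scripts generate.

```python
import onnx

# Keep only the subgraph from the network input up to the three raw head
# convolutions, i.e. drop the exported decoder.
onnx.utils.extract_model(
    "checkpoints/yolov5s_sim.onnx",
    "checkpoints/yolov5s_without_decoder.onnx",
    input_names=["images"],
    output_names=[
        "/model.24/m.0/Conv_output_0",
        "/model.24/m.1/Conv_output_0",
        "/model.24/m.2/Conv_output_0",
    ],
)
```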
+ +import onnx +import argparse +from onnxsim import simplify + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--input_model", type=str) + parser.add_argument("--output_model", type=str) + parser.add_argument("--input_names", nargs='+', type=str) + parser.add_argument("--output_names", nargs='+', type=str) + args = parser.parse_args() + return args + +args = parse_args() +onnx.utils.extract_model(args.input_model, args.output_model, args.input_names, args.output_names) +print(" Cut Model Done.") \ No newline at end of file diff --git a/models/cv/detection/yolov5s/ixrt/datasets/__init__.py b/models/cv/detection/yolov5s/ixrt/datasets/__init__.py new file mode 100755 index 0000000000000000000000000000000000000000..162e24b462289dcee7b7a2888b93fad1115def81 --- /dev/null +++ b/models/cv/detection/yolov5s/ixrt/datasets/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. \ No newline at end of file diff --git a/models/cv/detection/yolov5s/ixrt/datasets/coco.py b/models/cv/detection/yolov5s/ixrt/datasets/coco.py new file mode 100755 index 0000000000000000000000000000000000000000..73c5df54761b917ecd0127fb56b61d9bd34c1196 --- /dev/null +++ b/models/cv/detection/yolov5s/ixrt/datasets/coco.py @@ -0,0 +1,131 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os.path +from typing import Any, Callable, List, Optional, Tuple + +import cv2 + +from .vision import VisionDataset +from .pre_process import get_post_process +class CocoDetection(VisionDataset): + """`MS Coco Detection `_ Dataset. + + It requires the `COCO API to be installed `_. + + Args: + root (string): Root directory where images are downloaded to. + annFile (string): Path to json annotation file. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.PILToTensor`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + transforms (callable, optional): A function/transform that takes input sample and its target as entry + and returns a transformed version. 
+ """ + + def __init__( + self, + root: str, + annFile: str, + img_size: int, + data_process_type: str, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + transforms: Optional[Callable] = None, + + ) -> None: + super().__init__(root, transforms, transform, target_transform) + from pycocotools.coco import COCO + + self.coco = COCO(annFile) + self.ids = list(sorted(self.coco.imgs.keys())) + self.img_size = img_size + + self.transforms = get_post_process(data_process_type) + + def _load_image(self, id: int): + path = self.coco.loadImgs(id)[0]["file_name"] + data = cv2.imread(os.path.join(self.root, path)) + return data + + def _load_target(self, id: int) -> List[Any]: + return self.coco.loadAnns(self.coco.getAnnIds(id)) + + def __getitem__(self, index: int) -> Tuple[Any, Any]: + id = self.ids[index] + image = self._load_image(id) + target = self._load_target(id) + origin_shape = image.shape[:2] + + if self.transforms is not None: + image = self.transforms(image, self.img_size) + + if len(target) > 0: + image_id = target[0]["image_id"] + else: + # have no target + image_id = -1 + return image, origin_shape, image_id + + def __len__(self) -> int: + return len(self.ids) + + +class CocoCaptions(CocoDetection): + """`MS Coco Captions `_ Dataset. + + It requires the `COCO API to be installed `_. + + Args: + root (string): Root directory where images are downloaded to. + annFile (string): Path to json annotation file. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.PILToTensor`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + transforms (callable, optional): A function/transform that takes input sample and its target as entry + and returns a transformed version. + + Example: + + .. code:: python + + import torchvision.datasets as dset + import torchvision.transforms as transforms + cap = dset.CocoCaptions(root = 'dir where images are', + annFile = 'json annotation file', + transform=transforms.PILToTensor()) + + print('Number of samples: ', len(cap)) + img, target = cap[3] # load 4th sample + + print("Image Size: ", img.size()) + print(target) + + Output: :: + + Number of samples: 82783 + Image Size: (3L, 427L, 640L) + [u'A plane emitting smoke stream flying over a mountain.', + u'A plane darts across a bright blue sky behind a mountain covered in snow', + u'A plane leaves a contrail above the snowy mountain top.', + u'A mountain that has a plane flying overheard in the distance.', + u'A mountain view with a plume of smoke in the background'] + + """ + + def _load_target(self, id: int) -> List[str]: + return [ann["caption"] for ann in super()._load_target(id)] diff --git a/models/cv/detection/yolov5s/ixrt/datasets/common.py b/models/cv/detection/yolov5s/ixrt/datasets/common.py new file mode 100755 index 0000000000000000000000000000000000000000..ef36eba394917bc05af46f33be48463df50f540d --- /dev/null +++ b/models/cv/detection/yolov5s/ixrt/datasets/common.py @@ -0,0 +1,81 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import cv2 +import math +import numpy as np + +def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32): + # Resize and pad image while meeting stride-multiple constraints + shape = im.shape[:2] # current shape [height, width] + if isinstance(new_shape, int): + new_shape = (new_shape, new_shape) + + # Scale ratio (new / old) + r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) + if not scaleup: # only scale down, do not scale up (for better val mAP) + r = min(r, 1.0) + + # Compute padding + ratio = r, r # width, height ratios + new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) + dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding + if auto: # minimum rectangle + dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding + elif scaleFill: # stretch + dw, dh = 0.0, 0.0 + new_unpad = (new_shape[1], new_shape[0]) + ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios + + dw /= 2 # divide padding into 2 sides + dh /= 2 + + if shape[::-1] != new_unpad: # resize + im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR) + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border + return im, ratio, (dw, dh) + +def scale_boxes(net_shape, boxes, ori_shape, use_letterbox=False): + # Rescale boxes (xyxy) from net_shape to ori_shape + + if use_letterbox: + + gain = min( + net_shape[0] / ori_shape[0], net_shape[1] / ori_shape[1] + ) # gain = new / old + pad = (net_shape[1] - ori_shape[1] * gain) / 2, ( + net_shape[0] - ori_shape[0] * gain + ) / 2.0 + + boxes[:, [0, 2]] -= pad[0] # x padding + boxes[:, [1, 3]] -= pad[1] # y padding + boxes[:, :4] /= gain + else: + x_scale, y_scale = net_shape[1] / ori_shape[1], net_shape[0] / ori_shape[0] + + boxes[:, 0] /= x_scale + boxes[:, 1] /= y_scale + boxes[:, 2] /= x_scale + boxes[:, 3] /= y_scale + + clip_boxes(boxes, ori_shape) + return boxes + +def clip_boxes(boxes, shape): + + boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2 + boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2 \ No newline at end of file diff --git a/models/cv/detection/yolov5s/ixrt/datasets/post_process.py b/models/cv/detection/yolov5s/ixrt/datasets/post_process.py new file mode 100755 index 0000000000000000000000000000000000000000..8590816a0df18b6ef296ebe305b15b81240ab1d0 --- /dev/null +++ b/models/cv/detection/yolov5s/ixrt/datasets/post_process.py @@ -0,0 +1,130 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import cv2 +import math +import numpy as np + +from .common import letterbox, scale_boxes, clip_boxes + +def get_post_process(data_process_type): + if data_process_type == "yolov5": + return Yolov5Postprocess + elif data_process_type == "yolov3": + return Yolov3Postprocess + elif data_process_type == "yolox": + return YoloxPostprocess + return None + +def Yolov3Postprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + cur_box = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + boxes = scale_boxes( + (imgsz[0], imgsz[1]), + cur_box, + (ori_img_shape[0][i], ori_img_shape[1][i]), + use_letterbox=False + ) + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box + +def Yolov5Postprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + cur_box = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + boxes = scale_boxes( + (imgsz[0], imgsz[1]), + cur_box, + (ori_img_shape[0][i], ori_img_shape[1][i]), + use_letterbox=True + ) + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box + +def YoloxPostprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + boxes = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + r = min(imgsz[0]/ori_img_shape[0][i], imgsz[1]/ori_img_shape[1][i]) + boxes[:, :4] /= r + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + clip_boxes(boxes, (ori_img_shape[0][i], ori_img_shape[1][i])) + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box \ No newline at end of file diff --git a/models/cv/detection/yolov5s/ixrt/datasets/pre_process.py b/models/cv/detection/yolov5s/ixrt/datasets/pre_process.py new file mode 100755 index 0000000000000000000000000000000000000000..c651f8adb7c8190c214fbbbb7769c7d0713e9619 --- /dev/null +++ b/models/cv/detection/yolov5s/ixrt/datasets/pre_process.py @@ -0,0 +1,71 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import cv2 +import math +import numpy as np + +from .common import letterbox + +def get_post_process(data_process_type): + if data_process_type == "yolov5": + return Yolov5Preprocess + elif data_process_type == "yolov3": + return Yolov3Preprocess + elif data_process_type == "yolox": + return YoloxPreprocess + return None + +def Yolov3Preprocess(image, img_size): + + h0, w0 = image.shape[:2] # orig hw + r = img_size / max(h0, w0) # ratio + + image = cv2.resize(image, (img_size, img_size)) + image = image.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + image = np.ascontiguousarray(image).astype(np.float32) / 255.0 # 0~1 np array + return image + +def Yolov5Preprocess(image, img_size, augment=False): + + h0, w0 = image.shape[:2] # orig hw + r = img_size / max(h0, w0) # ratio + + if r != 1: # if sizes are not equal + interp = cv2.INTER_LINEAR if (augment or r > 1) else cv2.INTER_AREA + image = cv2.resize(image, (math.ceil(w0 * r), math.ceil(h0 * r)), interpolation=interp) + + # shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size rect == True + + image, ratio, dwdh = letterbox(image, new_shape=img_size, auto=False, scaleup=False) + image = image.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + image = np.ascontiguousarray(image).astype(np.float32) / 255.0 # 0~1 np array + return image + +def YoloxPreprocess(img, img_size, swap=(2,0,1)): + + padded_img = np.ones((img_size, img_size, 3), dtype=np.uint8) * 114 + r = min(img_size / img.shape[0], img_size / img.shape[1]) + resized_img = cv2.resize( + img, + (int(img.shape[1] * r), int(img.shape[0] * r)), + interpolation=cv2.INTER_LINEAR, + ).astype(np.uint8) + + padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img + padded_img = padded_img.transpose(swap) + padded_img = np.ascontiguousarray(padded_img, dtype=np.float32) + + return padded_img \ No newline at end of file diff --git a/models/cv/detection/yolov5s/ixrt/datasets/vision.py b/models/cv/detection/yolov5s/ixrt/datasets/vision.py new file mode 100755 index 0000000000000000000000000000000000000000..eadefb2c5b35abd0a11fa85c65891461a210aef8 --- /dev/null +++ b/models/cv/detection/yolov5s/ixrt/datasets/vision.py @@ -0,0 +1,151 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +from typing import Any, Callable, List, Optional, Tuple + +import torch +import torch.utils.data as data + +from types import FunctionType + +def _log_api_usage_once(obj: Any) -> None: + + """ + Logs API usage(module and name) within an organization. 
+ In a large ecosystem, it's often useful to track the PyTorch and + TorchVision APIs usage. This API provides the similar functionality to the + logging module in the Python stdlib. It can be used for debugging purpose + to log which methods are used and by default it is inactive, unless the user + manually subscribes a logger via the `SetAPIUsageLogger method `_. + Please note it is triggered only once for the same API call within a process. + It does not collect any data from open-source users since it is no-op by default. + For more information, please refer to + * PyTorch note: https://pytorch.org/docs/stable/notes/large_scale_deployments.html#api-usage-logging; + * Logging policy: https://github.com/pytorch/vision/issues/5052; + + Args: + obj (class instance or method): an object to extract info from. + """ + module = obj.__module__ + if not module.startswith("torchvision"): + module = f"torchvision.internal.{module}" + name = obj.__class__.__name__ + if isinstance(obj, FunctionType): + name = obj.__name__ + torch._C._log_api_usage_once(f"{module}.{name}") + +class VisionDataset(data.Dataset): + """ + Base Class For making datasets which are compatible with torchvision. + It is necessary to override the ``__getitem__`` and ``__len__`` method. + + Args: + root (string): Root directory of dataset. + transforms (callable, optional): A function/transforms that takes in + an image and a label and returns the transformed versions of both. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + + .. note:: + + :attr:`transforms` and the combination of :attr:`transform` and :attr:`target_transform` are mutually exclusive. + """ + + _repr_indent = 4 + + def __init__( + self, + root: str, + transforms: Optional[Callable] = None, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + ) -> None: + _log_api_usage_once(self) + if isinstance(root, str): + root = os.path.expanduser(root) + self.root = root + + has_transforms = transforms is not None + has_separate_transform = transform is not None or target_transform is not None + if has_transforms and has_separate_transform: + raise ValueError("Only transforms or transform/target_transform can be passed as argument") + + # for backwards-compatibility + self.transform = transform + self.target_transform = target_transform + + if has_separate_transform: + transforms = StandardTransform(transform, target_transform) + self.transforms = transforms + + def __getitem__(self, index: int) -> Any: + """ + Args: + index (int): Index + + Returns: + (Any): Sample and meta data, optionally transformed by the respective transforms. 
+ """ + raise NotImplementedError + + def __len__(self) -> int: + raise NotImplementedError + + def __repr__(self) -> str: + head = "Dataset " + self.__class__.__name__ + body = [f"Number of datapoints: {self.__len__()}"] + if self.root is not None: + body.append(f"Root location: {self.root}") + body += self.extra_repr().splitlines() + if hasattr(self, "transforms") and self.transforms is not None: + body += [repr(self.transforms)] + lines = [head] + [" " * self._repr_indent + line for line in body] + return "\n".join(lines) + + def _format_transform_repr(self, transform: Callable, head: str) -> List[str]: + lines = transform.__repr__().splitlines() + return [f"{head}{lines[0]}"] + ["{}{}".format(" " * len(head), line) for line in lines[1:]] + + def extra_repr(self) -> str: + return "" + + +class StandardTransform: + def __init__(self, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None) -> None: + self.transform = transform + self.target_transform = target_transform + + def __call__(self, input: Any, target: Any) -> Tuple[Any, Any]: + if self.transform is not None: + input = self.transform(input) + if self.target_transform is not None: + target = self.target_transform(target) + return input, target + + def _format_transform_repr(self, transform: Callable, head: str) -> List[str]: + lines = transform.__repr__().splitlines() + return [f"{head}{lines[0]}"] + ["{}{}".format(" " * len(head), line) for line in lines[1:]] + + def __repr__(self) -> str: + body = [self.__class__.__name__] + if self.transform is not None: + body += self._format_transform_repr(self.transform, "Transform: ") + if self.target_transform is not None: + body += self._format_transform_repr(self.target_transform, "Target transform: ") + + return "\n".join(body) diff --git a/models/cv/detection/yolov5s/ixrt/deploy.py b/models/cv/detection/yolov5s/ixrt/deploy.py new file mode 100644 index 0000000000000000000000000000000000000000..37c5f9ac9d1893978f09f1717d99b5857274121e --- /dev/null +++ b/models/cv/detection/yolov5s/ixrt/deploy.py @@ -0,0 +1,150 @@ +# !/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +import argparse +from tensorrt.deploy.api import GraphTransform, create_source, create_target + +class Transform: + def __init__(self, graph): + self.t = GraphTransform(graph) + self.graph = graph + + def ReplaceFocus(self, input_edge, outputs, to_op): + input_var = self.graph.get_variable(input_edge) + op = self.graph.get_operator(to_op) + self.t.delete_operators_between_var_op( + from_var=input_var, to_op=op + ) + self.t.make_operator( + "Focus", inputs=input_edge, outputs=outputs + ) + return self.graph + + def AddYoloDecoderOp(self, inputs: list, outputs: list, op_type, **attributes): + if attributes["anchor"] is None: + del attributes["anchor"] + self.t.make_operator( + op_type, inputs=inputs, outputs=outputs, **attributes + ) + return self.graph + + def AddConcatOp(self, inputs: list, outputs, **attributes): + self.t.make_operator( + "Concat", inputs=inputs, outputs=outputs, **attributes + ) + return self.graph + +def customize_ops(graph, args): + t = Transform(graph) + fuse_focus = args.focus_input is not None and args.focus_output is not None and args.focus_last_node is not None + if fuse_focus: + graph = t.ReplaceFocus( + input_edge=args.focus_input, + outputs=args.focus_output, + to_op=args.focus_last_node + ) + decoder_input = args.decoder_input_names + num = len(decoder_input) // 3 + graph = t.AddYoloDecoderOp( + inputs=decoder_input[:num], + outputs=["decoder_8"], + op_type=args.decoder_type, + anchor=args.decoder8_anchor, + num_class=args.num_class, + stride=8, + faster_impl=args.faster + ) + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num:num*2], + outputs=["decoder_16"], + op_type=args.decoder_type, + anchor=args.decoder16_anchor, + num_class=args.num_class, + stride=16, + faster_impl=args.faster + ) + + if args.decoder64_anchor is not None: + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num*2:num*2+1], + outputs=["decoder_32"], + op_type=args.decoder_type, + anchor=args.decoder32_anchor, + num_class=args.num_class, + stride=32, + faster_impl=args.faster + ) + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num*2+1:], + outputs=["decoder_64"], + op_type=args.decoder_type, + anchor=args.decoder64_anchor, + num_class=args.num_class, + stride=64, + faster_impl=args.faster + ) + graph = t.AddConcatOp( + inputs=["decoder_8", "decoder_16", "decoder_32", "decoder_64"], + outputs=["output"], + axis=1 + ) + else: + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num*2:], + outputs=["decoder_32"], + op_type=args.decoder_type, + anchor=args.decoder32_anchor, + num_class=args.num_class, + stride=32, + faster_impl=args.faster + ) + graph = t.AddConcatOp( + inputs=["decoder_32", "decoder_16", "decoder_8"], + outputs=["output"], + axis=1 + ) + + graph.outputs.clear() + graph.add_output("output") + graph.outputs["output"].dtype = "FLOAT" + return graph + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--src", type=str) + parser.add_argument("--dst", type=str) + parser.add_argument("--decoder_type", type=str, choices=["YoloV3Decoder", "YoloV5Decoder", "YoloV7Decoder", "YoloxDecoder"]) + parser.add_argument("--decoder_input_names", nargs='+', type=str) + parser.add_argument("--decoder8_anchor", nargs='*', type=int) + parser.add_argument("--decoder16_anchor", nargs='*', type=int) + parser.add_argument("--decoder32_anchor", nargs='*', type=int) + parser.add_argument("--decoder64_anchor", nargs='*', type=int, default=None) + parser.add_argument("--num_class", type=int, default=80) + parser.add_argument("--faster", type=int, default=1) + 
parser.add_argument("--focus_input", type=str, default=None) + parser.add_argument("--focus_output", type=str, default=None) + parser.add_argument("--focus_last_node", type=str, default=None) + args = parser.parse_args() + return args + +if __name__ == "__main__": + + args = parse_args() + graph = create_source(args.src)() + graph = customize_ops(graph, args) + create_target(saved_path=args.dst).export(graph) + print("Surged onnx lies on", args.dst) \ No newline at end of file diff --git a/models/cv/detection/yolov5s/ixrt/inference.py b/models/cv/detection/yolov5s/ixrt/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..addf5278f5e31aca71ffa75051a1f0a716840d92 --- /dev/null +++ b/models/cv/detection/yolov5s/ixrt/inference.py @@ -0,0 +1,264 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import argparse +import glob +import json +import os +import time +import sys + +import torch +import numpy as np +import pycuda.autoinit +import pycuda.driver as cuda + +from coco_labels import coco80_to_coco91_class, labels +from common import save2json, box_class85to6 +from common import create_engine_context, get_io_bindings +from calibration_dataset import create_dataloaders +from datasets.post_process import get_post_process + +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval +from tqdm import tqdm +from tqdm.contrib import tzip + +import tensorrt + +from load_ixrt_plugin import load_ixrt_plugin +load_ixrt_plugin() + +def main(config): + + # Load dataloader + dataloader = create_dataloaders( + data_path=config.eval_dir, + annFile=config.coco_gt, + img_sz=config.imgsz, + batch_size=config.bsz, + step=config.loop_count, + data_process_type=config.data_process_type + ) + + # Load post process func + if config.test_mode == "MAP": + post_process_func = get_post_process(config.data_process_type) + + bsz = config.bsz + num_samples = 5000 + if config.loop_count > 0 and config.loop_count < num_samples/bsz : + num_samples = bsz * config.loop_count + num_batch = len(dataloader) + print("=" * 30) + print(f"Test Mode : {'Asynchronous' if config.use_async else 'Synchronous'}") + print(f"Total sample : {num_samples}\nBatch_size : {bsz}\nRun Batch : {num_batch}") + print("=" * 30) + + json_result = [] + forward_time = 0.0 + class_map = coco80_to_coco91_class() + + host_mem = tensorrt.IHostMemory + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + + # Load Engine + engine, context = create_engine_context(config.model_engine, logger) + inputs, outputs, allocations = get_io_bindings(engine) + + # Load nms_engine + if config.test_mode == "MAP" and config.nms_type == "GPU": + nms_engine, nms_context = create_engine_context(config.nms_engine, logger) + nms_inputs, nms_outputs, nms_allocations = get_io_bindings(nms_engine) + nms_output0 = np.zeros(nms_outputs[0]["shape"], nms_outputs[0]["dtype"]) + nms_output1 = 
np.zeros(nms_outputs[1]["shape"], nms_outputs[1]["dtype"]) + print(f"nms_output0 shape : {nms_output0.shape} nms_output0 type : {nms_output0.dtype}") + print(f"nms_output1 shape : {nms_output1.shape} nms_output1 type : {nms_output1.dtype}") + + # Warm up + if config.warm_up > 0: + print("\nWarm Start.") + for i in range(config.warm_up): + context.execute_v2(allocations) + print("Warm Done.") + + # Prepare the output data + output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) + print(f"output shape : {output.shape} output type : {output.dtype}") + + for batch_data, batch_img_shape, batch_img_id in tqdm(dataloader): + batch_data = batch_data.numpy() + batch_img_shape = [batch_img_shape[0].numpy(), batch_img_shape[1].numpy()] + # batch_img_id = batch_img_id.numpy() + + cur_bsz_sample = batch_data.shape[0] + + # Set input + cuda.memcpy_htod(inputs[0]["allocation"], batch_data) + + # Forward + start_time = time.time() + context.execute_v2(allocations) + end_time = time.time() + forward_time += end_time - start_time + + if config.test_mode == "MAP": + # Fetch output + cuda.memcpy_dtoh(output, outputs[0]["allocation"]) + + # Step 1 : prepare data to nms + _, box_num, box_unit = output.shape + if config.debug: + print(f"[Debug] box_num(25200) : {box_num}, box_unit(6) : {box_unit}") + + if config.decoder_faster == 0: + nms_input = box_class85to6(output.reshape(-1, box_unit)) + else: + nms_input = output + + # Step 2 : nms + # cpu nms(TODO) + + # gpu nms + if config.nms_type == "GPU": + + # Set nms input + cuda.memcpy_htod(nms_inputs[0]["allocation"], nms_input) + nms_context.execute_v2(nms_allocations) + cuda.memcpy_dtoh(nms_output0, nms_outputs[0]["allocation"]) + cuda.memcpy_dtoh(nms_output1, nms_outputs[1]["allocation"]) + + # Step 3 : post process + save + pred_boxes = post_process_func( + ori_img_shape=batch_img_shape, + imgsz=(config.imgsz, config.imgsz), + box_datas=nms_output0, + box_nums=nms_output1, + sample_num=cur_bsz_sample, + max_det=config.max_det + ) + save2json(batch_img_id, pred_boxes, json_result, class_map) + + fps = num_samples / forward_time + + if config.test_mode == "FPS": + print("FPS : ", fps) + print(f"Performance Check : Test {fps} >= target {config.fps_target}") + if fps >= config.fps_target: + print("pass!") + exit() + else: + print("failed!") + exit(1) + + if config.test_mode == "MAP": + if len(json_result) == 0: + print("Predict zero box!") + exit(1) + + if not os.path.exists(config.pred_dir): + os.makedirs(config.pred_dir) + + pred_json = os.path.join( + config.pred_dir, f"{config.model_name}_{config.precision}_preds.json" + ) + with open(pred_json, "w") as f: + json.dump(json_result, f) + + anno_json = config.coco_gt + anno = COCO(anno_json) # init annotations api + pred = anno.loadRes(pred_json) # init predictions api + eval = COCOeval(anno, pred, "bbox") + + eval.evaluate() + eval.accumulate() + print( + f"==============================eval {config.model_name} {config.precision} coco map ==============================" + ) + eval.summarize() + + map, map50 = eval.stats[:2] + print("MAP@0.5 : ", map50) + print(f"Accuracy Check : Test {map50} >= target {config.map_target}") + if map50 >= config.map_target: + print("pass!") + exit() + else: + print("failed!") + exit(1) + +def parse_config(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--model_name", type=str, default="YOLOV5s", help="YOLOV3 YOLOV5 YOLOV7 YOLOX" + ) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", + help="The precision of 
datatype") + parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") + parser.add_argument( + "--model_engine", + type=str, + default="", + help="model engine path", + ) + parser.add_argument( + "--nms_engine", + type=str, + default="", + help="nms engine path", + ) + parser.add_argument( + "--coco_gt", + type=str, + default="data/datasets/cv/coco2017/annotations/instances_val2017.json", + help="coco instances_val2017.json", + ) + parser.add_argument("--warm_up", type=int, default=3, help="warm_up count") + parser.add_argument("--loop_count", type=int, default=-1, help="loop count") + parser.add_argument( + "--eval_dir", + type=str, + default="data/datasets/cv/coco2017/val2017", + help="coco image dir", + ) + parser.add_argument("--bsz", type=int, default=32, help="test batch size") + parser.add_argument( + "--imgsz", + "--img", + "--img-size", + type=int, + default=640, + help="inference size h,w", + ) + parser.add_argument("--max_det", type=int, default=1000, help="maximum detections per image") + parser.add_argument("--data_process_type", type=str, default="none") + parser.add_argument("--use_async", action="store_true") + parser.add_argument("--debug", action="store_true") + parser.add_argument("--pred_dir", type=str, default=".", help="pred save json dirs") + parser.add_argument("--map_target", type=float, default=0.56, help="target mAP") + parser.add_argument("--fps_target", type=float, default=-1.0, help="target fps") + parser.add_argument("--decoder_faster", type=int, default=0, help="decoder faster can use gpu nms directly") + parser.add_argument("--nms_type", type=str, default="GPU", help="GPU/CPU") + + config = parser.parse_args() + print("config:", config) + return config + +if __name__ == "__main__": + config = parse_config() + main(config) \ No newline at end of file diff --git a/models/cv/detection/yolov5s/ixrt/load_ixrt_plugin.py b/models/cv/detection/yolov5s/ixrt/load_ixrt_plugin.py new file mode 100644 index 0000000000000000000000000000000000000000..ae47dc8e854b6bea1f768e65c4dd481048bfebce --- /dev/null +++ b/models/cv/detection/yolov5s/ixrt/load_ixrt_plugin.py @@ -0,0 +1,27 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+
+import ctypes
+import tensorrt
+from os.path import join, dirname, exists
+
+def load_ixrt_plugin(logger=tensorrt.Logger(tensorrt.Logger.INFO), namespace="", dynamic_path=""):
+    if not dynamic_path:
+        dynamic_path = join(dirname(tensorrt.__file__), "lib", "libixrt_plugin.so")
+    if not exists(dynamic_path):
+        raise FileNotFoundError(
+            f"The ixrt_plugin lib {dynamic_path} does not exist, please provide a valid plugin path!")
+    ctypes.CDLL(dynamic_path)
+    tensorrt.init_libnvinfer_plugins(logger, namespace)
+    print(f"Loaded plugin from {dynamic_path}")
\ No newline at end of file
diff --git a/models/cv/detection/yolov5s/ixrt/modify_batchsize.py b/models/cv/detection/yolov5s/ixrt/modify_batchsize.py
new file mode 100644
index 0000000000000000000000000000000000000000..3a88c1603bd6f457fd4965257627dc29edcda4d1
--- /dev/null
+++ b/models/cv/detection/yolov5s/ixrt/modify_batchsize.py
@@ -0,0 +1,52 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import onnx
+import argparse
+
+def change_input_dim(model, bsz):
+    batch_size = bsz
+
+    # The following code changes the first dimension of every input to be batch_size
+    # Modify as appropriate ... note that this requires all inputs to
+    # have the same batch_size
+    inputs = model.graph.input
+    for input in inputs:
+        # Checks omitted. This assumes that all inputs are tensors and have a shape with first dim.
+        # Add checks as needed.
+        dim1 = input.type.tensor_type.shape.dim[0]
+        if isinstance(batch_size, str):
+            # set dynamic batch size (symbolic dim)
+            dim1.dim_param = batch_size
+        elif isinstance(batch_size, int):
+            # set given batch size
+            dim1.dim_value = batch_size
+        else:
+            # set batch size of 1
+            dim1.dim_value = 1
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--batch_size", type=int)
+    parser.add_argument("--origin_model", type=str)
+    parser.add_argument("--output_model", type=str)
+    args = parser.parse_args()
+    return args
+
+args = parse_args()
+model = onnx.load(args.origin_model)
+change_input_dim(model, args.batch_size)
+onnx.save(model, args.output_model)
\ No newline at end of file
diff --git a/models/cv/detection/yolov5s/ixrt/quant.py b/models/cv/detection/yolov5s/ixrt/quant.py
new file mode 100644
index 0000000000000000000000000000000000000000..36fd39a13c2e1e40f4dc0098f042e66e4bd0d26a
--- /dev/null
+++ b/models/cv/detection/yolov5s/ixrt/quant.py
@@ -0,0 +1,67 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import random +import argparse +import numpy as np +from tensorrt.deploy import static_quantize + +import torch +from calibration_dataset import create_dataloaders + +def setseed(seed=42): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model_name", type=str) + parser.add_argument("--model", type=str, default="yolov5s_with_decoder.onnx") + parser.add_argument("--data_process_type", type=str, default="none") + parser.add_argument("--dataset_dir", type=str, default="./coco2017/val2017") + parser.add_argument("--ann_file", type=str, default="./coco2017/annotations/instances_val2017.json") + parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], default="hist_percentile") + parser.add_argument("--disable_quant_names", nargs='*', type=str) + parser.add_argument("--save_dir", type=str, help="save path", default=None) + parser.add_argument("--bsz", type=int, default=32) + parser.add_argument("--step", type=int, default=20) + parser.add_argument("--seed", type=int, default=42) + parser.add_argument("--imgsz", type=int, default=640) + args = parser.parse_args() + return args + +args = parse_args() +setseed(args.seed) +model_name = args.model_name + +out_dir = args.save_dir +dataloader = create_dataloaders( + data_path=args.dataset_dir, + annFile=args.ann_file, + img_sz=args.imgsz, + batch_size=args.bsz, + step=args.step, + data_process_type=args.data_process_type +) +# print("disable_quant_names : ", args.disable_quant_names) +static_quantize(args.model, + calibration_dataloader=dataloader, + save_quant_onnx_path=os.path.join(out_dir, f"quantized_{model_name}.onnx"), + observer=args.observer, + data_preprocess=lambda x: x[0].to("cuda"), + quant_format="qdq", + disable_quant_names=args.disable_quant_names) \ No newline at end of file diff --git a/models/cv/detection/yolov5s/ixrt/scripts/infer_yolov5s_fp16_accuracy.sh b/models/cv/detection/yolov5s/ixrt/scripts/infer_yolov5s_fp16_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..81b32fd1ec2538faad36dc432817cd8036032a95 --- /dev/null +++ b/models/cv/detection/yolov5s/ixrt/scripts/infer_yolov5s_fp16_accuracy.sh @@ -0,0 +1,212 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
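+
+# End-to-end FP16 accuracy pipeline: simplify the ONNX -> cut off the decoder
+# head -> re-attach it as a fused decoder op -> fix the batch size -> build the
+# ixrt engine -> build a GPU NMS engine -> evaluate COCO mAP via inference.py.
+# Typical invocation, assuming the environment variables from the README
+# (CONFIG_DIR, CHECKPOINTS_DIR, DATASETS_DIR, ...) are exported:
+#
+#   bash scripts/infer_yolov5s_fp16_accuracy.sh --bs 32 --tgt 0.56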
+
+EXIT_STATUS=0
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then
+        EXIT_STATUS=1
+    fi
+}
+
+# Run parameters
+BSZ=32
+WARM_UP=-1
+TGT=-1
+LOOP_COUNT=-1
+RUN_MODE=MAP
+PRECISION=float16
+
+# Update arguments
+index=0
+options=$@
+arguments=($options)
+for argument in $options
+do
+    index=`expr $index + 1`
+    case $argument in
+      --bs) BSZ=${arguments[index]};;
+      --tgt) TGT=${arguments[index]};;
+    esac
+done
+
+source ${CONFIG_DIR}
+ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo COCO_GT : ${COCO_GT}
+echo EVAL_DIR : ${EVAL_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo CONFIG_DIR : ${CONFIG_DIR}
+echo ====================== Model Info ======================
+echo Model Name : ${MODEL_NAME}
+echo Onnx Path : ${ORIGINE_MODEL}
+
+step=0
+faster=0
+CURRENT_MODEL=${ORIGINE_MODEL}
+if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then
+    faster=1
+fi
+
+# Simplify Model
+let step++
+echo [STEP ${step}] : Simplify Model
+SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
+if [ -f ${SIM_MODEL} ];then
+    echo " "Simplify Model skip, ${SIM_MODEL} already exists
+else
+    python3 ${RUN_DIR}/simplify_model.py \
+        --origin_model ${CURRENT_MODEL} \
+        --output_model ${SIM_MODEL}
+    echo " "Generate ${SIM_MODEL}
+fi
+CURRENT_MODEL=${SIM_MODEL}
+
+# Cut Decoder
+let step++
+echo [STEP ${step}] : Cut Decoder
+NO_DECODER_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_without_decoder.onnx
+if [ -f ${NO_DECODER_MODEL} ];then
+    echo " "Cut Decoder skip, ${NO_DECODER_MODEL} already exists
+else
+    python3 ${RUN_DIR}/cut_model.py \
+        --input_model ${CURRENT_MODEL} \
+        --output_model ${NO_DECODER_MODEL} \
+        --input_names ${MODEL_INPUT_NAMES[@]} \
+        --output_names ${DECODER_INPUT_NAMES[@]}
+fi
+CURRENT_MODEL=${NO_DECODER_MODEL}
+
+# Quant Model
+if [ $PRECISION == "int8" ];then
+    let step++
+    echo;
+    echo [STEP ${step}] : Quant Model
+    if [[ -z ${QUANT_EXIST_ONNX} ]];then
+        QUANT_EXIST_ONNX=${CHECKPOINTS_DIR}/quantized_${MODEL_NAME}.onnx
+    fi
+    if [[ -f ${QUANT_EXIST_ONNX} ]];then
+        CURRENT_MODEL=${QUANT_EXIST_ONNX}
+        echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} already exists
+    else
+        python3 ${RUN_DIR}/quant.py \
+            --model ${CURRENT_MODEL} \
+            --model_name ${MODEL_NAME} \
+            --dataset_dir ${EVAL_DIR} \
+            --ann_file ${COCO_GT} \
+            --data_process_type ${DATA_PROCESS_TYPE} \
+            --observer ${QUANT_OBSERVER} \
+            --disable_quant_names ${DISABLE_QUANT_LIST[@]} \
+            --save_dir ${CHECKPOINTS_DIR} \
+            --bsz ${QUANT_BATCHSIZE} \
+            --step ${QUANT_STEP} \
+            --seed ${QUANT_SEED} \
+            --imgsz ${IMGSIZE}
+        echo " "Generate ${QUANT_EXIST_ONNX}
+    fi
+    CURRENT_MODEL=${QUANT_EXIST_ONNX}
+fi
+
+# Add Decoder
+if [ $LAYER_FUSION == "1" ]; then
+    let step++
+    echo;
+    echo [STEP ${step}] : Add Decoder
+    FUSION_ONNX=${CHECKPOINTS_DIR}/${MODEL_NAME}_fusion.onnx
+    if [ -f $FUSION_ONNX ];then
+        echo " "Add Decoder Skip, $FUSION_ONNX already exists
+    else
+        python3 ${RUN_DIR}/deploy.py \
+            --src ${CURRENT_MODEL} \
+            --dst ${FUSION_ONNX} \
+            --decoder_type YoloV5Decoder \
+            --decoder_input_names ${DECODER_INPUT_NAMES[@]} \
+            --decoder8_anchor ${DECODER_8_ANCHOR[@]} \
+            --decoder16_anchor ${DECODER_16_ANCHOR[@]} \
+            --decoder32_anchor ${DECODER_32_ANCHOR[@]} \
+            --num_class ${DECODER_NUM_CLASS} \
+            --faster ${faster}
+    fi
+    CURRENT_MODEL=${FUSION_ONNX}
+fi
+
+# Change Batchsize
+let step++
+echo;
+echo [STEP ${step}] : Change Batchsize
+FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_bs${BSZ}.onnx
+if [ -f $FINAL_MODEL ];then
+    echo " "Change Batchsize Skip, $FINAL_MODEL already exists
+else
+    python3 ${RUN_DIR}/modify_batchsize.py \
+        --batch_size ${BSZ} \
+        --origin_model ${CURRENT_MODEL} \
+        --output_model ${FINAL_MODEL}
+    echo " "Generate ${FINAL_MODEL}
+fi
+CURRENT_MODEL=${FINAL_MODEL}
+
+# Build Engine
+let step++
+echo;
+echo [STEP ${step}] : Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
+if [ -f $ENGINE_FILE ];then
+    echo " "Build Engine Skip, $ENGINE_FILE already exists
+else
+    python3 ${RUN_DIR}/build_engine.py \
+        --precision ${PRECISION} \
+        --model ${CURRENT_MODEL} \
+        --engine ${ENGINE_FILE}
+    echo " "Generate Engine ${ENGINE_FILE}
+fi
+if [[ ${RUN_MODE} == "MAP" && ${NMS_TYPE} == "GPU" ]];then
+    NMS_ENGINE=${CHECKPOINTS_DIR}/nms.engine
+    # Build NMS Engine
+    python3 ${RUN_DIR}/build_nms_engine.py \
+        --bsz ${BSZ} \
+        --path ${CHECKPOINTS_DIR} \
+        --all_box_num ${ALL_BOX_NUM} \
+        --max_box_pre_img ${MAX_BOX_PRE_IMG} \
+        --iou_thresh ${IOU_THRESH} \
+        --score_thresh ${SCORE_THRESH}
+fi
+
+# Inference
+let step++
+echo;
+echo [STEP ${step}] : Inference
+python3 ${RUN_DIR}/inference.py \
+    --model_engine=${ENGINE_FILE} \
+    --nms_engine=${NMS_ENGINE} \
+    --coco_gt=${COCO_GT} \
+    --eval_dir=${EVAL_DIR} \
+    --data_process_type ${DATA_PROCESS_TYPE} \
+    --decoder_faster=${faster} \
+    --imgsz=${IMGSIZE} \
+    --warm_up=${WARM_UP} \
+    --loop_count ${LOOP_COUNT} \
+    --test_mode ${RUN_MODE} \
+    --model_name ${MODEL_NAME} \
+    --precision ${PRECISION} \
+    --pred_dir ${CHECKPOINTS_DIR} \
+    --map_target ${TGT} \
+    --max_det ${MAX_BOX_PRE_IMG} \
+    --nms_type ${NMS_TYPE} \
+    --bsz ${BSZ}; check_status
+exit ${EXIT_STATUS}
\ No newline at end of file
diff --git a/models/cv/detection/yolov5s/ixrt/scripts/infer_yolov5s_fp16_performance.sh b/models/cv/detection/yolov5s/ixrt/scripts/infer_yolov5s_fp16_performance.sh
new file mode 100644
index 0000000000000000000000000000000000000000..4ab4f9e413d156ac4b2b669240f55f09cf5eb14a
--- /dev/null
+++ b/models/cv/detection/yolov5s/ixrt/scripts/infer_yolov5s_fp16_performance.sh
@@ -0,0 +1,212 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
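+
+# Same staging as the accuracy script, but RUN_MODE=FPS: the engine is warmed
+# up (WARM_UP=3), timed over LOOP_COUNT=10 batches, and inference.py receives
+# --fps_target instead of --map_target. Because RUN_MODE is not "MAP", the
+# GPU NMS engine build below is skipped and no mAP is computed.
+#
+#   bash scripts/infer_yolov5s_fp16_performance.sh --bs 32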
+ +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +WARM_UP=3 +TGT=-1 +LOOP_COUNT=10 +RUN_MODE=FPS +PRECISION=float16 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo COCO_GT : ${COCO_GT} +echo EVAL_DIR : ${EVAL_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +step=0 +faster=0 +CURRENT_MODEL=${ORIGINE_MODEL} +if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then + faster=1 +fi + +# Simplify Model +let step++ +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model skip, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi +CURRENT_MODEL=${SIM_MODEL} + +# Cut Decoder +let step++ +echo [STEP ${step}] : Cut Decoder +NO_DECODER_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_without_decoder.onnx +if [ -f ${NO_DECODER_MODEL} ];then + echo " "Cut Decoder skip, ${SIM_MNO_DECODER_MODELODEL} has been existed +else + python3 ${RUN_DIR}/cut_model.py \ + --input_model ${CURRENT_MODEL} \ + --output_model ${NO_DECODER_MODEL} \ + --input_names ${MODEL_INPUT_NAMES[@]} \ + --output_names ${DECODER_INPUT_NAMES[@]} +fi +CURRENT_MODEL=${NO_DECODER_MODEL} + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + CURRENT_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${CURRENT_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${EVAL_DIR} \ + --ann_file ${COCO_GT} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir ${CHECKPOINTS_DIR} \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + echo " "Generate ${QUANT_EXIST_ONNX} + fi + CURRENT_MODEL=${QUANT_EXIST_ONNX} +fi + +# Add Decoder +if [ $LAYER_FUSION == "1" ]; then + let step++ + echo; + echo [STEP ${step}] : Add Decoder + FUSION_ONNX=${CHECKPOINTS_DIR}/${MODEL_NAME}_fusion.onnx + if [ -f $FUSION_ONNX ];then + echo " "Add Decoder Skip, $FUSION_ONNX has been existed + else + python3 ${RUN_DIR}/deploy.py \ + --src ${CURRENT_MODEL} \ + --dst ${FUSION_ONNX} \ + --decoder_type YoloV5Decoder \ + --decoder_input_names ${DECODER_INPUT_NAMES[@]} \ + --decoder8_anchor ${DECODER_8_ANCHOR[@]} \ + --decoder16_anchor ${DECODER_16_ANCHOR[@]} \ + --decoder32_anchor ${DECODER_32_ANCHOR[@]} \ + --num_class ${DECODER_NUM_CLASS} \ + --faster ${faster} + fi + CURRENT_MODEL=${FUSION_ONNX} +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_bs${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, 
$FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py \ + --batch_size ${BSZ} \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi +CURRENT_MODEL=${FINAL_MODEL} + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi +if [[ ${RUN_MODE} == "MAP" && ${NMS_TYPE} == "GPU" ]];then + NMS_ENGINE=${CHECKPOINTS_DIR}/nms.engine + # Build NMS Engine + python3 ${RUN_DIR}/build_nms_engine.py \ + --bsz ${BSZ} \ + --path ${CHECKPOINTS_DIR} \ + --all_box_num ${ALL_BOX_NUM} \ + --max_box_pre_img ${MAX_BOX_PRE_IMG} \ + --iou_thresh ${IOU_THRESH} \ + --score_thresh ${SCORE_THRESH} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --model_engine=${ENGINE_FILE} \ + --nms_engine=${NMS_ENGINE} \ + --coco_gt=${COCO_GT} \ + --eval_dir=${EVAL_DIR} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --decoder_faster=${faster} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --model_name ${MODEL_NAME} \ + --precision ${PRECISION} \ + --pred_dir ${CHECKPOINTS_DIR} \ + --fps_target ${TGT} \ + --max_det ${MAX_BOX_PRE_IMG} \ + --nms_type ${NMS_TYPE} \ + --bsz ${BSZ}; check_status +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/detection/yolov5s/ixrt/scripts/infer_yolov5s_int8_accuracy.sh b/models/cv/detection/yolov5s/ixrt/scripts/infer_yolov5s_int8_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..fc7988dfd37e36039a8d1d835dbc558d9da2ffe5 --- /dev/null +++ b/models/cv/detection/yolov5s/ixrt/scripts/infer_yolov5s_int8_accuracy.sh @@ -0,0 +1,212 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
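+
+# INT8 variant of the accuracy pipeline: PRECISION=int8 activates the Quant
+# Model step, which calibrates with quant.py and writes
+# quantized_${MODEL_NAME}.onnx; the fused-decoder and batch-size steps then
+# operate on the quantized graph (note the *_quant_* intermediate filenames).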
+ +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +WARM_UP=-1 +TGT=-1 +LOOP_COUNT=-1 +RUN_MODE=MAP +PRECISION=int8 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo COCO_GT : ${COCO_GT} +echo EVAL_DIR : ${EVAL_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +step=0 +faster=0 +CURRENT_MODEL=${ORIGINE_MODEL} +if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then + faster=1 +fi + +# Simplify Model +let step++ +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model skip, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi +CURRENT_MODEL=${SIM_MODEL} + +# Cut Decoder +let step++ +echo [STEP ${step}] : Cut Decoder +NO_DECODER_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_without_decoder.onnx +if [ -f ${NO_DECODER_MODEL} ];then + echo " "Cut Decoder skip, ${SIM_MNO_DECODER_MODELODEL} has been existed +else + python3 ${RUN_DIR}/cut_model.py \ + --input_model ${CURRENT_MODEL} \ + --output_model ${NO_DECODER_MODEL} \ + --input_names ${MODEL_INPUT_NAMES[@]} \ + --output_names ${DECODER_INPUT_NAMES[@]} +fi +CURRENT_MODEL=${NO_DECODER_MODEL} + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=${CHECKPOINTS_DIR}/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + CURRENT_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${CURRENT_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${EVAL_DIR} \ + --ann_file ${COCO_GT} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir ${CHECKPOINTS_DIR} \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + echo " "Generate ${QUANT_EXIST_ONNX} + fi + CURRENT_MODEL=${QUANT_EXIST_ONNX} +fi + +# Add Decoder +if [ $LAYER_FUSION == "1" ]; then + let step++ + echo; + echo [STEP ${step}] : Add Decoder + FUSION_ONNX=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_fusion.onnx + if [ -f $FUSION_ONNX ];then + echo " "Add Decoder Skip, $FUSION_ONNX has been existed + else + python3 ${RUN_DIR}/deploy.py \ + --src ${CURRENT_MODEL} \ + --dst ${FUSION_ONNX} \ + --decoder_type YoloV5Decoder \ + --decoder_input_names ${DECODER_INPUT_NAMES[@]} \ + --decoder8_anchor ${DECODER_8_ANCHOR[@]} \ + --decoder16_anchor ${DECODER_16_ANCHOR[@]} \ + --decoder32_anchor ${DECODER_32_ANCHOR[@]} \ + --num_class ${DECODER_NUM_CLASS} \ + --faster ${faster} + fi + CURRENT_MODEL=${FUSION_ONNX} +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_bs${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change 
Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py \ + --batch_size ${BSZ} \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi +CURRENT_MODEL=${FINAL_MODEL} + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi +if [[ ${RUN_MODE} == "MAP" && ${NMS_TYPE} == "GPU" ]];then + NMS_ENGINE=${CHECKPOINTS_DIR}/nms.engine + # Build NMS Engine + python3 ${RUN_DIR}/build_nms_engine.py \ + --bsz ${BSZ} \ + --path ${CHECKPOINTS_DIR} \ + --all_box_num ${ALL_BOX_NUM} \ + --max_box_pre_img ${MAX_BOX_PRE_IMG} \ + --iou_thresh ${IOU_THRESH} \ + --score_thresh ${SCORE_THRESH} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --model_engine=${ENGINE_FILE} \ + --nms_engine=${NMS_ENGINE} \ + --coco_gt=${COCO_GT} \ + --eval_dir=${EVAL_DIR} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --decoder_faster=${faster} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --model_name ${MODEL_NAME} \ + --precision ${PRECISION} \ + --pred_dir ${CHECKPOINTS_DIR} \ + --map_target ${TGT} \ + --max_det ${MAX_BOX_PRE_IMG} \ + --nms_type ${NMS_TYPE} \ + --bsz ${BSZ}; check_status +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/detection/yolov5s/ixrt/scripts/infer_yolov5s_int8_performance.sh b/models/cv/detection/yolov5s/ixrt/scripts/infer_yolov5s_int8_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..dc912fa9873e9ced8683a41c6f9600a8e5a80b62 --- /dev/null +++ b/models/cv/detection/yolov5s/ixrt/scripts/infer_yolov5s_int8_performance.sh @@ -0,0 +1,213 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
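+
+# INT8 FPS measurement: identical to the INT8 accuracy script except that
+# RUN_MODE=FPS, WARM_UP=3 and LOOP_COUNT=10, and the final inference passes
+# --fps_target rather than --map_target.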
+ +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +WARM_UP=3 +TGT=-1 +LOOP_COUNT=10 +RUN_MODE=FPS +PRECISION=int8 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo COCO_GT : ${COCO_GT} +echo EVAL_DIR : ${EVAL_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +step=0 +faster=0 +CURRENT_MODEL=${ORIGINE_MODEL} +if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then + faster=1 +fi + +# Simplify Model +let step++ +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model skip, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi +CURRENT_MODEL=${SIM_MODEL} + +# Cut Decoder +let step++ +echo [STEP ${step}] : Cut Decoder +NO_DECODER_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_without_decoder.onnx +if [ -f ${NO_DECODER_MODEL} ];then + echo " "Cut Decoder skip, ${SIM_MNO_DECODER_MODELODEL} has been existed +else + python3 ${RUN_DIR}/cut_model.py \ + --input_model ${CURRENT_MODEL} \ + --output_model ${NO_DECODER_MODEL} \ + --input_names ${MODEL_INPUT_NAMES[@]} \ + --output_names ${DECODER_INPUT_NAMES[@]} +fi +CURRENT_MODEL=${NO_DECODER_MODEL} + + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + CURRENT_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${CURRENT_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${EVAL_DIR} \ + --ann_file ${COCO_GT} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir ${CHECKPOINTS_DIR} \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + echo " "Generate ${QUANT_EXIST_ONNX} + fi + CURRENT_MODEL=${QUANT_EXIST_ONNX} +fi + +# Add Decoder +if [ $LAYER_FUSION == "1" ]; then + let step++ + echo; + echo [STEP ${step}] : Add Decoder + FUSION_ONNX=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_fusion.onnx + if [ -f $FUSION_ONNX ];then + echo " "Add Decoder Skip, $FUSION_ONNX has been existed + else + python3 ${RUN_DIR}/deploy.py \ + --src ${CURRENT_MODEL} \ + --dst ${FUSION_ONNX} \ + --decoder_type YoloV5Decoder \ + --decoder_input_names ${DECODER_INPUT_NAMES[@]} \ + --decoder8_anchor ${DECODER_8_ANCHOR[@]} \ + --decoder16_anchor ${DECODER_16_ANCHOR[@]} \ + --decoder32_anchor ${DECODER_32_ANCHOR[@]} \ + --num_class ${DECODER_NUM_CLASS} \ + --faster ${faster} + fi + CURRENT_MODEL=${FUSION_ONNX} +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_bs${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change 
Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py \ + --batch_size ${BSZ} \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi +CURRENT_MODEL=${FINAL_MODEL} + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi +if [[ ${RUN_MODE} == "MAP" && ${NMS_TYPE} == "GPU" ]];then + NMS_ENGINE=${CHECKPOINTS_DIR}/nms.engine + # Build NMS Engine + python3 ${RUN_DIR}/build_nms_engine.py \ + --bsz ${BSZ} \ + --path ${CHECKPOINTS_DIR} \ + --all_box_num ${ALL_BOX_NUM} \ + --max_box_pre_img ${MAX_BOX_PRE_IMG} \ + --iou_thresh ${IOU_THRESH} \ + --score_thresh ${SCORE_THRESH} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --model_engine=${ENGINE_FILE} \ + --nms_engine=${NMS_ENGINE} \ + --coco_gt=${COCO_GT} \ + --eval_dir=${EVAL_DIR} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --decoder_faster=${faster} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --model_name ${MODEL_NAME} \ + --precision ${PRECISION} \ + --pred_dir ${CHECKPOINTS_DIR} \ + --fps_target ${TGT} \ + --max_det ${MAX_BOX_PRE_IMG} \ + --nms_type ${NMS_TYPE} \ + --bsz ${BSZ}; check_status +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/detection/yolov5s/ixrt/simplify_model.py b/models/cv/detection/yolov5s/ixrt/simplify_model.py new file mode 100644 index 0000000000000000000000000000000000000000..1400fd81ddb4b3fae1b20d0fd35082a692f5d292 --- /dev/null +++ b/models/cv/detection/yolov5s/ixrt/simplify_model.py @@ -0,0 +1,36 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
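+
+# onnxsim folds constants and prunes redundant nodes so the ixrt parser sees a
+# cleaner graph; shape inference is re-run afterwards so cut_model.py can
+# resolve the tensor names listed in the CONFIG file. Usage sketch (file
+# names are illustrative):
+#
+#   python3 simplify_model.py --origin_model yolov5s.onnx --output_model yolov5s_sim.onnx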
+
+import onnx
+import argparse
+from onnxsim import simplify
+
+# Simplify
+def simplify_model(args):
+    onnx_model = onnx.load(args.origin_model)
+    model_simp, check = simplify(onnx_model)
+    model_simp = onnx.shape_inference.infer_shapes(model_simp)
+    onnx.save(model_simp, args.output_model)
+    print(" Simplify onnx Done.")
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--origin_model", type=str)
+    parser.add_argument("--output_model", type=str)
+    args = parser.parse_args()
+    return args
+
+args = parse_args()
+simplify_model(args)
\ No newline at end of file
diff --git a/models/cv/detection/yolov7/ixrt/README.md b/models/cv/detection/yolov7/ixrt/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..c91971e794049acfa11bd2e0887a710bf034c347
--- /dev/null
+++ b/models/cv/detection/yolov7/ixrt/README.md
@@ -0,0 +1,70 @@
+# YOLOv7
+
+## Description
+
+YOLOv7 is an object detection model in the YOLO (You Only Look Once) series. It improves on earlier YOLO releases through refinements to the network architecture, training strategies, and data augmentation techniques, in order to achieve more accurate and faster object detection.
+
+## Setup
+
+### Install
+```
+yum install mesa-libGL
+pip3 install tqdm
+pip3 install onnx
+pip3 install onnxsim
+pip3 install ultralytics
+pip3 install pycocotools
+pip3 install opencv-python==4.6.0.66
+```
+
+### Download
+
+Pretrained model: 
+
+Dataset: to download the validation dataset.
+  - Images directory: Path/To/val2017/*.jpg
+  - Annotation file: Path/To/annotations/instances_val2017.json
+
+### Model Conversion
+```bash
+
+git clone https://github.com/WongKinYiu/yolov7.git
+cd yolov7
+python3 export.py --weights yolov7.pt --grid --end2end --simplify --topk-all 100 --iou-thres 0.65 --conf-thres 0.35 --img-size 640 640 --max-wh 640 --batch-size 32
+mkdir /Your_Projects/To/checkpoints
+mv yolov7.onnx /Path/to/checkpoints/yolov7m.onnx
+```
+
+## Inference
+```bash
+export PROJ_DIR=/Path/to/yolov7/ixrt
+export DATASETS_DIR=/Path/to/coco2017/
+export CHECKPOINTS_DIR=./checkpoints
+export COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json
+export EVAL_DIR=${DATASETS_DIR}/val2017
+export RUN_DIR=/Path/to/yolov7/ixrt
+export CONFIG_DIR=config/YOLOV7M_CONFIG
+```
+### FP16
+
+```bash
+# Accuracy
+bash scripts/infer_yolov7m_fp16_accuracy.sh
+# Performance
+bash scripts/infer_yolov7m_fp16_performance.sh
+```
+
+### INT8
+```bash
+# Accuracy
+bash scripts/infer_yolov7m_int8_accuracy.sh
+# Performance
+bash scripts/infer_yolov7m_int8_performance.sh
+```
+
+## Results
+
+Model |BatchSize |Precision |FPS |MAP@0.5 |MAP@0.5:0.95 |
+--------|-----------|----------|---------|----------|-------------|
+YOLOv7 | 32 | FP16 | 375.41 | 0.693 | 0.506 |
+YOLOv7 | 32 | INT8 | 816.71 | 0.688 | 0.471 |
\ No newline at end of file
diff --git a/models/cv/detection/yolov7/ixrt/build_engine.py b/models/cv/detection/yolov7/ixrt/build_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..a919bdd0183197ce125aa5492ec83e58e035675d
--- /dev/null
+++ b/models/cv/detection/yolov7/ixrt/build_engine.py
@@ -0,0 +1,58 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import cv2 +import argparse +import numpy as np + +import torch +import tensorrt + +from load_ixrt_plugin import load_ixrt_plugin +load_ixrt_plugin() + +def main(config): + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(config.model) + + precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 + # print("precision : ", precision) + build_config.set_flag(precision) + + plan = builder.build_serialized_network(network, build_config) + engine_file_path = config.engine + with open(engine_file_path, "wb") as f: + f.write(plan) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", + help="The precision of datatype") + # engine args + parser.add_argument("--engine", type=str, default=None) + + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = parse_args() + main(args) \ No newline at end of file diff --git a/models/cv/detection/yolov7/ixrt/build_nms_engine.py b/models/cv/detection/yolov7/ixrt/build_nms_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..3be0d83d0d966018f59b87d22f628b9b1ddf9b21 --- /dev/null +++ b/models/cv/detection/yolov7/ixrt/build_nms_engine.py @@ -0,0 +1,94 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
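+
+# Wraps ixrt's custom "NMS" plugin op in a one-node ONNX graph and compiles it
+# into a standalone engine, so non-maximum suppression runs on the GPU after
+# the detector engine. The input is [bsz, all_box_num, 6] boxes laid out as
+# (x1, y1, x2, y2, class_id, score); the outputs are the kept boxes and a
+# per-image kept-box count.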
+
+import os
+import argparse
+import torch
+import onnx
+from onnx import helper
+from onnx import TensorProto, numpy_helper
+import tensorrt
+
+def create_onnx(args):
+    nms = helper.make_node(
+        "NMS",
+        name="NMS",
+        inputs=["nms_input"],
+        outputs=["nms_output0", "nms_output1"],
+        nMaxKeep=args.max_box_pre_img,
+        fIoUThresh=args.iou_thresh,
+        fScoreThresh=args.score_thresh
+    )
+    graph = helper.make_graph(
+        nodes=[nms],
+        name="gpu_nms",
+        inputs=[
+            helper.make_tensor_value_info(
+                "nms_input", onnx.TensorProto.FLOAT, (args.bsz, args.all_box_num, 6)
+            )
+        ],
+        outputs=[
+            helper.make_tensor_value_info(
+                "nms_output0", onnx.TensorProto.FLOAT, (args.bsz, args.max_box_pre_img, 6)
+            ),
+            helper.make_tensor_value_info(
+                "nms_output1", onnx.TensorProto.INT32, (args.bsz,)
+            )
+        ],
+        initializer=[]
+    )
+
+    op = onnx.OperatorSetIdProto()
+    op.version = 13
+    # build the model once, pinning opset 13 for the custom NMS op
+
+    model = onnx.helper.make_model(graph, opset_imports=[op])
+    onnx_path = args.path + "/nms.onnx"
+    onnx.save(model, onnx_path)
+
+def build_engine(args):
+    onnx_path = args.path + "/nms.onnx"
+    IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING)
+    builder = tensorrt.Builder(IXRT_LOGGER)
+    EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
+    network = builder.create_network(EXPLICIT_BATCH)
+    build_config = builder.create_builder_config()
+    parser = tensorrt.OnnxParser(network, IXRT_LOGGER)
+    parser.parse_from_file(onnx_path)
+    plan = builder.build_serialized_network(network, build_config)
+
+    engine_path = args.path + "/nms.engine"
+    with open(engine_path, "wb") as f:
+        f.write(plan)
+
+def main(args):
+    create_onnx(args)
+    build_engine(args)
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--bsz", type=int, default=1, help="batch size")
+    parser.add_argument("--path", type=str)
+    parser.add_argument("--all_box_num", type=int, default=25200)
+    parser.add_argument("--max_box_pre_img", type=int, default=1000)
+    parser.add_argument("--iou_thresh", type=float, default=0.6)
+    parser.add_argument("--score_thresh", type=float, default=0.001)
+
+    args = parser.parse_args()
+    return args
+
+if __name__ == "__main__":
+    args = parse_args()
+    main(args)
\ No newline at end of file
diff --git a/models/cv/detection/yolov7/ixrt/calibration_dataset.py b/models/cv/detection/yolov7/ixrt/calibration_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..de37775a0c617fdefca4342423a6a47bdc9b9c41
--- /dev/null
+++ b/models/cv/detection/yolov7/ixrt/calibration_dataset.py
@@ -0,0 +1,43 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
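+
+# Builds the dataloader used for INT8 calibration: at most batch_size * step
+# images (capped at 5000, the size of COCO val2017) are taken from the
+# evaluation set and preprocessed the same way as at inference time. A usage
+# sketch with illustrative paths:
+#
+#   loader = create_dataloaders("coco2017/val2017",
+#                               "coco2017/annotations/instances_val2017.json",
+#                               batch_size=32, step=20, data_process_type="yolov5")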
+ +import os +import torch +import torchvision.datasets +from torch.utils.data import DataLoader +from datasets.coco import CocoDetection + +def create_dataloaders(data_path, annFile, img_sz=640, batch_size=32, step=32, workers=2, data_process_type="yolov5"): + dataset = CocoDetection( + root=data_path, + annFile=annFile, + img_size=img_sz, + data_process_type=data_process_type + ) + calibration_dataset = dataset + num_samples = min(5000, batch_size * step) + if num_samples > 0: + calibration_dataset = torch.utils.data.Subset( + dataset, indices=range(num_samples) + ) + + calibration_dataloader = DataLoader( + calibration_dataset, + shuffle=False, + batch_size=batch_size, + drop_last=False, + num_workers=workers, + ) + return calibration_dataloader \ No newline at end of file diff --git a/models/cv/detection/yolov7/ixrt/coco_labels.py b/models/cv/detection/yolov7/ixrt/coco_labels.py new file mode 100644 index 0000000000000000000000000000000000000000..43f5bd82cd257efdcab2bdba6bad64d9bb90416e --- /dev/null +++ b/models/cv/detection/yolov7/ixrt/coco_labels.py @@ -0,0 +1,104 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +labels = [ + "person", + "bicycle", + "car", + "motorcycle", + "airplane", + "bus", + "train", + "truck", + "boat", + "traffic light", + "fire hydrant", + "stop sign", + "parking meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "backpack", + "umbrella", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports ball", + "kite", + "baseball bat", + "baseball glove", + "skateboard", + "surfboard", + "tennis racket", + "bottle", + "wine glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot dog", + "pizza", + "donut", + "cake", + "chair", + "couch", + "potted plant", + "bed", + "dining table", + "toilet", + "tv", + "laptop", + "mouse", + "remote", + "keyboard", + "cell phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "book", + "clock", + "vase", + "scissors", + "teddy bear", + "hair drier", + "toothbrush", +] +def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper) + return [ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] + +__all__ = ["labels"] diff --git a/models/cv/detection/yolov7/ixrt/common.py b/models/cv/detection/yolov7/ixrt/common.py new file mode 100644 index 0000000000000000000000000000000000000000..aba2117c9942d6823abf73bf3ab94c291a7705e2 --- /dev/null +++ b/models/cv/detection/yolov7/ixrt/common.py @@ -0,0 +1,98 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX 
Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import numpy as np +from tqdm import tqdm + +import tensorrt +import pycuda.driver as cuda + +# input : [bsz, box_num, 5(cx, cy, w, h, conf) + class_num(prob[0], prob[1], ...)] +# output : [bsz, box_num, 6(left_top_x, left_top_y, right_bottom_x, right_bottom_y, class_id, max_prob*conf)] +def box_class85to6(input): + center_x_y = input[:, :2] + side = input[:, 2:4] + conf = input[:, 4:5] + class_id = np.argmax(input[:, 5:], axis = -1) + class_id = class_id.astype(np.float32).reshape(-1, 1) + 1 + max_prob = np.max(input[:, 5:], axis = -1).reshape(-1, 1) + x1_y1 = center_x_y - 0.5 * side + x2_y2 = center_x_y + 0.5 * side + nms_input = np.concatenate([x1_y1, x2_y2, class_id, max_prob*conf], axis = -1) + return nms_input + +def save2json(batch_img_id, pred_boxes, json_result, class_trans): + for i, boxes in enumerate(pred_boxes): + if boxes is not None: + image_id = int(batch_img_id[i]) + # have no target + if image_id == -1: + continue + for x, y, w, h, c, p in boxes: + x, y, w, h, p = float(x), float(y), float(w), float(h), float(p) + c = int(c) + json_result.append( + { + "image_id": image_id, + "category_id": class_trans[c - 1], + "bbox": [x, y, w, h], + "score": p, + } + ) + +def create_engine_context(engine_path, logger): + with open(engine_path, "rb") as f: + runtime = tensorrt.Runtime(logger) + assert runtime + engine = runtime.deserialize_cuda_engine(f.read()) + assert engine + context = engine.create_execution_context() + assert context + + return engine, context + +def get_io_bindings(engine): + # Setup I/O bindings + inputs = [] + outputs = [] + allocations = [] + + for i in range(engine.num_bindings): + is_input = False + if engine.binding_is_input(i): + is_input = True + name = engine.get_binding_name(i) + dtype = engine.get_binding_dtype(i) + shape = engine.get_binding_shape(i) + if is_input: + batch_size = shape[0] + size = np.dtype(tensorrt.nptype(dtype)).itemsize + for s in shape: + size *= s + allocation = cuda.mem_alloc(size) + binding = { + "index": i, + "name": name, + "dtype": np.dtype(tensorrt.nptype(dtype)), + "shape": list(shape), + "allocation": allocation, + } + print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") + allocations.append(allocation) + if engine.binding_is_input(i): + inputs.append(binding) + else: + outputs.append(binding) + return inputs, outputs, allocations \ No newline at end of file diff --git a/models/cv/detection/yolov7/ixrt/config/YOLOV7M_CONFIG b/models/cv/detection/yolov7/ixrt/config/YOLOV7M_CONFIG new file mode 100644 index 0000000000000000000000000000000000000000..e6cb457d7bd09f941e5fba164265838af0ab5cb0 --- /dev/null +++ b/models/cv/detection/yolov7/ixrt/config/YOLOV7M_CONFIG @@ -0,0 +1,65 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+# BSZ : batch size used when building the engine and at inference time
+# IMGSIZE : model input height/width
+# RUN_MODE : [FPS, MAP]
+# PRECISION : [float16, int8]
+# MODEL_NAME : basename for the generated onnx/engine files
+# ORIGINE_MODEL : original onnx file
+# COCO_GT : COCO evaluation annotation file
+# DATASET_DIR : dataset path for quantization/inference
+# CHECKPOINTS_DIR : output path for the generated onnx/engine files
+# LAYER_FUSION : implement the decoder with fused ops; 0: no fusion, 1: fusion
+# DECODER_FASTER : two fused implementations exist; the faster one can feed GPU NMS directly, the other keeps its output identical to the onnx. 1: faster
+IMGSIZE=640
+MODEL_NAME=yolov7m
+ORIGINE_MODEL=yolov7m.onnx
+DATA_PROCESS_TYPE=yolov5
+MODEL_INPUT_NAMES=(images)
+
+LAYER_FUSION=1
+DECODER_FASTER=1
+DECODER_NUM_CLASS=80
+DECODER_INPUT_NAMES=(/model/model.105/m.0/Conv_output_0 /model/model.105/m.1/Conv_output_0 /model/model.105/m.2/Conv_output_0)
+
+DECODER_8_ANCHOR=(12 16 19 36 40 28)
+DECODER_16_ANCHOR=(36 75 76 55 72 146)
+DECODER_32_ANCHOR=(142 110 192 243 459 401)
+
+# NMS CONFIG
+    # IOU_THRESH : IoU threshold
+    # SCORE_THRESH : bbox confidence threshold
+    # MAX_BOX_PRE_IMG : upper limit on predicted boxes per image
+    # ALL_BOX_NUM : number of boxes per image fed into NMS
+    # NMS_TYPE : GPU/CPU(TODO)
+IOU_THRESH=0.6
+SCORE_THRESH=0.001
+MAX_BOX_PRE_IMG=1000
+ALL_BOX_NUM=25200
+NMS_TYPE=GPU
+
+# QUANT CONFIG (effective only when PRECISION is int8)
+    # QUANT_OBSERVER : quantization observer, one of [hist_percentile, percentile, minmax, entropy, ema]
+    # QUANT_BATCHSIZE : dataloader batch size for quantization; best kept equal to the onnx batch size, otherwise some ops (e.g. Reshape) may infer shapes incorrectly
+    # QUANT_STEP : number of calibration steps
+    # QUANT_SEED : random seed, keeps the quantization result reproducible
+    # QUANT_EXIST_ONNX : set this if a quantized model from another source should be used
+QUANT_OBSERVER=hist_percentile
+QUANT_BATCHSIZE=1
+QUANT_STEP=32
+QUANT_SEED=42
+DISABLE_QUANT_LIST=()
+QUANT_EXIST_ONNX=
\ No newline at end of file
diff --git a/models/cv/detection/yolov7/ixrt/cut_model.py b/models/cv/detection/yolov7/ixrt/cut_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..e9ee19aadf0809fe1b97e3225d09150fb54513f7
--- /dev/null
+++ b/models/cv/detection/yolov7/ixrt/cut_model.py
@@ -0,0 +1,31 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
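+
+# onnx.utils.extract_model keeps only the subgraph between --input_names and
+# --output_names, which is how the decoder head is stripped off before
+# quantization. For yolov7m the tensor names come from YOLOV7M_CONFIG; the
+# file names below are illustrative:
+#
+#   python3 cut_model.py --input_model yolov7m_sim.onnx --output_model yolov7m_without_decoder.onnx \
+#       --input_names images \
+#       --output_names /model/model.105/m.0/Conv_output_0 /model/model.105/m.1/Conv_output_0 /model/model.105/m.2/Conv_output_0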
+ +import onnx +import argparse +from onnxsim import simplify + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--input_model", type=str) + parser.add_argument("--output_model", type=str) + parser.add_argument("--input_names", nargs='+', type=str) + parser.add_argument("--output_names", nargs='+', type=str) + args = parser.parse_args() + return args + +args = parse_args() +onnx.utils.extract_model(args.input_model, args.output_model, args.input_names, args.output_names) +print(" Cut Model Done.") \ No newline at end of file diff --git a/models/cv/detection/yolov7/ixrt/datasets/__init__.py b/models/cv/detection/yolov7/ixrt/datasets/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..162e24b462289dcee7b7a2888b93fad1115def81 --- /dev/null +++ b/models/cv/detection/yolov7/ixrt/datasets/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. \ No newline at end of file diff --git a/models/cv/detection/yolov7/ixrt/datasets/coco.py b/models/cv/detection/yolov7/ixrt/datasets/coco.py new file mode 100644 index 0000000000000000000000000000000000000000..73c5df54761b917ecd0127fb56b61d9bd34c1196 --- /dev/null +++ b/models/cv/detection/yolov7/ixrt/datasets/coco.py @@ -0,0 +1,131 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os.path +from typing import Any, Callable, List, Optional, Tuple + +import cv2 + +from .vision import VisionDataset +from .pre_process import get_post_process +class CocoDetection(VisionDataset): + """`MS Coco Detection `_ Dataset. + + It requires the `COCO API to be installed `_. + + Args: + root (string): Root directory where images are downloaded to. + annFile (string): Path to json annotation file. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.PILToTensor`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + transforms (callable, optional): A function/transform that takes input sample and its target as entry + and returns a transformed version. 
+ """ + + def __init__( + self, + root: str, + annFile: str, + img_size: int, + data_process_type: str, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + transforms: Optional[Callable] = None, + + ) -> None: + super().__init__(root, transforms, transform, target_transform) + from pycocotools.coco import COCO + + self.coco = COCO(annFile) + self.ids = list(sorted(self.coco.imgs.keys())) + self.img_size = img_size + + self.transforms = get_post_process(data_process_type) + + def _load_image(self, id: int): + path = self.coco.loadImgs(id)[0]["file_name"] + data = cv2.imread(os.path.join(self.root, path)) + return data + + def _load_target(self, id: int) -> List[Any]: + return self.coco.loadAnns(self.coco.getAnnIds(id)) + + def __getitem__(self, index: int) -> Tuple[Any, Any]: + id = self.ids[index] + image = self._load_image(id) + target = self._load_target(id) + origin_shape = image.shape[:2] + + if self.transforms is not None: + image = self.transforms(image, self.img_size) + + if len(target) > 0: + image_id = target[0]["image_id"] + else: + # have no target + image_id = -1 + return image, origin_shape, image_id + + def __len__(self) -> int: + return len(self.ids) + + +class CocoCaptions(CocoDetection): + """`MS Coco Captions `_ Dataset. + + It requires the `COCO API to be installed `_. + + Args: + root (string): Root directory where images are downloaded to. + annFile (string): Path to json annotation file. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.PILToTensor`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + transforms (callable, optional): A function/transform that takes input sample and its target as entry + and returns a transformed version. + + Example: + + .. code:: python + + import torchvision.datasets as dset + import torchvision.transforms as transforms + cap = dset.CocoCaptions(root = 'dir where images are', + annFile = 'json annotation file', + transform=transforms.PILToTensor()) + + print('Number of samples: ', len(cap)) + img, target = cap[3] # load 4th sample + + print("Image Size: ", img.size()) + print(target) + + Output: :: + + Number of samples: 82783 + Image Size: (3L, 427L, 640L) + [u'A plane emitting smoke stream flying over a mountain.', + u'A plane darts across a bright blue sky behind a mountain covered in snow', + u'A plane leaves a contrail above the snowy mountain top.', + u'A mountain that has a plane flying overheard in the distance.', + u'A mountain view with a plume of smoke in the background'] + + """ + + def _load_target(self, id: int) -> List[str]: + return [ann["caption"] for ann in super()._load_target(id)] diff --git a/models/cv/detection/yolov7/ixrt/datasets/common.py b/models/cv/detection/yolov7/ixrt/datasets/common.py new file mode 100644 index 0000000000000000000000000000000000000000..ef36eba394917bc05af46f33be48463df50f540d --- /dev/null +++ b/models/cv/detection/yolov7/ixrt/datasets/common.py @@ -0,0 +1,81 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import cv2 +import math +import numpy as np + +def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32): + # Resize and pad image while meeting stride-multiple constraints + shape = im.shape[:2] # current shape [height, width] + if isinstance(new_shape, int): + new_shape = (new_shape, new_shape) + + # Scale ratio (new / old) + r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) + if not scaleup: # only scale down, do not scale up (for better val mAP) + r = min(r, 1.0) + + # Compute padding + ratio = r, r # width, height ratios + new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) + dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding + if auto: # minimum rectangle + dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding + elif scaleFill: # stretch + dw, dh = 0.0, 0.0 + new_unpad = (new_shape[1], new_shape[0]) + ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios + + dw /= 2 # divide padding into 2 sides + dh /= 2 + + if shape[::-1] != new_unpad: # resize + im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR) + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border + return im, ratio, (dw, dh) + +def scale_boxes(net_shape, boxes, ori_shape, use_letterbox=False): + # Rescale boxes (xyxy) from net_shape to ori_shape + + if use_letterbox: + + gain = min( + net_shape[0] / ori_shape[0], net_shape[1] / ori_shape[1] + ) # gain = new / old + pad = (net_shape[1] - ori_shape[1] * gain) / 2, ( + net_shape[0] - ori_shape[0] * gain + ) / 2.0 + + boxes[:, [0, 2]] -= pad[0] # x padding + boxes[:, [1, 3]] -= pad[1] # y padding + boxes[:, :4] /= gain + else: + x_scale, y_scale = net_shape[1] / ori_shape[1], net_shape[0] / ori_shape[0] + + boxes[:, 0] /= x_scale + boxes[:, 1] /= y_scale + boxes[:, 2] /= x_scale + boxes[:, 3] /= y_scale + + clip_boxes(boxes, ori_shape) + return boxes + +def clip_boxes(boxes, shape): + + boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2 + boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2 \ No newline at end of file diff --git a/models/cv/detection/yolov7/ixrt/datasets/post_process.py b/models/cv/detection/yolov7/ixrt/datasets/post_process.py new file mode 100644 index 0000000000000000000000000000000000000000..8590816a0df18b6ef296ebe305b15b81240ab1d0 --- /dev/null +++ b/models/cv/detection/yolov7/ixrt/datasets/post_process.py @@ -0,0 +1,130 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import cv2 +import math +import numpy as np + +from .common import letterbox, scale_boxes, clip_boxes + +def get_post_process(data_process_type): + if data_process_type == "yolov5": + return Yolov5Postprocess + elif data_process_type == "yolov3": + return Yolov3Postprocess + elif data_process_type == "yolox": + return YoloxPostprocess + return None + +def Yolov3Postprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + cur_box = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + boxes = scale_boxes( + (imgsz[0], imgsz[1]), + cur_box, + (ori_img_shape[0][i], ori_img_shape[1][i]), + use_letterbox=False + ) + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box + +def Yolov5Postprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + cur_box = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + boxes = scale_boxes( + (imgsz[0], imgsz[1]), + cur_box, + (ori_img_shape[0][i], ori_img_shape[1][i]), + use_letterbox=True + ) + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box + +def YoloxPostprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + boxes = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + r = min(imgsz[0]/ori_img_shape[0][i], imgsz[1]/ori_img_shape[1][i]) + boxes[:, :4] /= r + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + clip_boxes(boxes, (ori_img_shape[0][i], ori_img_shape[1][i])) + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box \ No newline at end of file diff --git a/models/cv/detection/yolov7/ixrt/datasets/pre_process.py b/models/cv/detection/yolov7/ixrt/datasets/pre_process.py new file mode 100644 index 0000000000000000000000000000000000000000..c651f8adb7c8190c214fbbbb7769c7d0713e9619 --- /dev/null +++ b/models/cv/detection/yolov7/ixrt/datasets/pre_process.py @@ -0,0 +1,71 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import cv2 +import math +import numpy as np + +from .common import letterbox + +def get_post_process(data_process_type): + if data_process_type == "yolov5": + return Yolov5Preprocess + elif data_process_type == "yolov3": + return Yolov3Preprocess + elif data_process_type == "yolox": + return YoloxPreprocess + return None + +def Yolov3Preprocess(image, img_size): + + h0, w0 = image.shape[:2] # orig hw + r = img_size / max(h0, w0) # ratio + + image = cv2.resize(image, (img_size, img_size)) + image = image.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + image = np.ascontiguousarray(image).astype(np.float32) / 255.0 # 0~1 np array + return image + +def Yolov5Preprocess(image, img_size, augment=False): + + h0, w0 = image.shape[:2] # orig hw + r = img_size / max(h0, w0) # ratio + + if r != 1: # if sizes are not equal + interp = cv2.INTER_LINEAR if (augment or r > 1) else cv2.INTER_AREA + image = cv2.resize(image, (math.ceil(w0 * r), math.ceil(h0 * r)), interpolation=interp) + + # shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size rect == True + + image, ratio, dwdh = letterbox(image, new_shape=img_size, auto=False, scaleup=False) + image = image.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + image = np.ascontiguousarray(image).astype(np.float32) / 255.0 # 0~1 np array + return image + +def YoloxPreprocess(img, img_size, swap=(2,0,1)): + + padded_img = np.ones((img_size, img_size, 3), dtype=np.uint8) * 114 + r = min(img_size / img.shape[0], img_size / img.shape[1]) + resized_img = cv2.resize( + img, + (int(img.shape[1] * r), int(img.shape[0] * r)), + interpolation=cv2.INTER_LINEAR, + ).astype(np.uint8) + + padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img + padded_img = padded_img.transpose(swap) + padded_img = np.ascontiguousarray(padded_img, dtype=np.float32) + + return padded_img \ No newline at end of file diff --git a/models/cv/detection/yolov7/ixrt/datasets/vision.py b/models/cv/detection/yolov7/ixrt/datasets/vision.py new file mode 100644 index 0000000000000000000000000000000000000000..eadefb2c5b35abd0a11fa85c65891461a210aef8 --- /dev/null +++ b/models/cv/detection/yolov7/ixrt/datasets/vision.py @@ -0,0 +1,151 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +from typing import Any, Callable, List, Optional, Tuple + +import torch +import torch.utils.data as data + +from types import FunctionType + +def _log_api_usage_once(obj: Any) -> None: + + """ + Logs API usage(module and name) within an organization. 
+ In a large ecosystem, it's often useful to track the PyTorch and + TorchVision APIs usage. This API provides the similar functionality to the + logging module in the Python stdlib. It can be used for debugging purpose + to log which methods are used and by default it is inactive, unless the user + manually subscribes a logger via the `SetAPIUsageLogger method `_. + Please note it is triggered only once for the same API call within a process. + It does not collect any data from open-source users since it is no-op by default. + For more information, please refer to + * PyTorch note: https://pytorch.org/docs/stable/notes/large_scale_deployments.html#api-usage-logging; + * Logging policy: https://github.com/pytorch/vision/issues/5052; + + Args: + obj (class instance or method): an object to extract info from. + """ + module = obj.__module__ + if not module.startswith("torchvision"): + module = f"torchvision.internal.{module}" + name = obj.__class__.__name__ + if isinstance(obj, FunctionType): + name = obj.__name__ + torch._C._log_api_usage_once(f"{module}.{name}") + +class VisionDataset(data.Dataset): + """ + Base Class For making datasets which are compatible with torchvision. + It is necessary to override the ``__getitem__`` and ``__len__`` method. + + Args: + root (string): Root directory of dataset. + transforms (callable, optional): A function/transforms that takes in + an image and a label and returns the transformed versions of both. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + + .. note:: + + :attr:`transforms` and the combination of :attr:`transform` and :attr:`target_transform` are mutually exclusive. + """ + + _repr_indent = 4 + + def __init__( + self, + root: str, + transforms: Optional[Callable] = None, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + ) -> None: + _log_api_usage_once(self) + if isinstance(root, str): + root = os.path.expanduser(root) + self.root = root + + has_transforms = transforms is not None + has_separate_transform = transform is not None or target_transform is not None + if has_transforms and has_separate_transform: + raise ValueError("Only transforms or transform/target_transform can be passed as argument") + + # for backwards-compatibility + self.transform = transform + self.target_transform = target_transform + + if has_separate_transform: + transforms = StandardTransform(transform, target_transform) + self.transforms = transforms + + def __getitem__(self, index: int) -> Any: + """ + Args: + index (int): Index + + Returns: + (Any): Sample and meta data, optionally transformed by the respective transforms. 
+ """ + raise NotImplementedError + + def __len__(self) -> int: + raise NotImplementedError + + def __repr__(self) -> str: + head = "Dataset " + self.__class__.__name__ + body = [f"Number of datapoints: {self.__len__()}"] + if self.root is not None: + body.append(f"Root location: {self.root}") + body += self.extra_repr().splitlines() + if hasattr(self, "transforms") and self.transforms is not None: + body += [repr(self.transforms)] + lines = [head] + [" " * self._repr_indent + line for line in body] + return "\n".join(lines) + + def _format_transform_repr(self, transform: Callable, head: str) -> List[str]: + lines = transform.__repr__().splitlines() + return [f"{head}{lines[0]}"] + ["{}{}".format(" " * len(head), line) for line in lines[1:]] + + def extra_repr(self) -> str: + return "" + + +class StandardTransform: + def __init__(self, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None) -> None: + self.transform = transform + self.target_transform = target_transform + + def __call__(self, input: Any, target: Any) -> Tuple[Any, Any]: + if self.transform is not None: + input = self.transform(input) + if self.target_transform is not None: + target = self.target_transform(target) + return input, target + + def _format_transform_repr(self, transform: Callable, head: str) -> List[str]: + lines = transform.__repr__().splitlines() + return [f"{head}{lines[0]}"] + ["{}{}".format(" " * len(head), line) for line in lines[1:]] + + def __repr__(self) -> str: + body = [self.__class__.__name__] + if self.transform is not None: + body += self._format_transform_repr(self.transform, "Transform: ") + if self.target_transform is not None: + body += self._format_transform_repr(self.target_transform, "Target transform: ") + + return "\n".join(body) diff --git a/models/cv/detection/yolov7/ixrt/deploy.py b/models/cv/detection/yolov7/ixrt/deploy.py new file mode 100644 index 0000000000000000000000000000000000000000..8c2cc424f699e01bc88dab98a29dc4c83e4d9b9e --- /dev/null +++ b/models/cv/detection/yolov7/ixrt/deploy.py @@ -0,0 +1,150 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
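+# deploy.py rewrites an exported YOLO ONNX graph with IxRT graph surgery: it can
+# replace a Focus slice pattern with a single fused "Focus" op, then appends
+# per-stride YoloDecoder ops and a final Concat so the engine outputs decoded
+# boxes directly.
+#
+# Usage sketch -- a minimal invocation matching the flags defined in
+# parse_args() below; the file names, tensor names, and anchor values are
+# placeholders, not values shipped with this repo:
+#
+#   python3 deploy.py \
+#       --src yolov7m_without_decoder.onnx \
+#       --dst yolov7m_fusion.onnx \
+#       --decoder_type YoloV5Decoder \
+#       --decoder_input_names conv_p3 conv_p4 conv_p5 \
+#       --decoder8_anchor 12 16 19 36 40 28 \
+#       --decoder16_anchor 36 75 76 55 72 146 \
+#       --decoder32_anchor 142 110 192 243 459 401 \
+#       --num_class 80 --faster 1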
+ +# !/usr/bin/env python +# -*- coding: utf-8 -*- +import argparse +from tensorrt.deploy.api import GraphTransform, create_source, create_target + +class Transform: + def __init__(self, graph): + self.t = GraphTransform(graph) + self.graph = graph + + def ReplaceFocus(self, input_edge, outputs, to_op): + input_var = self.graph.get_variable(input_edge) + op = self.graph.get_operator(to_op) + self.t.delete_operators_between_var_op( + from_var=input_var, to_op=op + ) + self.t.make_operator( + "Focus", inputs=input_edge, outputs=outputs + ) + return self.graph + + def AddYoloDecoderOp(self, inputs: list, outputs: list, op_type, **attributes): + if attributes["anchor"] is None: + del attributes["anchor"] + self.t.make_operator( + op_type, inputs=inputs, outputs=outputs, **attributes + ) + return self.graph + + def AddConcatOp(self, inputs: list, outputs, **attributes): + self.t.make_operator( + "Concat", inputs=inputs, outputs=outputs, **attributes + ) + return self.graph + +def customize_ops(graph, args): + t = Transform(graph) + fuse_focus = args.focus_input is not None and args.focus_output is not None and args.focus_last_node is not None + if fuse_focus: + graph = t.ReplaceFocus( + input_edge=args.focus_input, + outputs=args.focus_output, + to_op=args.focus_last_node + ) + decoder_input = args.decoder_input_names + num = len(decoder_input) // 3 + graph = t.AddYoloDecoderOp( + inputs=decoder_input[:num], + outputs=["decoder_8"], + op_type=args.decoder_type, + anchor=args.decoder8_anchor, + num_class=args.num_class, + stride=8, + faster_impl=args.faster + ) + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num:num*2], + outputs=["decoder_16"], + op_type=args.decoder_type, + anchor=args.decoder16_anchor, + num_class=args.num_class, + stride=16, + faster_impl=args.faster + ) + + if args.decoder64_anchor is not None: + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num*2:num*2+1], + outputs=["decoder_32"], + op_type=args.decoder_type, + anchor=args.decoder32_anchor, + num_class=args.num_class, + stride=32, + faster_impl=args.faster + ) + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num*2+1:], + outputs=["decoder_64"], + op_type=args.decoder_type, + anchor=args.decoder64_anchor, + num_class=args.num_class, + stride=64, + faster_impl=args.faster + ) + graph = t.AddConcatOp( + inputs=["decoder_8", "decoder_16", "decoder_32", "decoder_64"], + outputs=["output"], + axis=1 + ) + else: + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num*2:], + outputs=["decoder_32"], + op_type=args.decoder_type, + anchor=args.decoder32_anchor, + num_class=args.num_class, + stride=32, + faster_impl=args.faster + ) + graph = t.AddConcatOp( + inputs=["decoder_32", "decoder_16", "decoder_8"], + outputs=["output"], + axis=1 + ) + + graph.outputs.clear() + graph.add_output("output") + graph.outputs["output"].dtype = "FLOAT" + return graph + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--src", type=str) + parser.add_argument("--dst", type=str) + parser.add_argument("--decoder_type", type=str, choices=["YoloV3Decoder", "YoloV5Decoder", "YoloV7Decoder", "YoloxDecoder"]) + parser.add_argument("--decoder_input_names", nargs='+', type=str) + parser.add_argument("--decoder8_anchor", nargs='*', type=int) + parser.add_argument("--decoder16_anchor", nargs='*', type=int) + parser.add_argument("--decoder32_anchor", nargs='*', type=int) + parser.add_argument("--decoder64_anchor", nargs='*', type=int, default=None) + parser.add_argument("--num_class", type=int, default=80) + 
parser.add_argument("--faster", type=int, default=1) + parser.add_argument("--focus_input", type=str, default=None) + parser.add_argument("--focus_output", type=str, default=None) + parser.add_argument("--focus_last_node", type=str, default=None) + args = parser.parse_args() + return args + +if __name__ == "__main__": + + args = parse_args() + graph = create_source(args.src)() + graph = customize_ops(graph, args) + create_target(saved_path=args.dst).export(graph) + print("Surged onnx lies on", args.dst) \ No newline at end of file diff --git a/models/cv/detection/yolov7/ixrt/inference.py b/models/cv/detection/yolov7/ixrt/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..a7a60c878df96d294cdd56efe87a973a0e1f8765 --- /dev/null +++ b/models/cv/detection/yolov7/ixrt/inference.py @@ -0,0 +1,264 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import argparse +import glob +import json +import os +import time +import sys + +import torch +import numpy as np +import pycuda.autoinit +import pycuda.driver as cuda + +from coco_labels import coco80_to_coco91_class, labels +from common import save2json, box_class85to6 +from common import create_engine_context, get_io_bindings +from calibration_dataset import create_dataloaders +from datasets.post_process import get_post_process + +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval +from tqdm import tqdm +from tqdm.contrib import tzip + +import tensorrt + +from load_ixrt_plugin import load_ixrt_plugin +load_ixrt_plugin() + +def main(config): + + # Load dataloader + dataloader = create_dataloaders( + data_path=config.eval_dir, + annFile=config.coco_gt, + img_sz=config.imgsz, + batch_size=config.bsz, + step=config.loop_count, + data_process_type=config.data_process_type + ) + + # Load post process func + if config.test_mode == "MAP": + post_process_func = get_post_process(config.data_process_type) + + bsz = config.bsz + num_samples = 5000 + if config.loop_count > 0 and config.loop_count < num_samples/bsz : + num_samples = bsz * config.loop_count + num_batch = len(dataloader) + print("=" * 30) + print(f"Test Mode : {'Asynchronous' if config.use_async else 'Synchronous'}") + print(f"Total sample : {num_samples}\nBatch_size : {bsz}\nRun Batch : {num_batch}") + print("=" * 30) + + json_result = [] + forward_time = 0.0 + class_map = coco80_to_coco91_class() + + host_mem = tensorrt.IHostMemory + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + + # Load Engine + engine, context = create_engine_context(config.model_engine, logger) + inputs, outputs, allocations = get_io_bindings(engine) + + # Load nms_engine + if config.test_mode == "MAP" and config.nms_type == "GPU": + nms_engine, nms_context = create_engine_context(config.nms_engine, logger) + nms_inputs, nms_outputs, nms_allocations = get_io_bindings(nms_engine) + nms_output0 = np.zeros(nms_outputs[0]["shape"], 
nms_outputs[0]["dtype"]) + nms_output1 = np.zeros(nms_outputs[1]["shape"], nms_outputs[1]["dtype"]) + print(f"nms_output0 shape : {nms_output0.shape} nms_output0 type : {nms_output0.dtype}") + print(f"nms_output1 shape : {nms_output1.shape} nms_output1 type : {nms_output1.dtype}") + + # Warm up + if config.warm_up > 0: + print("\nWarm Start.") + for i in range(config.warm_up): + context.execute_v2(allocations) + print("Warm Done.") + + # Prepare the output data + output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) + print(f"output shape : {output.shape} output type : {output.dtype}") + + for batch_data, batch_img_shape, batch_img_id in tqdm(dataloader): + batch_data = batch_data.numpy() + batch_img_shape = [batch_img_shape[0].numpy(), batch_img_shape[1].numpy()] + # batch_img_id = batch_img_id.numpy() + + cur_bsz_sample = batch_data.shape[0] + + # Set input + cuda.memcpy_htod(inputs[0]["allocation"], batch_data) + + # Forward + start_time = time.time() + context.execute_v2(allocations) + end_time = time.time() + forward_time += end_time - start_time + + if config.test_mode == "MAP": + # Fetch output + cuda.memcpy_dtoh(output, outputs[0]["allocation"]) + + # Step 1 : prepare data to nms + _, box_num, box_unit = output.shape + if config.debug: + print(f"[Debug] box_num(25200) : {box_num}, box_unit(6) : {box_unit}") + + if config.decoder_faster == 0: + nms_input = box_class85to6(output.reshape(-1, box_unit)) + else: + nms_input = output + + # Step 2 : nms + # cpu nms(TODO) + + # gpu nms + if config.nms_type == "GPU": + + # Set nms input + cuda.memcpy_htod(nms_inputs[0]["allocation"], nms_input) + nms_context.execute_v2(nms_allocations) + cuda.memcpy_dtoh(nms_output0, nms_outputs[0]["allocation"]) + cuda.memcpy_dtoh(nms_output1, nms_outputs[1]["allocation"]) + + # Step 3 : post process + save + pred_boxes = post_process_func( + ori_img_shape=batch_img_shape, + imgsz=(config.imgsz, config.imgsz), + box_datas=nms_output0, + box_nums=nms_output1, + sample_num=cur_bsz_sample, + max_det=config.max_det + ) + save2json(batch_img_id, pred_boxes, json_result, class_map) + + fps = num_samples / forward_time + + if config.test_mode == "FPS": + print("FPS : ", fps) + print(f"Performance Check : Test {fps} >= target {config.fps_target}") + if fps >= config.fps_target: + print("pass!") + exit() + else: + print("failed!") + exit(1) + + if config.test_mode == "MAP": + if len(json_result) == 0: + print("Predict zero box!") + exit(1) + + if not os.path.exists(config.pred_dir): + os.makedirs(config.pred_dir) + + pred_json = os.path.join( + config.pred_dir, f"{config.model_name}_{config.precision}_preds.json" + ) + with open(pred_json, "w") as f: + json.dump(json_result, f) + + anno_json = config.coco_gt + anno = COCO(anno_json) # init annotations api + pred = anno.loadRes(pred_json) # init predictions api + eval = COCOeval(anno, pred, "bbox") + + eval.evaluate() + eval.accumulate() + print( + f"==============================eval {config.model_name} {config.precision} coco map ==============================" + ) + eval.summarize() + + map, map50 = eval.stats[:2] + print("MAP@0.5 : ", map50) + print(f"Accuracy Check : Test {map50} >= target {config.map_target}") + if map50 >= config.map_target: + print("pass!") + exit() + else: + print("failed!") + exit(1) + +def parse_config(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--model_name", type=str, default="YOLOV5s", help="YOLOV3 YOLOV5 YOLOV7 YOLOX" + ) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], 
default="int8", + help="The precision of datatype") + parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") + parser.add_argument( + "--model_engine", + type=str, + default="", + help="model engine path", + ) + parser.add_argument( + "--nms_engine", + type=str, + default="", + help="nms engine path", + ) + parser.add_argument( + "--coco_gt", + type=str, + default="data/datasets/cv/coco2017/annotations/instances_val2017.json", + help="coco instances_val2017.json", + ) + parser.add_argument("--warm_up", type=int, default=3, help="warm_up count") + parser.add_argument("--loop_count", type=int, default=-1, help="loop count") + parser.add_argument( + "--eval_dir", + type=str, + default="data/datasets/cv/coco2017/val2017", + help="coco image dir", + ) + parser.add_argument("--bsz", type=int, default=32, help="test batch size") + parser.add_argument( + "--imgsz", + "--img", + "--img-size", + type=int, + default=640, + help="inference size h,w", + ) + parser.add_argument("--max_det", type=int, default=1000, help="maximum detections per image") + parser.add_argument("--data_process_type", type=str, default="none") + parser.add_argument("--use_async", action="store_true") + parser.add_argument("--debug", action="store_true") + parser.add_argument("--pred_dir", type=str, default=".", help="pred save json dirs") + parser.add_argument("--map_target", type=float, default=0.56, help="target mAP") + parser.add_argument("--fps_target", type=float, default=-1.0, help="target fps") + parser.add_argument("--decoder_faster", type=int, default=0, help="decoder faster can use gpu nms directly") + parser.add_argument("--nms_type", type=str, default="GPU", help="GPU/CPU") + + config = parser.parse_args() + print("config:", config) + return config + +if __name__ == "__main__": + config = parse_config() + main(config) \ No newline at end of file diff --git a/models/cv/detection/yolov7/ixrt/load_ixrt_plugin.py b/models/cv/detection/yolov7/ixrt/load_ixrt_plugin.py new file mode 100644 index 0000000000000000000000000000000000000000..ae47dc8e854b6bea1f768e65c4dd481048bfebce --- /dev/null +++ b/models/cv/detection/yolov7/ixrt/load_ixrt_plugin.py @@ -0,0 +1,27 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+
+import ctypes
+import tensorrt
+from os.path import join, dirname, exists
+
+def load_ixrt_plugin(logger=tensorrt.Logger(tensorrt.Logger.INFO), namespace="", dynamic_path=""):
+    if not dynamic_path:
+        dynamic_path = join(dirname(tensorrt.__file__), "lib", "libixrt_plugin.so")
+    if not exists(dynamic_path):
+        raise FileNotFoundError(
+            f"The ixrt_plugin lib {dynamic_path} does not exist. Please provide a valid plugin path!")
+    ctypes.CDLL(dynamic_path)
+    tensorrt.init_libnvinfer_plugins(logger, namespace)
+    print(f"Loaded plugin from {dynamic_path}")
\ No newline at end of file
diff --git a/models/cv/detection/yolov7/ixrt/modify_batchsize.py b/models/cv/detection/yolov7/ixrt/modify_batchsize.py
new file mode 100644
index 0000000000000000000000000000000000000000..3a88c1603bd6f457fd4965257627dc29edcda4d1
--- /dev/null
+++ b/models/cv/detection/yolov7/ixrt/modify_batchsize.py
@@ -0,0 +1,52 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import onnx
+import argparse
+
+def change_input_dim(model, bsz):
+    batch_size = bsz
+
+    # The following code changes the first dimension of every input to batch_size.
+    # Modify as appropriate; note that this requires all inputs to share the
+    # same batch size.
+    inputs = model.graph.input
+    for input in inputs:
+        # Checks omitted. This assumes that all inputs are tensors whose shape
+        # has a first dim. Add checks as needed.
+        dim1 = input.type.tensor_type.shape.dim[0]
+        if isinstance(batch_size, str) and not batch_size.isdigit():
+            # set a dynamic (symbolic) batch size
+            dim1.dim_param = batch_size
+        elif isinstance(batch_size, int) or (isinstance(batch_size, str) and batch_size.isdigit()):
+            # set the given fixed batch size
+            dim1.dim_value = int(batch_size)
+        else:
+            # fall back to a batch size of 1
+            dim1.dim_value = 1
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--batch_size", type=int)
+    parser.add_argument("--origin_model", type=str)
+    parser.add_argument("--output_model", type=str)
+    args = parser.parse_args()
+    return args
+
+args = parse_args()
+model = onnx.load(args.origin_model)
+change_input_dim(model, args.batch_size)
+onnx.save(model, args.output_model)
\ No newline at end of file
diff --git a/models/cv/detection/yolov7/ixrt/quant.py b/models/cv/detection/yolov7/ixrt/quant.py
new file mode 100644
index 0000000000000000000000000000000000000000..36fd39a13c2e1e40f4dc0098f042e66e4bd0d26a
--- /dev/null
+++ b/models/cv/detection/yolov7/ixrt/quant.py
@@ -0,0 +1,67 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import random +import argparse +import numpy as np +from tensorrt.deploy import static_quantize + +import torch +from calibration_dataset import create_dataloaders + +def setseed(seed=42): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model_name", type=str) + parser.add_argument("--model", type=str, default="yolov5s_with_decoder.onnx") + parser.add_argument("--data_process_type", type=str, default="none") + parser.add_argument("--dataset_dir", type=str, default="./coco2017/val2017") + parser.add_argument("--ann_file", type=str, default="./coco2017/annotations/instances_val2017.json") + parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], default="hist_percentile") + parser.add_argument("--disable_quant_names", nargs='*', type=str) + parser.add_argument("--save_dir", type=str, help="save path", default=None) + parser.add_argument("--bsz", type=int, default=32) + parser.add_argument("--step", type=int, default=20) + parser.add_argument("--seed", type=int, default=42) + parser.add_argument("--imgsz", type=int, default=640) + args = parser.parse_args() + return args + +args = parse_args() +setseed(args.seed) +model_name = args.model_name + +out_dir = args.save_dir +dataloader = create_dataloaders( + data_path=args.dataset_dir, + annFile=args.ann_file, + img_sz=args.imgsz, + batch_size=args.bsz, + step=args.step, + data_process_type=args.data_process_type +) +# print("disable_quant_names : ", args.disable_quant_names) +static_quantize(args.model, + calibration_dataloader=dataloader, + save_quant_onnx_path=os.path.join(out_dir, f"quantized_{model_name}.onnx"), + observer=args.observer, + data_preprocess=lambda x: x[0].to("cuda"), + quant_format="qdq", + disable_quant_names=args.disable_quant_names) \ No newline at end of file diff --git a/models/cv/detection/yolov7/ixrt/scripts/infer_yolov7m_fp16_accuracy.sh b/models/cv/detection/yolov7/ixrt/scripts/infer_yolov7m_fp16_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..140ab8ace521610303cbfc0582e0c5eaf6188c62 --- /dev/null +++ b/models/cv/detection/yolov7/ixrt/scripts/infer_yolov7m_fp16_accuracy.sh @@ -0,0 +1,210 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
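+# Pipeline overview (the steps executed below): simplify ONNX -> cut decoder
+# -> fuse decoder ops -> set batch size -> build FP16 engine (+ GPU NMS
+# engine) -> run COCO mAP evaluation via inference.py.
+#
+# Expected environment, a sketch only -- CONFIG_DIR must point at a config file
+# defining MODEL_NAME, ORIGINE_MODEL, IMGSIZE, NMS thresholds, etc.; the paths
+# and the config name below are placeholders, not shipped defaults:
+#
+#   export RUN_DIR=/path/to/yolov7/ixrt
+#   export CHECKPOINTS_DIR=./checkpoints
+#   export DATASETS_DIR=/path/to/coco2017
+#   export EVAL_DIR=${DATASETS_DIR}/val2017
+#   export COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json
+#   export CONFIG_DIR=config/YOLOV7M_CONFIG
+#   bash scripts/infer_yolov7m_fp16_accuracy.sh --bs 32 --tgt 0.69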
+ +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +WARM_UP=-1 +TGT=-1 +LOOP_COUNT=-1 +RUN_MODE=MAP +PRECISION=float16 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +step=0 +faster=0 +CURRENT_MODEL=${ORIGINE_MODEL} +if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then + faster=1 +fi + +# Simplify Model +let step++ +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model skip, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi +CURRENT_MODEL=${SIM_MODEL} + +# Cut Decoder +let step++ +echo [STEP ${step}] : Cut Decoder +NO_DECODER_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_without_decoder.onnx +if [ -f ${NO_DECODER_MODEL} ];then + echo " "Cut Decoder skip, ${SIM_MNO_DECODER_MODELODEL} has been existed +else + python3 ${RUN_DIR}/cut_model.py \ + --input_model ${CURRENT_MODEL} \ + --output_model ${NO_DECODER_MODEL} \ + --input_names ${MODEL_INPUT_NAMES[@]} \ + --output_names ${DECODER_INPUT_NAMES[@]} +fi +CURRENT_MODEL=${NO_DECODER_MODEL} + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + CURRENT_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${CURRENT_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${EVAL_DIR} \ + --ann_file ${COCO_GT} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + echo " "Generate ${QUANT_EXIST_ONNX} + fi + CURRENT_MODEL=${QUANT_EXIST_ONNX} +fi + +# Add Decoder +if [ $LAYER_FUSION == "1" ]; then + let step++ + echo; + echo [STEP ${step}] : Add Decoder + FUSION_ONNX=${CHECKPOINTS_DIR}/${MODEL_NAME}_fusion.onnx + if [ -f $FUSION_ONNX ];then + echo " "Add Decoder Skip, $FUSION_ONNX has been existed + else + python3 ${RUN_DIR}/deploy.py \ + --src ${CURRENT_MODEL} \ + --dst ${FUSION_ONNX} \ + --decoder_type YoloV5Decoder \ + --decoder_input_names ${DECODER_INPUT_NAMES[@]} \ + --decoder8_anchor ${DECODER_8_ANCHOR[@]} \ + --decoder16_anchor ${DECODER_16_ANCHOR[@]} \ + --decoder32_anchor ${DECODER_32_ANCHOR[@]} \ + --num_class ${DECODER_NUM_CLASS} \ + --faster ${faster} + fi + CURRENT_MODEL=${FUSION_ONNX} +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_bs${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 
${RUN_DIR}/modify_batchsize.py \ + --batch_size ${BSZ} \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi +CURRENT_MODEL=${FINAL_MODEL} + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi +if [[ ${RUN_MODE} == "MAP" && ${NMS_TYPE} == "GPU" ]];then + NMS_ENGINE=${CHECKPOINTS_DIR}/nms.engine + # Build NMS Engine + python3 ${RUN_DIR}/build_nms_engine.py \ + --bsz ${BSZ} \ + --path ${CHECKPOINTS_DIR} \ + --all_box_num ${ALL_BOX_NUM} \ + --max_box_pre_img ${MAX_BOX_PRE_IMG} \ + --iou_thresh ${IOU_THRESH} \ + --score_thresh ${SCORE_THRESH} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --model_engine=${ENGINE_FILE} \ + --nms_engine=${NMS_ENGINE} \ + --coco_gt=${COCO_GT} \ + --eval_dir=${EVAL_DIR} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --decoder_faster=${faster} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --model_name ${MODEL_NAME} \ + --precision ${PRECISION} \ + --pred_dir ${CHECKPOINTS_DIR} \ + --map_target ${TGT} \ + --max_det ${MAX_BOX_PRE_IMG} \ + --nms_type ${NMS_TYPE} \ + --bsz ${BSZ}; check_status +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/detection/yolov7/ixrt/scripts/infer_yolov7m_fp16_performance.sh b/models/cv/detection/yolov7/ixrt/scripts/infer_yolov7m_fp16_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..01542134796eda4a0d46c33e3d28c23120e690ba --- /dev/null +++ b/models/cv/detection/yolov7/ixrt/scripts/infer_yolov7m_fp16_performance.sh @@ -0,0 +1,210 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
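+# Argument handling note: the update loop below scans "$@" pairwise, so options
+# are given as "--flag value" pairs, e.g. (target value is illustrative):
+#
+#   bash scripts/infer_yolov7m_fp16_performance.sh --bs 16 --tgt 1500
+#
+# --bs overrides BSZ (default 32); --tgt sets the FPS pass/fail threshold that
+# inference.py checks at the end (the default -1 makes the check always pass).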
+ +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +WARM_UP=3 +TGT=-1 +LOOP_COUNT=10 +RUN_MODE=FPS +PRECISION=float16 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +step=0 +faster=0 +CURRENT_MODEL=${ORIGINE_MODEL} +if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then + faster=1 +fi + +# Simplify Model +let step++ +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model skip, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi +CURRENT_MODEL=${SIM_MODEL} + +# Cut Decoder +let step++ +echo [STEP ${step}] : Cut Decoder +NO_DECODER_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_without_decoder.onnx +if [ -f ${NO_DECODER_MODEL} ];then + echo " "Cut Decoder skip, ${SIM_MNO_DECODER_MODELODEL} has been existed +else + python3 ${RUN_DIR}/cut_model.py \ + --input_model ${CURRENT_MODEL} \ + --output_model ${NO_DECODER_MODEL} \ + --input_names ${MODEL_INPUT_NAMES[@]} \ + --output_names ${DECODER_INPUT_NAMES[@]} +fi +CURRENT_MODEL=${NO_DECODER_MODEL} + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + CURRENT_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${CURRENT_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${EVAL_DIR} \ + --ann_file ${COCO_GT} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + echo " "Generate ${QUANT_EXIST_ONNX} + fi + CURRENT_MODEL=${QUANT_EXIST_ONNX} +fi + +# Add Decoder +if [ $LAYER_FUSION == "1" ]; then + let step++ + echo; + echo [STEP ${step}] : Add Decoder + FUSION_ONNX=${CHECKPOINTS_DIR}/${MODEL_NAME}_fusion.onnx + if [ -f $FUSION_ONNX ];then + echo " "Add Decoder Skip, $FUSION_ONNX has been existed + else + python3 ${RUN_DIR}/deploy.py \ + --src ${CURRENT_MODEL} \ + --dst ${FUSION_ONNX} \ + --decoder_type YoloV5Decoder \ + --decoder_input_names ${DECODER_INPUT_NAMES[@]} \ + --decoder8_anchor ${DECODER_8_ANCHOR[@]} \ + --decoder16_anchor ${DECODER_16_ANCHOR[@]} \ + --decoder32_anchor ${DECODER_32_ANCHOR[@]} \ + --num_class ${DECODER_NUM_CLASS} \ + --faster ${faster} + fi + CURRENT_MODEL=${FUSION_ONNX} +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_bs${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 
${RUN_DIR}/modify_batchsize.py \ + --batch_size ${BSZ} \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi +CURRENT_MODEL=${FINAL_MODEL} + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi +if [[ ${RUN_MODE} == "MAP" && ${NMS_TYPE} == "GPU" ]];then + NMS_ENGINE=${CHECKPOINTS_DIR}/nms.engine + # Build NMS Engine + python3 ${RUN_DIR}/build_nms_engine.py \ + --bsz ${BSZ} \ + --path ${CHECKPOINTS_DIR} \ + --all_box_num ${ALL_BOX_NUM} \ + --max_box_pre_img ${MAX_BOX_PRE_IMG} \ + --iou_thresh ${IOU_THRESH} \ + --score_thresh ${SCORE_THRESH} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --model_engine=${ENGINE_FILE} \ + --nms_engine=${NMS_ENGINE} \ + --coco_gt=${COCO_GT} \ + --eval_dir=${EVAL_DIR} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --decoder_faster=${faster} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --model_name ${MODEL_NAME} \ + --precision ${PRECISION} \ + --pred_dir ${CHECKPOINTS_DIR} \ + --fps_target ${TGT} \ + --max_det ${MAX_BOX_PRE_IMG} \ + --nms_type ${NMS_TYPE} \ + --bsz ${BSZ}; check_status +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/detection/yolov7/ixrt/scripts/infer_yolov7m_int8_accuracy.sh b/models/cv/detection/yolov7/ixrt/scripts/infer_yolov7m_int8_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..18d11eff42fc5ecf60ae3338ae1c4688ff252127 --- /dev/null +++ b/models/cv/detection/yolov7/ixrt/scripts/infer_yolov7m_int8_accuracy.sh @@ -0,0 +1,210 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
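+# INT8 note: with PRECISION=int8 the "Quant Model" step below calibrates once
+# with quant.py and caches ${CHECKPOINTS_DIR}/quantized_${MODEL_NAME}.onnx;
+# later runs reuse that file. Delete it (or point QUANT_EXIST_ONNX at another
+# quantized ONNX) to re-calibrate, e.g. after changing QUANT_OBSERVER or
+# QUANT_STEP in the config sourced via CONFIG_DIR.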
+ +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +WARM_UP=-1 +TGT=-1 +LOOP_COUNT=-1 +RUN_MODE=MAP +PRECISION=int8 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +step=0 +faster=0 +CURRENT_MODEL=${ORIGINE_MODEL} +if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then + faster=1 +fi + +# Simplify Model +let step++ +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model skip, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi +CURRENT_MODEL=${SIM_MODEL} + +# Cut Decoder +let step++ +echo [STEP ${step}] : Cut Decoder +NO_DECODER_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_without_decoder.onnx +if [ -f ${NO_DECODER_MODEL} ];then + echo " "Cut Decoder skip, ${SIM_MNO_DECODER_MODELODEL} has been existed +else + python3 ${RUN_DIR}/cut_model.py \ + --input_model ${CURRENT_MODEL} \ + --output_model ${NO_DECODER_MODEL} \ + --input_names ${MODEL_INPUT_NAMES[@]} \ + --output_names ${DECODER_INPUT_NAMES[@]} +fi +CURRENT_MODEL=${NO_DECODER_MODEL} + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + CURRENT_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${CURRENT_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${EVAL_DIR} \ + --ann_file ${COCO_GT} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + echo " "Generate ${QUANT_EXIST_ONNX} + fi + CURRENT_MODEL=${QUANT_EXIST_ONNX} +fi + +# Add Decoder +if [ $LAYER_FUSION == "1" ]; then + let step++ + echo; + echo [STEP ${step}] : Add Decoder + FUSION_ONNX=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_fusion.onnx + if [ -f $FUSION_ONNX ];then + echo " "Add Decoder Skip, $FUSION_ONNX has been existed + else + python3 ${RUN_DIR}/deploy.py \ + --src ${CURRENT_MODEL} \ + --dst ${FUSION_ONNX} \ + --decoder_type YoloV5Decoder \ + --decoder_input_names ${DECODER_INPUT_NAMES[@]} \ + --decoder8_anchor ${DECODER_8_ANCHOR[@]} \ + --decoder16_anchor ${DECODER_16_ANCHOR[@]} \ + --decoder32_anchor ${DECODER_32_ANCHOR[@]} \ + --num_class ${DECODER_NUM_CLASS} \ + --faster ${faster} + fi + CURRENT_MODEL=${FUSION_ONNX} +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_bs${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 
${RUN_DIR}/modify_batchsize.py \ + --batch_size ${BSZ} \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi +CURRENT_MODEL=${FINAL_MODEL} + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi +if [[ ${RUN_MODE} == "MAP" && ${NMS_TYPE} == "GPU" ]];then + NMS_ENGINE=${CHECKPOINTS_DIR}/nms.engine + # Build NMS Engine + python3 ${RUN_DIR}/build_nms_engine.py \ + --bsz ${BSZ} \ + --path ${CHECKPOINTS_DIR} \ + --all_box_num ${ALL_BOX_NUM} \ + --max_box_pre_img ${MAX_BOX_PRE_IMG} \ + --iou_thresh ${IOU_THRESH} \ + --score_thresh ${SCORE_THRESH} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --model_engine=${ENGINE_FILE} \ + --nms_engine=${NMS_ENGINE} \ + --coco_gt=${COCO_GT} \ + --eval_dir=${EVAL_DIR} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --decoder_faster=${faster} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --model_name ${MODEL_NAME} \ + --precision ${PRECISION} \ + --pred_dir ${CHECKPOINTS_DIR} \ + --map_target ${TGT} \ + --max_det ${MAX_BOX_PRE_IMG} \ + --nms_type ${NMS_TYPE} \ + --bsz ${BSZ}; check_status +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/detection/yolov7/ixrt/scripts/infer_yolov7m_int8_performance.sh b/models/cv/detection/yolov7/ixrt/scripts/infer_yolov7m_int8_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..08525d287a6e850b7ee253bb13dc165c2253f045 --- /dev/null +++ b/models/cv/detection/yolov7/ixrt/scripts/infer_yolov7m_int8_performance.sh @@ -0,0 +1,211 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
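+# FPS-mode note: WARM_UP=3 untimed batches run first, then LOOP_COUNT=10 timed
+# batches; inference.py reports FPS as (LOOP_COUNT * BSZ) / total forward time,
+# so a small LOOP_COUNT gives a quick but noisier measurement.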
+ +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +WARM_UP=3 +TGT=-1 +LOOP_COUNT=10 +RUN_MODE=FPS +PRECISION=int8 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +step=0 +faster=0 +CURRENT_MODEL=${ORIGINE_MODEL} +if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then + faster=1 +fi + +# Simplify Model +let step++ +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model skip, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi +CURRENT_MODEL=${SIM_MODEL} + +# Cut Decoder +let step++ +echo [STEP ${step}] : Cut Decoder +NO_DECODER_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_without_decoder.onnx +if [ -f ${NO_DECODER_MODEL} ];then + echo " "Cut Decoder skip, ${SIM_MNO_DECODER_MODELODEL} has been existed +else + python3 ${RUN_DIR}/cut_model.py \ + --input_model ${CURRENT_MODEL} \ + --output_model ${NO_DECODER_MODEL} \ + --input_names ${MODEL_INPUT_NAMES[@]} \ + --output_names ${DECODER_INPUT_NAMES[@]} +fi +CURRENT_MODEL=${NO_DECODER_MODEL} + + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + CURRENT_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${CURRENT_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${EVAL_DIR} \ + --ann_file ${COCO_GT} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + echo " "Generate ${QUANT_EXIST_ONNX} + fi + CURRENT_MODEL=${QUANT_EXIST_ONNX} +fi + +# Add Decoder +if [ $LAYER_FUSION == "1" ]; then + let step++ + echo; + echo [STEP ${step}] : Add Decoder + FUSION_ONNX=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_fusion.onnx + if [ -f $FUSION_ONNX ];then + echo " "Add Decoder Skip, $FUSION_ONNX has been existed + else + python3 ${RUN_DIR}/deploy.py \ + --src ${CURRENT_MODEL} \ + --dst ${FUSION_ONNX} \ + --decoder_type YoloV5Decoder \ + --decoder_input_names ${DECODER_INPUT_NAMES[@]} \ + --decoder8_anchor ${DECODER_8_ANCHOR[@]} \ + --decoder16_anchor ${DECODER_16_ANCHOR[@]} \ + --decoder32_anchor ${DECODER_32_ANCHOR[@]} \ + --num_class ${DECODER_NUM_CLASS} \ + --faster ${faster} + fi + CURRENT_MODEL=${FUSION_ONNX} +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_bs${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 
${RUN_DIR}/modify_batchsize.py \ + --batch_size ${BSZ} \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi +CURRENT_MODEL=${FINAL_MODEL} + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi +if [[ ${RUN_MODE} == "MAP" && ${NMS_TYPE} == "GPU" ]];then + NMS_ENGINE=${CHECKPOINTS_DIR}/nms.engine + # Build NMS Engine + python3 ${RUN_DIR}/build_nms_engine.py \ + --bsz ${BSZ} \ + --path ${CHECKPOINTS_DIR} \ + --all_box_num ${ALL_BOX_NUM} \ + --max_box_pre_img ${MAX_BOX_PRE_IMG} \ + --iou_thresh ${IOU_THRESH} \ + --score_thresh ${SCORE_THRESH} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --model_engine=${ENGINE_FILE} \ + --nms_engine=${NMS_ENGINE} \ + --coco_gt=${COCO_GT} \ + --eval_dir=${EVAL_DIR} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --decoder_faster=${faster} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --model_name ${MODEL_NAME} \ + --precision ${PRECISION} \ + --pred_dir ${CHECKPOINTS_DIR} \ + --fps_target ${TGT} \ + --max_det ${MAX_BOX_PRE_IMG} \ + --nms_type ${NMS_TYPE} \ + --bsz ${BSZ}; check_status +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/detection/yolov7/ixrt/simplify_model.py b/models/cv/detection/yolov7/ixrt/simplify_model.py new file mode 100644 index 0000000000000000000000000000000000000000..1400fd81ddb4b3fae1b20d0fd35082a692f5d292 --- /dev/null +++ b/models/cv/detection/yolov7/ixrt/simplify_model.py @@ -0,0 +1,36 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import onnx +import argparse +from onnxsim import simplify + +# Simplify +def simplify_model(args): + onnx_model = onnx.load(args.origin_model) + model_simp, check = simplify(onnx_model) + model_simp = onnx.shape_inference.infer_shapes(model_simp) + onnx.save(model_simp, args.output_model) + print(" Simplify onnx Done.") + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--origin_model", type=str) + parser.add_argument("--output_model", type=str) + args = parser.parse_args() + return args + +args = parse_args() +simplify_model(args) \ No newline at end of file
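+
+# Usage sketch (requires the onnxsim package imported above; paths are placeholders):
+#   python3 simplify_model.py --origin_model yolov7m.onnx --output_model yolov7m_sim.onnx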