diff --git a/models/cv/detection/yolov6/ixrt/README.md b/models/cv/detection/yolov6/ixrt/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..166c2fa06ccb7fdcb0502f4af245de386d04774b
--- /dev/null
+++ b/models/cv/detection/yolov6/ixrt/README.md
@@ -0,0 +1,84 @@
+# YOLOv6
+
+## Description
+
+YOLOv6 integrates cutting-edge object detection advancements from industry and academia, incorporating recent innovations in network design, training strategies, testing techniques, quantization, and optimization methods. The result is a suite of deployment-ready networks that accommodates varied use cases across different scales.
+
+## Setup
+
+### Install
+
+```bash
+# Install libGL
+## CentOS
+yum install -y mesa-libGL
+## Ubuntu
+apt install -y libgl1-mesa-glx
+
+pip3 install tqdm
+pip3 install onnx
+pip3 install onnxsim
+pip3 install pycocotools
+pip3 install pycuda
+```
+
+### Download
+
+Pretrained model: 
+
+Dataset: to download the validation dataset.
+
+```bash
+# get yolov6s.pt
+wget https://github.com/meituan/YOLOv6/releases/download/0.4.0/yolov6s.pt
+# set coco path
+mkdir -p data/
+ln -s /Path/to/coco/ data/coco
+```
+
+### Model Conversion
+
+```bash
+# install yolov6
+git clone https://github.com/meituan/YOLOv6.git
+
+pushd YOLOv6
+pip3 install -r requirements.txt
+
+# export onnx model
+python3 deploy/ONNX/export_onnx.py --weights ../yolov6s.pt --img 640 --batch-size 32 --simplify
+mv ../yolov6s.onnx ../data/
+
+popd
+```
+
+## Inference
+
+### FP16
+
+```bash
+# Accuracy
+bash scripts/infer_yolov6s_fp16_accuracy.sh
+# Performance
+bash scripts/infer_yolov6s_fp16_performance.sh
+```
+
+### INT8
+
+```bash
+# Accuracy
+bash scripts/infer_yolov6s_int8_accuracy.sh
+# Performance
+bash scripts/infer_yolov6s_int8_performance.sh
+```
+
+## Results
+
+| Model  | BatchSize | Precision | FPS      | mAP@0.5 |
+| ------ | --------- | --------- | -------- | ------- |
+| YOLOv6 | 32        | FP16      | 1107.511 | 0.355   |
+| YOLOv6 | 32        | INT8      | 2080.475 | -       |
+
+## Reference
+
+YOLOv6: 
diff --git a/models/cv/detection/yolov6/ixrt/build_engine.py b/models/cv/detection/yolov6/ixrt/build_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..f5e1719a22c84b400a2ba9b9cbfdea6bae99e80d
--- /dev/null
+++ b/models/cv/detection/yolov6/ixrt/build_engine.py
@@ -0,0 +1,94 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
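+
+# Builds a serialized ixRT engine from an ONNX model through the
+# TensorRT-compatible Python API. The static-shape path below is the one the
+# shell scripts invoke; the dynamic-shape variant is kept for reference.
+# Illustrative invocation (paths are examples, not fixed):
+#   python3 build_engine.py --model data/yolov6s.onnx --precision float16 --engine data/yolov6s_fp16.engine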
+import os +import cv2 +import argparse +import numpy as np + +import torch +import tensorrt +from tensorrt import Dims + + +def build_engine_trtapi_staticshape(config): + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(config.model) + + precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 + # print("precision : ", precision) + build_config.set_flag(precision) + + plan = builder.build_serialized_network(network, build_config) + engine_file_path = config.engine + with open(engine_file_path, "wb") as f: + f.write(plan) + print("Build static shape engine done!") + + +def build_engine_trtapi_dynamicshape(config): + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + + profile = builder.create_optimization_profile() + profile.set_shape("input", + Dims([1, 3, 608, 608]), + Dims([32, 3, 608, 608]), + Dims([64, 3, 608, 608]), + ) + build_config.add_optimization_profile(profile) + + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(config.model) + precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 + # print("precision : ", precision) + build_config.set_flag(precision) + + # set dynamic + num_inputs = network.num_inputs + for i in range(num_inputs): + input_tensor = network.get_input(i) + input_tensor.shape = Dims([-1, 3, 608, 608]) + + plan = builder.build_serialized_network(network, build_config) + engine_file_path = config.engine + with open(engine_file_path, "wb") as f: + f.write(plan) + print("Build dynamic shape engine done!") + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", + help="The precision of datatype") + # engine args + parser.add_argument("--engine", type=str, default=None) + + args = parser.parse_args() + return args + + +if __name__ == "__main__": + args = parse_args() + build_engine_trtapi_staticshape(args) + # build_engine_trtapi_dynamicshape(args) diff --git a/models/cv/detection/yolov6/ixrt/common.py b/models/cv/detection/yolov6/ixrt/common.py new file mode 100644 index 0000000000000000000000000000000000000000..dc3c2766533fa5a334a61231adb168ecf09622c3 --- /dev/null +++ b/models/cv/detection/yolov6/ixrt/common.py @@ -0,0 +1,335 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
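+
+# Shared helpers for the ixRT YOLO samples: engine/context creation,
+# pycuda-based I/O binding setup, COCO-format result serialization,
+# letterbox preprocessing, and a NumPy reference NMS/post-processing path.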
+import os +import cv2 +import glob +import time +import numpy as np +from tqdm import tqdm + +import tensorrt +import pycuda.driver as cuda + + +def load_class_names(namesfile): + class_names = [] + with open(namesfile, 'r') as fp: + lines = fp.readlines() + for line in lines: + line = line.rstrip() + class_names.append(line) + return class_names + +# input : [bsz, box_num, 5(cx, cy, w, h, conf) + class_num(prob[0], prob[1], ...)] +# output : [bsz, box_num, 6(left_top_x, left_top_y, right_bottom_x, right_bottom_y, class_id, max_prob*conf)] +def box_class85to6(input): + center_x_y = input[:, :2] + side = input[:, 2:4] + conf = input[:, 4:5] + class_id = np.argmax(input[:, 5:], axis = -1) + class_id = class_id.astype(np.float32).reshape(-1, 1) + 1 + max_prob = np.max(input[:, 5:], axis = -1).reshape(-1, 1) + x1_y1 = center_x_y - 0.5 * side + x2_y2 = center_x_y + 0.5 * side + nms_input = np.concatenate([x1_y1, x2_y2, class_id, max_prob*conf], axis = -1) + return nms_input + +def save2json(batch_img_id, pred_boxes, json_result, class_trans): + for i, boxes in enumerate(pred_boxes): + if boxes is not None: + image_id = int(batch_img_id[i]) + # have no target + if image_id == -1: + continue + + for x1, y1, x2, y2, _, p, c in boxes: + x1, y1, x2, y2, p = float(x1), float(y1), float(x2), float(y2), float(p) + c = int(c) + x = x1 + y = y1 + w = x2 - x1 + h = y2 - y1 + + json_result.append( + { + "image_id": image_id, + "category_id": class_trans[c - 1], + "bbox": [x, y, w, h], + "score": p, + } + ) + +################## About TensorRT ################# +def create_engine_context(engine_path, logger): + with open(engine_path, "rb") as f: + runtime = tensorrt.Runtime(logger) + assert runtime + engine = runtime.deserialize_cuda_engine(f.read()) + assert engine + context = engine.create_execution_context() + assert context + + return engine, context + +def setup_io_bindings(engine, context): + # Setup I/O bindings + inputs = [] + outputs = [] + allocations = [] + + for i in range(engine.num_bindings): + is_input = False + if engine.binding_is_input(i): + is_input = True + name = engine.get_binding_name(i) + dtype = engine.get_binding_dtype(i) + shape = context.get_binding_shape(i) + if is_input: + batch_size = shape[0] + size = np.dtype(tensorrt.nptype(dtype)).itemsize + for s in shape: + size *= s + allocation = cuda.mem_alloc(size) + binding = { + "index": i, + "name": name, + "dtype": np.dtype(tensorrt.nptype(dtype)), + "shape": list(shape), + "allocation": allocation, + } + # print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") + allocations.append(allocation) + if engine.binding_is_input(i): + inputs.append(binding) + else: + outputs.append(binding) + return inputs, outputs, allocations +########################################################## + + +################## About Loading Dataset ################# +def load_images(images_path): + """ + If image path is given, return it directly + For txt file, read it and return each line as image path + In other case, it's a folder, return a list with names of each + jpg, jpeg and png file + """ + input_path_extension = images_path.split('.')[-1] + if input_path_extension in ['jpg', 'jpeg', 'png']: + return [images_path] + elif input_path_extension == "txt": + with open(images_path, "r") as f: + return f.read().splitlines() + else: + return glob.glob( + os.path.join(images_path, "*.jpg")) + \ + glob.glob(os.path.join(images_path, "*.png")) + \ + glob.glob(os.path.join(images_path, "*.jpeg")) + +def 
prepare_batch(images_path, bs=16, input_size=(608, 608)): + + width, height = input_size + + batch_names = [] + batch_images = [] + batch_shapes = [] + + temp_names = [] + temp_images = [] + temp_shapes = [] + + for i, image_path in tqdm(enumerate(images_path), desc="Loading coco data"): + name = os.path.basename(image_path) + image = cv2.imread(image_path) + h, w, _ = image.shape + image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + image_resized = cv2.resize(image_rgb, (width, height), + interpolation=cv2.INTER_LINEAR) + custom_image = image_resized.transpose(2, 0, 1).astype(np.float32) / 255. + custom_image = np.expand_dims(custom_image, axis=0) + + if i != 0 and i % bs == 0: + batch_names.append(temp_names) + batch_images.append(np.concatenate(temp_images, axis=0)) + batch_shapes.append(temp_shapes) + + temp_names = [name] + temp_images = [custom_image] + temp_shapes = [(h, w)] + else: + temp_names.append(name) + temp_images.append(custom_image) + temp_shapes.append((h, w)) + + return batch_names, batch_images, batch_shapes +########################################################## + + +################## About Operating box ################# +def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32): + # Resize and pad image while meeting stride-multiple constraints + shape = im.shape[:2] # current shape [height, width] + if isinstance(new_shape, int): + new_shape = (new_shape, new_shape) + + # Scale ratio (new / old) + r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) + if not scaleup: # only scale down, do not scale up (for better val mAP) + r = min(r, 1.0) + + # Compute padding + ratio = r, r # width, height ratios + new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) + dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding + if auto: # minimum rectangle + dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding + elif scaleFill: # stretch + dw, dh = 0.0, 0.0 + new_unpad = (new_shape[1], new_shape[0]) + ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios + + dw /= 2 # divide padding into 2 sides + dh /= 2 + + if shape[::-1] != new_unpad: # resize + im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR) + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border + return im, ratio, (dw, dh) + +def scale_boxes(net_shape, boxes, ori_shape, use_letterbox=False): + # Rescale boxes (xyxy) from net_shape to ori_shape + + if use_letterbox: + + gain = min( + net_shape[0] / ori_shape[0], net_shape[1] / ori_shape[1] + ) # gain = new / old + pad = (net_shape[1] - ori_shape[1] * gain) / 2, ( + net_shape[0] - ori_shape[0] * gain + ) / 2.0 + + boxes[:, [0, 2]] -= pad[0] # x padding + boxes[:, [1, 3]] -= pad[1] # y padding + boxes[:, :4] /= gain + else: + x_scale, y_scale = net_shape[1] / ori_shape[1], net_shape[0] / ori_shape[0] + + boxes[:, 0] /= x_scale + boxes[:, 1] /= y_scale + boxes[:, 2] /= x_scale + boxes[:, 3] /= y_scale + + clip_boxes(boxes, ori_shape) + return boxes + +def clip_boxes(boxes, shape): + + boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2 + boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2 +########################################################## + + +################## About pre and post processing ######### +def pre_processing(src_img, 
imgsz=608): + resized = cv2.resize(src_img, (imgsz, imgsz), interpolation=cv2.INTER_LINEAR) + in_img = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB) + in_img = np.transpose(in_img, (2, 0, 1)).astype(np.float32) + in_img = np.expand_dims(in_img, axis=0) + in_img /= 255.0 + return in_img + +def nms_cpu(boxes, confs, nms_thresh=0.5, min_mode=False): + # print(boxes.shape) + x1 = boxes[:, 0] + y1 = boxes[:, 1] + x2 = boxes[:, 2] + y2 = boxes[:, 3] + + areas = (x2 - x1) * (y2 - y1) + order = confs.argsort()[::-1] + + keep = [] + while order.size > 0: + idx_self = order[0] + idx_other = order[1:] + + keep.append(idx_self) + + xx1 = np.maximum(x1[idx_self], x1[idx_other]) + yy1 = np.maximum(y1[idx_self], y1[idx_other]) + xx2 = np.minimum(x2[idx_self], x2[idx_other]) + yy2 = np.minimum(y2[idx_self], y2[idx_other]) + + w = np.maximum(0.0, xx2 - xx1) + h = np.maximum(0.0, yy2 - yy1) + inter = w * h + + if min_mode: + over = inter / np.minimum(areas[order[0]], areas[order[1:]]) + else: + over = inter / (areas[order[0]] + areas[order[1:]] - inter) + + inds = np.where(over <= nms_thresh)[0] + order = order[inds + 1] + + return np.array(keep) + + +def post_processing(img, conf_thresh, nms_thresh, output, num_classes=80): + + # [batch, num, 1, 4] + box_array = output[:, :, :4] + # [batch, num, 2] + class_confs = output[:, :, 4:] + + max_conf = class_confs[:, :, 1] + max_id = class_confs[:, :, 0] + + bboxes_batch = [] + for i in range(box_array.shape[0]): + + argwhere = max_conf[i] > conf_thresh + l_box_array = box_array[i, argwhere, :] + l_max_conf = max_conf[i, argwhere] + l_max_id = max_id[i, argwhere] + + bboxes = [] + # nms for each class + for j in range(num_classes): + + cls_argwhere = l_max_id == j + ll_box_array = l_box_array[cls_argwhere, :] + ll_max_conf = l_max_conf[cls_argwhere] + ll_max_id = l_max_id[cls_argwhere] + + keep = nms_cpu(ll_box_array, ll_max_conf, nms_thresh) + + if (keep.size > 0): + ll_box_array = ll_box_array[keep, :] + ll_max_conf = ll_max_conf[keep] + ll_max_id = ll_max_id[keep] + + for k in range(ll_box_array.shape[0]): + bboxes.append([ll_box_array[k, 0], ll_box_array[k, 1], ll_box_array[k, 2], + ll_box_array[k, 3], ll_max_conf[k], ll_max_conf[k], ll_max_id[k]]) + + bboxes_batch.append(bboxes) + + return bboxes_batch +########################################################## + diff --git a/models/cv/detection/yolov6/ixrt/deploy.py b/models/cv/detection/yolov6/ixrt/deploy.py new file mode 100644 index 0000000000000000000000000000000000000000..f73d14b2617eee1e458825dc66d38177f482a1b1 --- /dev/null +++ b/models/cv/detection/yolov6/ixrt/deploy.py @@ -0,0 +1,99 @@ +# !/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
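+
+# Offline graph surgery: fuses a Sigmoid followed by a Mul into a single SiLU
+# operator so the engine builder sees one fused activation. Illustrative
+# usage (file names are examples):
+#   python3 deploy.py --src yolov6s.onnx --dst yolov6s_fused.onnx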
+import argparse
+import copy
+
+from typing import Union, Callable, List
+
+from tensorrt.deploy.api import *
+from tensorrt.deploy.backend.onnx.converter import default_converter
+from tensorrt.deploy.backend.torch.executor.operators._operators import to_py_type
+from tensorrt.deploy.ir.operator_attr import BaseOperatorAttr, EmptyAttr
+from tensorrt.deploy.ir.operator_type import OperatorType as OP
+from tensorrt.deploy.ir import operator_attr as attr, Operator, generate_operator_name
+from tensorrt.deploy.fusion import BasePass, PatternGraph, build_sequence_graph, GraphMatcher, PassSequence
+from tensorrt.deploy.ir import Graph
+from tensorrt.deploy.quantizer.quant_operator.base import quant_single_input_operator
+from tensorrt.deploy.backend.onnx.converter import convert_onnx_operator
+from tensorrt.deploy.api import GraphTransform, create_source, create_target
+
+class FuseSiLUPass(BasePass):
+    def process(self, graph: Graph) -> Graph:
+        pattern = build_sequence_graph([OP.SIGMOID, OP.MUL])
+
+        matcher = GraphMatcher(pattern, strict=False)
+        self.transform = GraphTransform(graph)
+        matcher.findall(graph, self.fuse_silu)
+        return graph
+
+    def fuse_silu(self, graph: Graph, pattern_graph: PatternGraph):
+        sigmoid = pattern_graph.nodes[0].operator
+        mul = pattern_graph.nodes[-1].operator
+
+        if not self.can_fused(graph, pattern_graph):
+            return
+
+        self.transform.delete_operators_between_op_op(sigmoid, mul)
+
+        silu_op = Operator(
+            name=generate_operator_name(graph, pattern="SiLU_{idx}"),
+            op_type=OP.SILU,
+            inputs=copy.copy(sigmoid.inputs),
+            outputs=copy.copy(mul.outputs),
+        )
+        silu_op.is_quant_operator = sigmoid.is_quant_operator and mul.is_quant_operator
+        graph.add_operator(silu_op)
+
+    def can_fused(self, graph: Graph, pattern_graph: PatternGraph):
+        sigmoid = pattern_graph.nodes[0].operator
+        mul = pattern_graph.nodes[-1].operator
+
+        # If the sigmoid output is consumed by more than one op, it cannot be fused
+        if len(self.transform.get_next_operators(sigmoid)) > 1:
+            return False
+
+        # Check that one input of mul comes from the same producer as sigmoid
+        sigmoid_prev_op = graph.get_previous_operators(sigmoid)
+        if len(sigmoid_prev_op) != 1:
+            return False
+
+        mul_prev_op = graph.get_previous_operators(mul)
+        if len(mul_prev_op) != 2:
+            return False
+
+        for op in mul_prev_op:
+            if op is sigmoid_prev_op[0]:
+                return True
+
+        return False
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--src", type=str)
+    parser.add_argument("--dst", type=str)
+    args = parser.parse_args()
+    return args
+
+
+if __name__ == "__main__":
+
+    args = parse_args()
+    graph = create_source(args.src)()
+    graph = FuseSiLUPass().process(graph)
+    create_target(saved_path=args.dst).export(graph)
+    print("Fused ONNX saved to", args.dst)
diff --git a/models/cv/detection/yolov6/ixrt/inference.py b/models/cv/detection/yolov6/ixrt/inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..836f13b2376ded6144ea9bf0da7ed47cd3f5905f
--- /dev/null
+++ b/models/cv/detection/yolov6/ixrt/inference.py
@@ -0,0 +1,253 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import sys +sys.path.insert(0, "YOLOv6") +import json +import argparse +import time +import tensorrt +from tensorrt import Dims +import pycuda.autoinit +import pycuda.driver as cuda +import torch +import numpy as np +from tqdm import tqdm + +from common import create_engine_context, setup_io_bindings + +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval + +from yolov6.core.evaler import Evaler +from yolov6.utils.events import NCOLS +from yolov6.utils.nms import non_max_suppression +from yolov6.data.data_load import create_dataloader + + +coco_classes = { + 0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', + 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', + 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', + 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', + 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', + 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed', + 60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone', 68: 'microwave', 69: 'oven', + 70: 'toaster', 71: 'sink', 72: 'refrigerator', 73: 'book', 74: 'clock', 75: 'vase', 76: 'scissors', 77: 'teddy bear', 78: 'hair drier', 79: 'toothbrush' +} + +class EvalerIXRT(Evaler): + def eval_ixrt(self, args, stride=32): + self.stride = stride + def init_data(dataloader, task): + self.is_coco = self.data.get("is_coco", False) + self.ids = self.coco80_to_coco91_class() if self.is_coco else list(range(1000)) + pad = 0.0 + dataloader = create_dataloader( + self.data[task], self.img_size, self.batch_size, self.stride, + check_labels=True, pad=pad, rect=False, data_dict=self.data, task=task)[0] + return dataloader + + dataloader = init_data(None,'val') + pred_results = [] + + input_name = "input" + host_mem = tensorrt.IHostMemory + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + engine, context = create_engine_context(args.model_engine, logger) + input_idx = engine.get_binding_index(input_name) + context.set_binding_shape(input_idx, Dims((args.bsz,3,args.imgsz,args.imgsz))) + inputs, outputs, allocations = setup_io_bindings(engine, context) + + if args.warm_up > 0: + print("\nWarm Start.") + for i in range(args.warm_up): + context.execute_v2(allocations) + print("Warm Done.") + + pbar = tqdm(dataloader, desc="Inferencing model in validation dataset.", ncols=NCOLS) + + forward_time = 0.0 + num_samples = 0 + for imgs, targes, paths, shapes in pbar: + imgs = imgs.float() + pad_batch = len(imgs) != self.batch_size + if pad_batch: + origin_size = len(imgs) + imgs = np.resize(imgs, 
(self.batch_size, *imgs.shape[1:])) + imgs /= 255.0 + # print(imgs.shape) + batch_data = np.ascontiguousarray(imgs) + data_shape = batch_data.shape + + cur_bsz_sample = batch_data.shape[0] + num_samples += cur_bsz_sample + + # Set input + input_idx = engine.get_binding_index(input_name) + context.set_binding_shape(input_idx, Dims(data_shape)) + inputs, outputs, allocations = setup_io_bindings(engine, context) + + cuda.memcpy_htod(inputs[0]["allocation"], batch_data) + # Prepare the output data + output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) + + + start_time = time.time() + context.execute_v2(allocations) + end_time = time.time() + forward_time += end_time - start_time + + cuda.memcpy_dtoh(output, outputs[0]["allocation"]) + + if not args.perf_only: + if pad_batch: + output = output[:origin_size] + + outputs = torch.from_numpy(output) + outputs = non_max_suppression(outputs, self.conf_thres, self.iou_thres, multi_label=True) + pred_results.extend(self.convert_to_coco_format(outputs, imgs, paths, shapes, self.ids)) + if args.perf_only: + fps = num_samples / forward_time + return fps + else: + return dataloader, pred_results + + def eval_ixrt_map(self, pred_results, dataloader, task): + '''Evaluate models + For task speed, this function only evaluates the speed of model and outputs inference time. + For task val, this function evaluates the speed and mAP by pycocotools, and returns + inference time and mAP value. + ''' + if not self.do_coco_metric and self.do_pr_metric: + return self.pr_metric_result + print(f'\nEvaluating mAP by pycocotools.') + if task != 'speed' and len(pred_results): + if 'anno_path' in self.data: + anno_json = self.data['anno_path'] + else: + # generated coco format labels in dataset initialization + task = 'val' if task == 'train' else task + dataset_root = os.path.dirname(os.path.dirname(self.data[task])) + base_name = os.path.basename(self.data[task]) + anno_json = os.path.join(dataset_root, 'annotations', f'instances_{base_name}.json') + pred_json = os.path.join(self.save_dir, "predictions.json") + print(f'Saving {pred_json}...') + with open(pred_json, 'w') as f: + json.dump(pred_results, f) + + anno = COCO(anno_json) + pred = anno.loadRes(pred_json) + cocoEval = COCOeval(anno, pred, 'bbox') + if self.is_coco: + imgIds = [int(os.path.basename(x).split(".")[0]) + for x in dataloader.dataset.img_paths] + cocoEval.params.imgIds = imgIds + cocoEval.evaluate() + cocoEval.accumulate() + cocoEval.summarize() + + return cocoEval.stats + else: + print("pred_results is none") + return None + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument( + "--model_engine", + type=str, + default="", + help="model engine path", + ) + + parser.add_argument("--bsz", type=int, default=32, help="test batch size") + parser.add_argument( + "--imgsz", + "--img", + "--img-size", + type=int, + default=608, + help="inference size h,w", + ) + + parser.add_argument("--datasets", + type=str, + required=True, + help="datasets path.") + + parser.add_argument("--warm_up", type=int, default=3, help="warm_up count") + + parser.add_argument("--acc_target", + type=float, + default=None, + help="Model inference Accuracy target.") + + parser.add_argument("--fps_target", + type=float, + default=None, + help="Model inference FPS target.") + + parser.add_argument("--perf_only", + type=bool, + default=False, + help="Run performance test only") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + task = 'val' + + batch_size = args.bsz + 
data_path = os.path.join(args.datasets, "images", "val2017") + label_path = os.path.join(args.datasets, "annotations", "instances_val2017.json") + + + data = { + 'task': 'val', + 'val': data_path, + 'anno_path': label_path, + 'names': coco_classes, + 'is_coco': True, + 'nc': 80 + } + + evaluator = EvalerIXRT(data, batch_size) + + if args.perf_only: + fps = evaluator.eval_ixrt(args) + print("FPS : ", fps) + print(f"Performance Check : Test {fps} >= target {args.fps_target}") + else: + dataloader, pred_results = evaluator.eval_ixrt(args) + eval_result = evaluator.eval_ixrt_map(pred_results, dataloader, task) + map, map50 = eval_result[:2] + print("MAP@0.5 : ", map50) + print(f"Accuracy Check : Test {map50} >= target {args.acc_target}") + if map50 >= args.acc_target: + print("pass!") + exit() + else: + print("failed!") + exit(1) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/detection/yolov6/ixrt/quant.py b/models/cv/detection/yolov6/ixrt/quant.py new file mode 100644 index 0000000000000000000000000000000000000000..70265cbc25d24d4ed41640c76f78a1839555f749 --- /dev/null +++ b/models/cv/detection/yolov6/ixrt/quant.py @@ -0,0 +1,105 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
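+
+# Post-training static quantization: samples step * bsz images from the COCO
+# val set as a calibration set, then writes a QDQ ONNX model plus a JSON file
+# of quantization parameters alongside it.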
+import os +import cv2 +import random +import argparse +import numpy as np +from tensorrt.deploy import static_quantize + +import torch +import torchvision.datasets +from torch.utils.data import DataLoader +from common import letterbox + + +def setseed(seed=42): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model_name", type=str) + parser.add_argument("--model", type=str, default="yolov4_bs16_without_decoder.onnx") + parser.add_argument("--dataset_dir", type=str, default="./coco2017/val2017") + parser.add_argument("--ann_file", type=str, default="./coco2017/annotations/instances_val2017.json") + parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], default="hist_percentile") + parser.add_argument("--disable_quant_names", nargs='*', type=str) + parser.add_argument("--save_quant_model", type=str, help="save the quantization model path", default=None) + parser.add_argument("--bsz", type=int, default=16) + parser.add_argument("--step", type=int, default=32) + parser.add_argument("--seed", type=int, default=42) + parser.add_argument("--imgsz", type=int, default=608) + parser.add_argument("--use_letterbox", action="store_true") + args = parser.parse_args() + return args + +args = parse_args() +setseed(args.seed) +model_name = args.model_name + + +def get_dataloader(data_dir, step=32, batch_size=16, new_shape=[608, 608], use_letterbox=False): + num = step * batch_size + val_list = [os.path.join(data_dir, x) for x in os.listdir(data_dir)] + random.shuffle(val_list) + pic_list = val_list[:num] + + calibration_dataset = [] + for file_path in pic_list: + pic_data = cv2.imread(file_path) + org_img = pic_data + assert org_img is not None, 'Image not Found ' + file_path + h0, w0 = org_img.shape[:2] + + if use_letterbox: + img, ratio, dwdh = letterbox(org_img, new_shape=(new_shape[1], new_shape[0]), auto=False, scaleup=True) + else: + img = cv2.resize(org_img, new_shape) + img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + img = np.ascontiguousarray(img) / 255.0 # 0~1 np array + img = torch.from_numpy(img).float() + + calibration_dataset.append(img) + + calibration_dataloader = DataLoader( + calibration_dataset, + shuffle=True, + batch_size=batch_size, + drop_last=True + ) + return calibration_dataloader + +dataloader = get_dataloader( + data_dir=args.dataset_dir, + step=args.step, + batch_size=args.bsz, + new_shape=(args.imgsz, args.imgsz), + use_letterbox=args.use_letterbox +) + +dirname = os.path.dirname(args.save_quant_model) +quant_json_path = os.path.join(dirname, f"quantized_{model_name}.json") + +static_quantize(args.model, + calibration_dataloader=dataloader, + save_quant_onnx_path=args.save_quant_model, + save_quant_params_path=quant_json_path, + observer=args.observer, + data_preprocess=lambda x: x.to("cuda"), + quant_format="qdq", + disable_quant_names=args.disable_quant_names) diff --git a/models/cv/detection/yolov6/ixrt/scripts/infer_yolov6s_fp16_accuracy.sh b/models/cv/detection/yolov6/ixrt/scripts/infer_yolov6s_fp16_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..09cc0ac03802a697696ff3e68ea2c2157e240ea7 --- /dev/null +++ b/models/cv/detection/yolov6/ixrt/scripts/infer_yolov6s_fp16_accuracy.sh @@ -0,0 +1,66 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+EXIT_STATUS=0
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then
+        EXIT_STATUS=1
+    fi
+}
+
+PROJ_DIR=$(cd $(dirname $0);cd ../; pwd)
+DATASETS_DIR="${PROJ_DIR}/data/coco"
+COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json
+EVAL_DIR=${DATASETS_DIR}/images/val2017
+CHECKPOINTS_DIR="${PROJ_DIR}/data"
+RUN_DIR="${PROJ_DIR}"
+ORIGINE_MODEL=${CHECKPOINTS_DIR}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo ====================== Model Info ======================
+echo Model Name : yolov6s
+echo Onnx Path : ${ORIGINE_MODEL}
+
+BATCH_SIZE=32
+CURRENT_MODEL=${CHECKPOINTS_DIR}/yolov6s.onnx
+
+# Build Engine
+echo Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/yolov6s_fp16.engine
+if [ -f $ENGINE_FILE ];then
+    echo " "Build Engine Skip, $ENGINE_FILE already exists
+else
+    python3 ${RUN_DIR}/build_engine.py \
+        --precision float16 \
+        --model ${CURRENT_MODEL} \
+        --engine ${ENGINE_FILE}
+    echo " "Generate Engine ${ENGINE_FILE}
+fi
+
+# Inference
+echo Inference
+RUN_BATCH_SIZE=32
+python3 ${RUN_DIR}/inference.py \
+    --model_engine ${ENGINE_FILE} \
+    --warm_up 2 \
+    --bsz ${RUN_BATCH_SIZE} \
+    --imgsz 640 \
+    --datasets ${DATASETS_DIR} \
+    --acc_target 0.3
+exit ${EXIT_STATUS}
diff --git a/models/cv/detection/yolov6/ixrt/scripts/infer_yolov6s_fp16_performance.sh b/models/cv/detection/yolov6/ixrt/scripts/infer_yolov6s_fp16_performance.sh
new file mode 100644
index 0000000000000000000000000000000000000000..409fd354e86d7fa3092fda68bd1da2c1ed35498d
--- /dev/null
+++ b/models/cv/detection/yolov6/ixrt/scripts/infer_yolov6s_fp16_performance.sh
@@ -0,0 +1,79 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
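+
+# Builds (or reuses) the FP16 engine, then runs inference.py with --perf_only
+# to report FPS at batch size 32; no accuracy evaluation is performed.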
+
+EXIT_STATUS=0
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then
+        EXIT_STATUS=1
+    fi
+}
+
+PROJ_DIR=$(cd $(dirname $0);cd ../; pwd)
+DATASETS_DIR="${PROJ_DIR}/data/coco"
+COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json
+EVAL_DIR=${DATASETS_DIR}/images/val2017
+CHECKPOINTS_DIR="${PROJ_DIR}/data"
+RUN_DIR="${PROJ_DIR}"
+ORIGINE_MODEL=${CHECKPOINTS_DIR}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo ====================== Model Info ======================
+echo Model Name : yolov6s
+echo Onnx Path : ${ORIGINE_MODEL}
+
+BATCH_SIZE=32
+CURRENT_MODEL=${CHECKPOINTS_DIR}/yolov6s.onnx
+
+# fuse silu
+# FINAL_MODEL=${CHECKPOINTS_DIR}/yolov6_bs${BATCH_SIZE}_fused.onnx
+# if [ -f $FINAL_MODEL ];then
+#     echo " "Fuse silu Skip, $FINAL_MODEL already exists
+# else
+#     python3 ${RUN_DIR}/deploy.py \
+#         --src ${CURRENT_MODEL} \
+#         --dst ${FINAL_MODEL}
+#     echo " "Generate ${FINAL_MODEL}
+# fi
+# CURRENT_MODEL=${FINAL_MODEL}
+
+# Build Engine
+echo Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/yolov6s_fp16.engine
+if [ -f $ENGINE_FILE ];then
+    echo " "Build Engine Skip, $ENGINE_FILE already exists
+else
+    python3 ${RUN_DIR}/build_engine.py \
+        --precision float16 \
+        --model ${CURRENT_MODEL} \
+        --engine ${ENGINE_FILE}
+    echo " "Generate Engine ${ENGINE_FILE}
+fi
+
+# Inference
+echo Inference
+RUN_BATCH_SIZE=32
+python3 ${RUN_DIR}/inference.py \
+    --model_engine ${ENGINE_FILE} \
+    --warm_up 2 \
+    --bsz ${RUN_BATCH_SIZE} \
+    --imgsz 640 \
+    --datasets ${DATASETS_DIR} \
+    --perf_only true \
+    --fps_target 0.0
+exit ${EXIT_STATUS}
diff --git a/models/cv/detection/yolov6/ixrt/scripts/infer_yolov6s_int8_accuracy.sh b/models/cv/detection/yolov6/ixrt/scripts/infer_yolov6s_int8_accuracy.sh
new file mode 100644
index 0000000000000000000000000000000000000000..701f80f06ac1ca46d154c1122f02913b247a83af
--- /dev/null
+++ b/models/cv/detection/yolov6/ixrt/scripts/infer_yolov6s_int8_accuracy.sh
@@ -0,0 +1,86 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
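+
+# Quantizes the ONNX model with the hist_percentile observer, builds an INT8
+# engine, and evaluates mAP@0.5 against the 0.3 accuracy target.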
+
+EXIT_STATUS=0
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then
+        EXIT_STATUS=1
+    fi
+}
+
+PROJ_DIR=$(cd $(dirname $0);cd ../; pwd)
+DATASETS_DIR="${PROJ_DIR}/data/coco"
+COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json
+EVAL_DIR=${DATASETS_DIR}/images/val2017
+CHECKPOINTS_DIR="${PROJ_DIR}/data"
+RUN_DIR="${PROJ_DIR}"
+ORIGINE_MODEL=${CHECKPOINTS_DIR}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo ====================== Model Info ======================
+echo Model Name : yolov6s
+echo Onnx Path : ${ORIGINE_MODEL}
+
+BATCH_SIZE=32
+CURRENT_MODEL=${CHECKPOINTS_DIR}/yolov6s.onnx
+
+# quant
+FINAL_MODEL=${CHECKPOINTS_DIR}/quantized_yolov6s_bs${BATCH_SIZE}.onnx
+if [ -f $FINAL_MODEL ];then
+    echo " "Quantize Skip, $FINAL_MODEL already exists
+else
+    python3 ${RUN_DIR}/quant.py \
+        --model_name "YOLOV6s" \
+        --model ${CURRENT_MODEL} \
+        --bsz ${BATCH_SIZE} \
+        --dataset_dir ${EVAL_DIR} \
+        --ann_file ${COCO_GT} \
+        --observer "hist_percentile" \
+        --save_quant_model ${FINAL_MODEL} \
+        --imgsz 640 \
+        --disable_quant_names '/detect/Split' '/detect/Div' '/detect/Sub' '/detect/Add' '/detect/Add_1' '/detect/Sub_1' '/detect/Div' '/detect/Concat_6' '/detect/Mul' '/detect/Concat_7' \
+        --use_letterbox
+    echo " "Generate ${FINAL_MODEL}
+fi
+CURRENT_MODEL=${FINAL_MODEL}
+
+# Build Engine
+echo Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/yolov6s_int8.engine
+if [ -f $ENGINE_FILE ];then
+    echo " "Build Engine Skip, $ENGINE_FILE already exists
+else
+    python3 ${RUN_DIR}/build_engine.py \
+        --precision int8 \
+        --model ${CURRENT_MODEL} \
+        --engine ${ENGINE_FILE}
+    echo " "Generate Engine ${ENGINE_FILE}
+fi
+
+# Inference
+echo Inference
+RUN_BATCH_SIZE=32
+python3 ${RUN_DIR}/inference.py \
+    --model_engine ${ENGINE_FILE} \
+    --warm_up 2 \
+    --bsz ${RUN_BATCH_SIZE} \
+    --imgsz 640 \
+    --datasets ${DATASETS_DIR} \
+    --acc_target 0.3
+exit ${EXIT_STATUS}
diff --git a/models/cv/detection/yolov6/ixrt/scripts/infer_yolov6s_int8_performance.sh b/models/cv/detection/yolov6/ixrt/scripts/infer_yolov6s_int8_performance.sh
new file mode 100644
index 0000000000000000000000000000000000000000..58f77417058c5461fe84161bb139bcecad4623c6
--- /dev/null
+++ b/models/cv/detection/yolov6/ixrt/scripts/infer_yolov6s_int8_performance.sh
@@ -0,0 +1,87 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
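+
+# Same quantize-and-build flow as the INT8 accuracy script, but runs
+# inference.py with --perf_only to measure FPS instead of evaluating mAP.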
+
+EXIT_STATUS=0
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then
+        EXIT_STATUS=1
+    fi
+}
+
+PROJ_DIR=$(cd $(dirname $0);cd ../; pwd)
+DATASETS_DIR="${PROJ_DIR}/data/coco"
+COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json
+EVAL_DIR=${DATASETS_DIR}/images/val2017
+CHECKPOINTS_DIR="${PROJ_DIR}/data"
+RUN_DIR="${PROJ_DIR}"
+ORIGINE_MODEL=${CHECKPOINTS_DIR}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo ====================== Model Info ======================
+echo Model Name : yolov6s
+echo Onnx Path : ${ORIGINE_MODEL}
+
+BATCH_SIZE=32
+CURRENT_MODEL=${CHECKPOINTS_DIR}/yolov6s.onnx
+
+# quant
+FINAL_MODEL=${CHECKPOINTS_DIR}/quantized_yolov6s_bs${BATCH_SIZE}.onnx
+if [ -f $FINAL_MODEL ];then
+    echo " "Quantize Skip, $FINAL_MODEL already exists
+else
+    python3 ${RUN_DIR}/quant.py \
+        --model_name "YOLOV6s" \
+        --model ${CURRENT_MODEL} \
+        --bsz ${BATCH_SIZE} \
+        --dataset_dir ${EVAL_DIR} \
+        --ann_file ${COCO_GT} \
+        --observer "hist_percentile" \
+        --save_quant_model ${FINAL_MODEL} \
+        --imgsz 640 \
+        --disable_quant_names '/detect/Split' '/detect/Div' '/detect/Sub' '/detect/Add' '/detect/Add_1' '/detect/Sub_1' '/detect/Div' '/detect/Concat_6' '/detect/Mul' '/detect/Concat_7' \
+        --use_letterbox
+    echo " "Generate ${FINAL_MODEL}
+fi
+CURRENT_MODEL=${FINAL_MODEL}
+
+# Build Engine
+echo Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/yolov6s_int8.engine
+if [ -f $ENGINE_FILE ];then
+    echo " "Build Engine Skip, $ENGINE_FILE already exists
+else
+    python3 ${RUN_DIR}/build_engine.py \
+        --precision int8 \
+        --model ${CURRENT_MODEL} \
+        --engine ${ENGINE_FILE}
+    echo " "Generate Engine ${ENGINE_FILE}
+fi
+
+# Inference
+echo Inference
+RUN_BATCH_SIZE=32
+python3 ${RUN_DIR}/inference.py \
+    --model_engine ${ENGINE_FILE} \
+    --warm_up 2 \
+    --bsz ${RUN_BATCH_SIZE} \
+    --imgsz 640 \
+    --datasets ${DATASETS_DIR} \
+    --perf_only true \
+    --fps_target 0.0
+exit ${EXIT_STATUS}
\ No newline at end of file
diff --git a/models/cv/detection/yolov8/ixrt/README.md b/models/cv/detection/yolov8/ixrt/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..07558edf6f3591a70262c778309d67484d1edf4f
--- /dev/null
+++ b/models/cv/detection/yolov8/ixrt/README.md
@@ -0,0 +1,72 @@
+# YOLOv8
+
+## Description
+
+YOLOv8 combines speed and accuracy in real-time object detection tasks. With a focus on simplicity and efficiency, the model employs a single neural network to make predictions, enabling fast and accurate identification of objects in images or video streams.
+
+## Setup
+
+### Install
+
+```bash
+# Install libGL
+## CentOS
+yum install -y mesa-libGL
+## Ubuntu
+apt install -y libgl1-mesa-glx
+
+pip3 install tqdm
+pip3 install onnx
+pip3 install onnxsim
+pip3 install pycocotools
+pip3 install ultralytics
+pip3 install pycuda
+```
+
+### Download
+
+Pretrained model: 
+
+Dataset: to download the validation dataset.
+
+```bash
+# get yolov8n.pt
+wget https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n.pt
+# set coco path
+mkdir -p data/
+ln -s /Path/to/coco/ data/coco
+```
+
+### Model Conversion
+
+```bash
+python3 export.py --weight yolov8n.pt --batch 32
+onnxsim yolov8n.onnx ./data/yolov8n.onnx
+```
+
+## Inference
+
+### FP16
+
+```bash
+# Accuracy
+bash scripts/infer_yolov8n_fp16_accuracy.sh
+# Performance
+bash scripts/infer_yolov8n_fp16_performance.sh
+```
+
+### INT8
+
+```bash
+# Accuracy
+bash scripts/infer_yolov8n_int8_accuracy.sh
+# Performance
+bash scripts/infer_yolov8n_int8_performance.sh
+```
+
+## Results
+
+| Model  | BatchSize | Precision | FPS      | mAP@0.5 |
+| ------ | --------- | --------- | -------- | ------- |
+| YOLOv8 | 32        | FP16      | 1511.366 | 0.525   |
+| YOLOv8 | 32        | INT8      | 1841.017 | 0.517   |
diff --git a/models/cv/detection/yolov8/ixrt/build_engine.py b/models/cv/detection/yolov8/ixrt/build_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..f5e1719a22c84b400a2ba9b9cbfdea6bae99e80d
--- /dev/null
+++ b/models/cv/detection/yolov8/ixrt/build_engine.py
@@ -0,0 +1,94 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
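+
+# Engine-build helper identical to the YOLOv6 sample's build_engine.py:
+# parses the ONNX model and emits a serialized FP16 or INT8 engine.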
+import os +import cv2 +import argparse +import numpy as np + +import torch +import tensorrt +from tensorrt import Dims + + +def build_engine_trtapi_staticshape(config): + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(config.model) + + precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 + # print("precision : ", precision) + build_config.set_flag(precision) + + plan = builder.build_serialized_network(network, build_config) + engine_file_path = config.engine + with open(engine_file_path, "wb") as f: + f.write(plan) + print("Build static shape engine done!") + + +def build_engine_trtapi_dynamicshape(config): + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + + profile = builder.create_optimization_profile() + profile.set_shape("input", + Dims([1, 3, 608, 608]), + Dims([32, 3, 608, 608]), + Dims([64, 3, 608, 608]), + ) + build_config.add_optimization_profile(profile) + + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(config.model) + precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 + # print("precision : ", precision) + build_config.set_flag(precision) + + # set dynamic + num_inputs = network.num_inputs + for i in range(num_inputs): + input_tensor = network.get_input(i) + input_tensor.shape = Dims([-1, 3, 608, 608]) + + plan = builder.build_serialized_network(network, build_config) + engine_file_path = config.engine + with open(engine_file_path, "wb") as f: + f.write(plan) + print("Build dynamic shape engine done!") + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", + help="The precision of datatype") + # engine args + parser.add_argument("--engine", type=str, default=None) + + args = parser.parse_args() + return args + + +if __name__ == "__main__": + args = parse_args() + build_engine_trtapi_staticshape(args) + # build_engine_trtapi_dynamicshape(args) diff --git a/models/cv/detection/yolov8/ixrt/common.py b/models/cv/detection/yolov8/ixrt/common.py new file mode 100644 index 0000000000000000000000000000000000000000..dc3c2766533fa5a334a61231adb168ecf09622c3 --- /dev/null +++ b/models/cv/detection/yolov8/ixrt/common.py @@ -0,0 +1,335 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
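+
+# Shared helpers (engine/context creation, I/O bindings, letterbox, NumPy
+# NMS/post-processing); this file matches common.py in the YOLOv6 sample.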
+import os +import cv2 +import glob +import time +import numpy as np +from tqdm import tqdm + +import tensorrt +import pycuda.driver as cuda + + +def load_class_names(namesfile): + class_names = [] + with open(namesfile, 'r') as fp: + lines = fp.readlines() + for line in lines: + line = line.rstrip() + class_names.append(line) + return class_names + +# input : [bsz, box_num, 5(cx, cy, w, h, conf) + class_num(prob[0], prob[1], ...)] +# output : [bsz, box_num, 6(left_top_x, left_top_y, right_bottom_x, right_bottom_y, class_id, max_prob*conf)] +def box_class85to6(input): + center_x_y = input[:, :2] + side = input[:, 2:4] + conf = input[:, 4:5] + class_id = np.argmax(input[:, 5:], axis = -1) + class_id = class_id.astype(np.float32).reshape(-1, 1) + 1 + max_prob = np.max(input[:, 5:], axis = -1).reshape(-1, 1) + x1_y1 = center_x_y - 0.5 * side + x2_y2 = center_x_y + 0.5 * side + nms_input = np.concatenate([x1_y1, x2_y2, class_id, max_prob*conf], axis = -1) + return nms_input + +def save2json(batch_img_id, pred_boxes, json_result, class_trans): + for i, boxes in enumerate(pred_boxes): + if boxes is not None: + image_id = int(batch_img_id[i]) + # have no target + if image_id == -1: + continue + + for x1, y1, x2, y2, _, p, c in boxes: + x1, y1, x2, y2, p = float(x1), float(y1), float(x2), float(y2), float(p) + c = int(c) + x = x1 + y = y1 + w = x2 - x1 + h = y2 - y1 + + json_result.append( + { + "image_id": image_id, + "category_id": class_trans[c - 1], + "bbox": [x, y, w, h], + "score": p, + } + ) + +################## About TensorRT ################# +def create_engine_context(engine_path, logger): + with open(engine_path, "rb") as f: + runtime = tensorrt.Runtime(logger) + assert runtime + engine = runtime.deserialize_cuda_engine(f.read()) + assert engine + context = engine.create_execution_context() + assert context + + return engine, context + +def setup_io_bindings(engine, context): + # Setup I/O bindings + inputs = [] + outputs = [] + allocations = [] + + for i in range(engine.num_bindings): + is_input = False + if engine.binding_is_input(i): + is_input = True + name = engine.get_binding_name(i) + dtype = engine.get_binding_dtype(i) + shape = context.get_binding_shape(i) + if is_input: + batch_size = shape[0] + size = np.dtype(tensorrt.nptype(dtype)).itemsize + for s in shape: + size *= s + allocation = cuda.mem_alloc(size) + binding = { + "index": i, + "name": name, + "dtype": np.dtype(tensorrt.nptype(dtype)), + "shape": list(shape), + "allocation": allocation, + } + # print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") + allocations.append(allocation) + if engine.binding_is_input(i): + inputs.append(binding) + else: + outputs.append(binding) + return inputs, outputs, allocations +########################################################## + + +################## About Loading Dataset ################# +def load_images(images_path): + """ + If image path is given, return it directly + For txt file, read it and return each line as image path + In other case, it's a folder, return a list with names of each + jpg, jpeg and png file + """ + input_path_extension = images_path.split('.')[-1] + if input_path_extension in ['jpg', 'jpeg', 'png']: + return [images_path] + elif input_path_extension == "txt": + with open(images_path, "r") as f: + return f.read().splitlines() + else: + return glob.glob( + os.path.join(images_path, "*.jpg")) + \ + glob.glob(os.path.join(images_path, "*.png")) + \ + glob.glob(os.path.join(images_path, "*.jpeg")) + +def 
prepare_batch(images_path, bs=16, input_size=(608, 608)): + + width, height = input_size + + batch_names = [] + batch_images = [] + batch_shapes = [] + + temp_names = [] + temp_images = [] + temp_shapes = [] + + for i, image_path in tqdm(enumerate(images_path), desc="Loading coco data"): + name = os.path.basename(image_path) + image = cv2.imread(image_path) + h, w, _ = image.shape + image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + image_resized = cv2.resize(image_rgb, (width, height), + interpolation=cv2.INTER_LINEAR) + custom_image = image_resized.transpose(2, 0, 1).astype(np.float32) / 255. + custom_image = np.expand_dims(custom_image, axis=0) + + if i != 0 and i % bs == 0: + batch_names.append(temp_names) + batch_images.append(np.concatenate(temp_images, axis=0)) + batch_shapes.append(temp_shapes) + + temp_names = [name] + temp_images = [custom_image] + temp_shapes = [(h, w)] + else: + temp_names.append(name) + temp_images.append(custom_image) + temp_shapes.append((h, w)) + + return batch_names, batch_images, batch_shapes +########################################################## + + +################## About Operating box ################# +def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32): + # Resize and pad image while meeting stride-multiple constraints + shape = im.shape[:2] # current shape [height, width] + if isinstance(new_shape, int): + new_shape = (new_shape, new_shape) + + # Scale ratio (new / old) + r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) + if not scaleup: # only scale down, do not scale up (for better val mAP) + r = min(r, 1.0) + + # Compute padding + ratio = r, r # width, height ratios + new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) + dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding + if auto: # minimum rectangle + dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding + elif scaleFill: # stretch + dw, dh = 0.0, 0.0 + new_unpad = (new_shape[1], new_shape[0]) + ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios + + dw /= 2 # divide padding into 2 sides + dh /= 2 + + if shape[::-1] != new_unpad: # resize + im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR) + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border + return im, ratio, (dw, dh) + +def scale_boxes(net_shape, boxes, ori_shape, use_letterbox=False): + # Rescale boxes (xyxy) from net_shape to ori_shape + + if use_letterbox: + + gain = min( + net_shape[0] / ori_shape[0], net_shape[1] / ori_shape[1] + ) # gain = new / old + pad = (net_shape[1] - ori_shape[1] * gain) / 2, ( + net_shape[0] - ori_shape[0] * gain + ) / 2.0 + + boxes[:, [0, 2]] -= pad[0] # x padding + boxes[:, [1, 3]] -= pad[1] # y padding + boxes[:, :4] /= gain + else: + x_scale, y_scale = net_shape[1] / ori_shape[1], net_shape[0] / ori_shape[0] + + boxes[:, 0] /= x_scale + boxes[:, 1] /= y_scale + boxes[:, 2] /= x_scale + boxes[:, 3] /= y_scale + + clip_boxes(boxes, ori_shape) + return boxes + +def clip_boxes(boxes, shape): + + boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2 + boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2 +########################################################## + + +################## About pre and post processing ######### +def pre_processing(src_img, 
imgsz=608): + resized = cv2.resize(src_img, (imgsz, imgsz), interpolation=cv2.INTER_LINEAR) + in_img = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB) + in_img = np.transpose(in_img, (2, 0, 1)).astype(np.float32) + in_img = np.expand_dims(in_img, axis=0) + in_img /= 255.0 + return in_img + +def nms_cpu(boxes, confs, nms_thresh=0.5, min_mode=False): + # print(boxes.shape) + x1 = boxes[:, 0] + y1 = boxes[:, 1] + x2 = boxes[:, 2] + y2 = boxes[:, 3] + + areas = (x2 - x1) * (y2 - y1) + order = confs.argsort()[::-1] + + keep = [] + while order.size > 0: + idx_self = order[0] + idx_other = order[1:] + + keep.append(idx_self) + + xx1 = np.maximum(x1[idx_self], x1[idx_other]) + yy1 = np.maximum(y1[idx_self], y1[idx_other]) + xx2 = np.minimum(x2[idx_self], x2[idx_other]) + yy2 = np.minimum(y2[idx_self], y2[idx_other]) + + w = np.maximum(0.0, xx2 - xx1) + h = np.maximum(0.0, yy2 - yy1) + inter = w * h + + if min_mode: + over = inter / np.minimum(areas[order[0]], areas[order[1:]]) + else: + over = inter / (areas[order[0]] + areas[order[1:]] - inter) + + inds = np.where(over <= nms_thresh)[0] + order = order[inds + 1] + + return np.array(keep) + + +def post_processing(img, conf_thresh, nms_thresh, output, num_classes=80): + + # [batch, num, 1, 4] + box_array = output[:, :, :4] + # [batch, num, 2] + class_confs = output[:, :, 4:] + + max_conf = class_confs[:, :, 1] + max_id = class_confs[:, :, 0] + + bboxes_batch = [] + for i in range(box_array.shape[0]): + + argwhere = max_conf[i] > conf_thresh + l_box_array = box_array[i, argwhere, :] + l_max_conf = max_conf[i, argwhere] + l_max_id = max_id[i, argwhere] + + bboxes = [] + # nms for each class + for j in range(num_classes): + + cls_argwhere = l_max_id == j + ll_box_array = l_box_array[cls_argwhere, :] + ll_max_conf = l_max_conf[cls_argwhere] + ll_max_id = l_max_id[cls_argwhere] + + keep = nms_cpu(ll_box_array, ll_max_conf, nms_thresh) + + if (keep.size > 0): + ll_box_array = ll_box_array[keep, :] + ll_max_conf = ll_max_conf[keep] + ll_max_id = ll_max_id[keep] + + for k in range(ll_box_array.shape[0]): + bboxes.append([ll_box_array[k, 0], ll_box_array[k, 1], ll_box_array[k, 2], + ll_box_array[k, 3], ll_max_conf[k], ll_max_conf[k], ll_max_id[k]]) + + bboxes_batch.append(bboxes) + + return bboxes_batch +########################################################## + diff --git a/models/cv/detection/yolov8/ixrt/export.py b/models/cv/detection/yolov8/ixrt/export.py new file mode 100644 index 0000000000000000000000000000000000000000..383b327e5794fd7930a78e2acfbf4237c556c4d8 --- /dev/null +++ b/models/cv/detection/yolov8/ixrt/export.py @@ -0,0 +1,43 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
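+
+# Exports an ultralytics YOLOv8 checkpoint to ONNX with a fixed 640x640 input
+# at the requested batch size (opset 11). Example from the README:
+#   python3 export.py --weight yolov8n.pt --batch 32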
+ +import argparse +from ultralytics import YOLO + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--weight", + type=str, + required=True, + help="pytorch model weight.") + + parser.add_argument("--batch", + type=int, + required=True, + help="batchsize of the model.") + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + model = YOLO(args.weight).cpu() + + model.export(format='onnx', batch=args.batch, imgsz=(640, 640), opset=11) + +if __name__ == "__main__": + main() diff --git a/models/cv/detection/yolov8/ixrt/inference.py b/models/cv/detection/yolov8/ixrt/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..d83b013610c132a776a2dc02663177e20a7ea2e3 --- /dev/null +++ b/models/cv/detection/yolov8/ixrt/inference.py @@ -0,0 +1,237 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import json +import argparse +import time +import tensorrt +from tensorrt import Dims +import pycuda.autoinit +import pycuda.driver as cuda +import torch +import numpy as np +from tqdm import tqdm + +from common import create_engine_context, setup_io_bindings + +from pathlib import Path + +from ultralytics.cfg import get_cfg +from ultralytics.data import converter +from ultralytics.utils import DEFAULT_CFG +from ultralytics.data.utils import check_det_dataset +from ultralytics.utils.metrics import ConfusionMatrix +from ultralytics.models.yolo.detect import DetectionValidator + +coco_classes = {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', + 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', + 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', + 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', + 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', + 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed', + 60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone', 68: 'microwave', 69: 'oven', + 70: 'toaster', 71: 'sink', 72: 'refrigerator', 73: 'book', 74: 'clock', 75: 'vase', 76: 'scissors', 77: 'teddy bear', 78: 'hair drier', 79: 'toothbrush'} + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--model_engine", + type=str, + required=True, + help="ixrt engine path.") + + parser.add_argument("--bsz", + type=int, + required=True, + help="inference batch size.") + + 
parser.add_argument(
+        "--imgsz",
+        "--img",
+        "--img-size",
+        type=int,
+        default=640,
+        help="inference size h,w",
+    )
+
+    parser.add_argument("--datasets",
+                        type=str,
+                        required=True,
+                        help="datasets path.")
+
+    parser.add_argument("--warm_up",
+                        type=int,
+                        default=3,
+                        help="number of warmup before test.")
+
+    parser.add_argument("--num_workers",
+                        type=int,
+                        default=16,
+                        help="number of workers used in pytorch dataloader.")
+
+    parser.add_argument("--acc_target",
+                        type=float,
+                        default=0.0,
+                        help="Model inference Accuracy target.")
+
+    parser.add_argument("--fps_target",
+                        type=float,
+                        default=0.0,
+                        help="Model inference FPS target.")
+
+    parser.add_argument("--conf",
+                        type=float,
+                        default=0.001,
+                        help="confidence threshold.")
+
+    parser.add_argument("--iou",
+                        type=float,
+                        default=0.65,
+                        help="iou threshold.")
+
+    # argparse's type=bool treats any non-empty string (including "false") as
+    # True, so parse the flag value explicitly.
+    parser.add_argument("--perf_only",
+                        type=lambda x: str(x).lower() in ("1", "true", "yes"),
+                        default=False,
+                        help="Run performance test only")
+
+    args = parser.parse_args()
+
+    return args
+
+class IxRT_Validator(DetectionValidator):
+    def __call__(self, config, data):
+        self.data = data
+        self.stride = 32
+        self.dataloader = self.get_dataloader(self.data.get(self.args.split), self.args.batch)
+        self.init_metrics()
+
+        input_name = "input"
+        logger = tensorrt.Logger(tensorrt.Logger.ERROR)
+        engine, context = create_engine_context(config.model_engine, logger)
+        input_idx = engine.get_binding_index(input_name)
+        context.set_binding_shape(input_idx, Dims((config.bsz,3,config.imgsz,config.imgsz)))
+        inputs, outputs, allocations = setup_io_bindings(engine, context)
+
+        if config.warm_up > 0:
+            print("\nWarm Start.")
+            for i in range(config.warm_up):
+                context.execute_v2(allocations)
+            print("Warm Done.")
+
+        forward_time = 0.0
+        num_samples = 0
+
+        for batch in tqdm(self.dataloader):
+            batch = self.preprocess(batch)
+
+            imgs = batch['img']
+            # Pad the last partial batch up to the engine batch size; the
+            # padded outputs are trimmed again after inference.
+            pad_batch = len(imgs) != self.args.batch
+            if pad_batch:
+                origin_size = len(imgs)
+                imgs = np.resize(imgs, (self.args.batch, *imgs.shape[1:]))
+
+            batch_data = np.ascontiguousarray(imgs)
+            data_shape = batch_data.shape
+
+            cur_bsz_sample = batch_data.shape[0]
+            num_samples += cur_bsz_sample
+
+            # Set input
+            input_idx = engine.get_binding_index(input_name)
+            context.set_binding_shape(input_idx, Dims(data_shape))
+            inputs, outputs, allocations = setup_io_bindings(engine, context)
+
+            cuda.memcpy_htod(inputs[0]["allocation"], batch_data)
+            # Prepare the output data
+            output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"])
+
+            start_time = time.time()
+            context.execute_v2(allocations)
+            end_time = time.time()
+            forward_time += end_time - start_time
+
+            cuda.memcpy_dtoh(output, outputs[0]["allocation"])
+            if pad_batch:
+                output = output[:origin_size]
+
+            # Use a distinct name so the `outputs` binding list is not clobbered.
+            pred = torch.from_numpy(output)
+
+            preds = self.postprocess([pred])
+
+            self.update_metrics(preds, batch)
+
+        if config.perf_only:
+            fps = num_samples / forward_time
+            return fps
+        else:
+            stats = self.get_stats()
+
+            if self.args.save_json and self.jdict:
+                with open(str(self.save_dir / 'predictions.json'), 'w') as f:
+                    print(f'Saving {f.name} ...')
+                    json.dump(self.jdict, f)  # flatten and save
+
+                stats = self.eval_json(stats)
+
+            return stats
+
+    def init_metrics(self):
+        """Initialize evaluation metrics for YOLO."""
+        val = self.data.get(self.args.split, '')  # validation path
+        self.is_coco = isinstance(val, str) and 'coco' in val and val.endswith(f'{os.sep}val2017.txt')  # is COCO
+        self.class_map = converter.coco80_to_coco91_class() if self.is_coco else list(range(1000))
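+        # class_map above converts the 80 contiguous training ids back to the
+        # original 91 COCO category ids expected by the json evaluator.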
+        self.args.save_json |= self.is_coco and not self.training  # run on final val if training COCO
+        self.names = self.data['names']
+        self.nc = len(self.names)
+        self.metrics.names = self.names
+        self.confusion_matrix = ConfusionMatrix(nc=self.nc)
+        self.seen = 0
+        self.jdict = []
+        self.stats = dict(tp=[], conf=[], pred_cls=[], target_cls=[], target_img=[])
+
+def main():
+    config = parse_args()
+
+    batch_size = config.bsz
+
+    overrides = {'mode': 'val'}
+    cfg_args = get_cfg(cfg=DEFAULT_CFG, overrides=overrides)
+
+    cfg_args.batch = batch_size
+    cfg_args.save_json = True
+
+    data = {
+        'path': Path(config.datasets),
+        'val': os.path.join(config.datasets, 'val2017.txt'),
+        'names': coco_classes
+    }
+
+    validator = IxRT_Validator(args=cfg_args, save_dir=Path('.'))
+
+    if config.perf_only:
+        fps = validator(config, data)
+        print("FPS : ", fps)
+        print(f"Performance Check : Test {fps} >= target {config.fps_target}")
+    else:
+        stats = validator(config, data)
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/models/cv/detection/yolov8/ixrt/quant.py b/models/cv/detection/yolov8/ixrt/quant.py
new file mode 100644
index 0000000000000000000000000000000000000000..70265cbc25d24d4ed41640c76f78a1839555f749
--- /dev/null
+++ b/models/cv/detection/yolov8/ixrt/quant.py
@@ -0,0 +1,105 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
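+# Post-training static quantization of the exported ONNX model. Usage sketch
+# (mirrors the int8 run scripts; paths are examples):
+#   python3 quant.py --model_name YOLOV8N --model data/yolov8n.onnx \
+#       --dataset_dir data/coco/images/val2017 --observer hist_percentile \
+#       --save_quant_model data/quantized_yolov8n_bs32.onnx --bsz 32 --imgsz 640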
+import os
+import cv2
+import random
+import argparse
+import numpy as np
+from tensorrt.deploy import static_quantize

+import torch
+import torchvision.datasets
+from torch.utils.data import DataLoader
+from common import letterbox
+
+
+def setseed(seed=42):
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--model_name", type=str)
+    parser.add_argument("--model", type=str, default="yolov8n.onnx")
+    parser.add_argument("--dataset_dir", type=str, default="./coco2017/val2017")
+    parser.add_argument("--ann_file", type=str, default="./coco2017/annotations/instances_val2017.json")
+    parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], default="hist_percentile")
+    parser.add_argument("--disable_quant_names", nargs='*', type=str)
+    parser.add_argument("--save_quant_model", type=str, help="save the quantization model path", default=None)
+    parser.add_argument("--bsz", type=int, default=16)
+    parser.add_argument("--step", type=int, default=32)
+    parser.add_argument("--seed", type=int, default=42)
+    parser.add_argument("--imgsz", type=int, default=608)
+    parser.add_argument("--use_letterbox", action="store_true")
+    args = parser.parse_args()
+    return args
+
+args = parse_args()
+setseed(args.seed)
+model_name = args.model_name
+
+
+def get_dataloader(data_dir, step=32, batch_size=16, new_shape=[608, 608], use_letterbox=False):
+    # Sample step * batch_size images at random from the dataset directory
+    # to build the calibration set.
+    num = step * batch_size
+    val_list = [os.path.join(data_dir, x) for x in os.listdir(data_dir)]
+    random.shuffle(val_list)
+    pic_list = val_list[:num]
+
+    calibration_dataset = []
+    for file_path in pic_list:
+        org_img = cv2.imread(file_path)
+        assert org_img is not None, 'Image not Found ' + file_path
+
+        if use_letterbox:
+            img, _, _ = letterbox(org_img, new_shape=(new_shape[1], new_shape[0]), auto=False, scaleup=True)
+        else:
+            img = cv2.resize(org_img, new_shape)
+        img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
+        img = np.ascontiguousarray(img) / 255.0  # 0~1 np array
+        img = torch.from_numpy(img).float()
+
+        calibration_dataset.append(img)
+
+    calibration_dataloader = DataLoader(
+        calibration_dataset,
+        shuffle=True,
+        batch_size=batch_size,
+        drop_last=True
+    )
+    return calibration_dataloader
+
+dataloader = get_dataloader(
+    data_dir=args.dataset_dir,
+    step=args.step,
+    batch_size=args.bsz,
+    new_shape=(args.imgsz, args.imgsz),
+    use_letterbox=args.use_letterbox
+)
+
+# --save_quant_model is expected to be set (the run scripts always pass it);
+# the quantization parameters json is written alongside it.
+dirname = os.path.dirname(args.save_quant_model)
+quant_json_path = os.path.join(dirname, f"quantized_{model_name}.json")
+
+static_quantize(args.model,
+                calibration_dataloader=dataloader,
+                save_quant_onnx_path=args.save_quant_model,
+                save_quant_params_path=quant_json_path,
+                observer=args.observer,
+                data_preprocess=lambda x: x.to("cuda"),
+                quant_format="qdq",
+                disable_quant_names=args.disable_quant_names)
diff --git a/models/cv/detection/yolov8/ixrt/scripts/infer_yolov8n_fp16_accuracy.sh b/models/cv/detection/yolov8/ixrt/scripts/infer_yolov8n_fp16_accuracy.sh
new file mode 100644
index 0000000000000000000000000000000000000000..44e7537657a65fc84d89531b8df9ad647513dfbe
--- /dev/null
+++ b/models/cv/detection/yolov8/ixrt/scripts/infer_yolov8n_fp16_accuracy.sh
@@ -0,0 +1,66 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+EXIT_STATUS=0
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then
+        EXIT_STATUS=1
+    fi
+}
+
+PROJ_DIR=$(cd $(dirname $0);cd ../; pwd)
+DATASETS_DIR="${PROJ_DIR}/data/coco"
+COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json
+EVAL_DIR=${DATASETS_DIR}/images/val2017
+CHECKPOINTS_DIR="${PROJ_DIR}/data"
+RUN_DIR="${PROJ_DIR}"
+ORIGINE_MODEL=${CHECKPOINTS_DIR}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo ====================== Model Info ======================
+echo Model Name : yolov8n
+echo Onnx Path : ${ORIGINE_MODEL}
+
+BATCH_SIZE=32
+CURRENT_MODEL=${CHECKPOINTS_DIR}/yolov8n.onnx
+
+# Build Engine
+echo Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/yolov8n_fp16.engine
+if [ -f "$ENGINE_FILE" ]; then
+    echo "    Skip building engine, $ENGINE_FILE already exists"
+else
+    python3 ${RUN_DIR}/build_engine.py \
+        --precision float16 \
+        --model ${CURRENT_MODEL} \
+        --engine ${ENGINE_FILE}
+    echo "    Generated engine ${ENGINE_FILE}"
+fi
+
+# Inference
+echo Inference
+RUN_BATCH_SIZE=32
+python3 ${RUN_DIR}/inference.py \
+    --model_engine ${ENGINE_FILE} \
+    --warm_up 2 \
+    --bsz ${RUN_BATCH_SIZE} \
+    --imgsz 640 \
+    --datasets ${DATASETS_DIR} \
+    --acc_target 0.3
+check_status
+exit ${EXIT_STATUS}
diff --git a/models/cv/detection/yolov8/ixrt/scripts/infer_yolov8n_fp16_performance.sh b/models/cv/detection/yolov8/ixrt/scripts/infer_yolov8n_fp16_performance.sh
new file mode 100644
index 0000000000000000000000000000000000000000..1ab3808f1f45cf2072fa41a2107fa88c17fa3610
--- /dev/null
+++ b/models/cv/detection/yolov8/ixrt/scripts/infer_yolov8n_fp16_performance.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
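+# Performance-only variant: builds (or reuses) the FP16 engine, then measures
+# throughput via inference.py --perf_only true; accuracy is not evaluated here.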
+
+EXIT_STATUS=0
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then
+        EXIT_STATUS=1
+    fi
+}
+
+PROJ_DIR=$(cd $(dirname $0);cd ../; pwd)
+DATASETS_DIR="${PROJ_DIR}/data/coco"
+COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json
+EVAL_DIR=${DATASETS_DIR}/images/val2017
+CHECKPOINTS_DIR="${PROJ_DIR}/data"
+RUN_DIR="${PROJ_DIR}"
+ORIGINE_MODEL=${CHECKPOINTS_DIR}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo ====================== Model Info ======================
+echo Model Name : yolov8n
+echo Onnx Path : ${ORIGINE_MODEL}
+
+BATCH_SIZE=32
+CURRENT_MODEL=${CHECKPOINTS_DIR}/yolov8n.onnx
+
+# Build Engine
+echo Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/yolov8n_fp16.engine
+if [ -f "$ENGINE_FILE" ]; then
+    echo "    Skip building engine, $ENGINE_FILE already exists"
+else
+    python3 ${RUN_DIR}/build_engine.py \
+        --precision float16 \
+        --model ${CURRENT_MODEL} \
+        --engine ${ENGINE_FILE}
+    echo "    Generated engine ${ENGINE_FILE}"
+fi
+
+# Inference
+echo Inference
+RUN_BATCH_SIZE=32
+python3 ${RUN_DIR}/inference.py \
+    --model_engine ${ENGINE_FILE} \
+    --warm_up 2 \
+    --bsz ${RUN_BATCH_SIZE} \
+    --imgsz 640 \
+    --datasets ${DATASETS_DIR} \
+    --perf_only true \
+    --fps_target 0.0
+check_status
+exit ${EXIT_STATUS}
diff --git a/models/cv/detection/yolov8/ixrt/scripts/infer_yolov8n_int8_accuracy.sh b/models/cv/detection/yolov8/ixrt/scripts/infer_yolov8n_int8_accuracy.sh
new file mode 100644
index 0000000000000000000000000000000000000000..a2257463d70ee8fe6e9853db0fafd44f98ad8c83
--- /dev/null
+++ b/models/cv/detection/yolov8/ixrt/scripts/infer_yolov8n_int8_accuracy.sh
@@ -0,0 +1,86 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
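+# INT8 flow: calibrate and quantize the ONNX model (quant.py), build an INT8
+# engine, then evaluate accuracy on COCO val2017. The decode/DFL head layers
+# listed in DISABLE_NAMES are excluded from quantization.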
+
+EXIT_STATUS=0
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then
+        EXIT_STATUS=1
+    fi
+}
+
+PROJ_DIR=$(cd $(dirname $0);cd ../; pwd)
+DATASETS_DIR="${PROJ_DIR}/data/coco"
+COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json
+EVAL_DIR=${DATASETS_DIR}/images/val2017
+CHECKPOINTS_DIR="${PROJ_DIR}/data"
+RUN_DIR="${PROJ_DIR}"
+ORIGINE_MODEL=${CHECKPOINTS_DIR}
+DISABLE_NAMES=('/model.22/Concat' '/model.22/Concat_1' '/model.22/Concat_2' '/model.22/Reshape' '/model.22/Reshape_1' '/model.22/Reshape_2' '/model.22/Concat_3' '/model.22/Split' '/model.22/dfl/Reshape' '/model.22/dfl/Transpose' '/model.22/dfl/Softmax' '/model.22/dfl/Transpose_1' '/model.22/dfl/conv/Conv' '/model.22/dfl/Reshape_1' '/model.22/Slice' '/model.22/Slice_1' '/model.22/Sub' '/model.22/Add_1' '/model.22/Add_2' '/model.22/Div_1' '/model.22/Sub_1' '/model.22/Concat_4' '/model.22/Mul_2' '/model.22/Sigmoid' '/model.22/Concat_5')
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo ====================== Model Info ======================
+echo Model Name : yolov8n
+echo Onnx Path : ${ORIGINE_MODEL}
+
+BATCH_SIZE=32
+CURRENT_MODEL=${CHECKPOINTS_DIR}/yolov8n.onnx
+
+# quant
+FINAL_MODEL=${CHECKPOINTS_DIR}/quantized_yolov8n_bs${BATCH_SIZE}.onnx
+if [ -f "$FINAL_MODEL" ]; then
+    echo "    Skip quantization, $FINAL_MODEL already exists"
+else
+    python3 ${RUN_DIR}/quant.py \
+        --model_name "YOLOV8N" \
+        --model ${CURRENT_MODEL} \
+        --bsz ${BATCH_SIZE} \
+        --dataset_dir ${EVAL_DIR} \
+        --ann_file ${COCO_GT} \
+        --observer "hist_percentile" \
+        --save_quant_model ${FINAL_MODEL} \
+        --disable_quant_names "${DISABLE_NAMES[@]}" \
+        --imgsz 640
+    echo "    Generated ${FINAL_MODEL}"
+fi
+CURRENT_MODEL=${FINAL_MODEL}
+
+# Build Engine
+echo Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/yolov8n_int8.engine
+if [ -f "$ENGINE_FILE" ]; then
+    echo "    Skip building engine, $ENGINE_FILE already exists"
+else
+    python3 ${RUN_DIR}/build_engine.py \
+        --precision int8 \
+        --model ${CURRENT_MODEL} \
+        --engine ${ENGINE_FILE}
+    echo "    Generated engine ${ENGINE_FILE}"
+fi
+
+# Inference
+echo Inference
+RUN_BATCH_SIZE=32
+python3 ${RUN_DIR}/inference.py \
+    --model_engine ${ENGINE_FILE} \
+    --warm_up 2 \
+    --bsz ${RUN_BATCH_SIZE} \
+    --imgsz 640 \
+    --datasets ${DATASETS_DIR} \
+    --acc_target 0.3
+check_status
+exit ${EXIT_STATUS}
diff --git a/models/cv/detection/yolov8/ixrt/scripts/infer_yolov8n_int8_performance.sh b/models/cv/detection/yolov8/ixrt/scripts/infer_yolov8n_int8_performance.sh
new file mode 100644
index 0000000000000000000000000000000000000000..f1774d5b2b28ce734dadb3e022a3359b3790f2da
--- /dev/null
+++ b/models/cv/detection/yolov8/ixrt/scripts/infer_yolov8n_int8_performance.sh
@@ -0,0 +1,86 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+EXIT_STATUS=0
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then
+        EXIT_STATUS=1
+    fi
+}
+
+PROJ_DIR=$(cd $(dirname $0);cd ../; pwd)
+DATASETS_DIR="${PROJ_DIR}/data/coco"
+COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json
+EVAL_DIR=${DATASETS_DIR}/images/val2017
+CHECKPOINTS_DIR="${PROJ_DIR}/data"
+RUN_DIR="${PROJ_DIR}"
+ORIGINE_MODEL=${CHECKPOINTS_DIR}
+# Same decode/DFL head layers as the accuracy script; excluded from quantization.
+DISABLE_NAMES=('/model.22/Concat' '/model.22/Concat_1' '/model.22/Concat_2' '/model.22/Reshape' '/model.22/Reshape_1' '/model.22/Reshape_2' '/model.22/Concat_3' '/model.22/Split' '/model.22/dfl/Reshape' '/model.22/dfl/Transpose' '/model.22/dfl/Softmax' '/model.22/dfl/Transpose_1' '/model.22/dfl/conv/Conv' '/model.22/dfl/Reshape_1' '/model.22/Slice' '/model.22/Slice_1' '/model.22/Sub' '/model.22/Add_1' '/model.22/Add_2' '/model.22/Div_1' '/model.22/Sub_1' '/model.22/Concat_4' '/model.22/Mul_2' '/model.22/Sigmoid' '/model.22/Concat_5')
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo ====================== Model Info ======================
+echo Model Name : yolov8n
+echo Onnx Path : ${ORIGINE_MODEL}
+
+BATCH_SIZE=32
+CURRENT_MODEL=${CHECKPOINTS_DIR}/yolov8n.onnx
+
+# quant
+FINAL_MODEL=${CHECKPOINTS_DIR}/quantized_yolov8n_bs${BATCH_SIZE}.onnx
+if [ -f "$FINAL_MODEL" ]; then
+    echo "    Skip quantization, $FINAL_MODEL already exists"
+else
+    python3 ${RUN_DIR}/quant.py \
+        --model_name "YOLOV8N" \
+        --model ${CURRENT_MODEL} \
+        --bsz ${BATCH_SIZE} \
+        --dataset_dir ${EVAL_DIR} \
+        --ann_file ${COCO_GT} \
+        --observer "hist_percentile" \
+        --save_quant_model ${FINAL_MODEL} \
+        --disable_quant_names "${DISABLE_NAMES[@]}" \
+        --imgsz 640
+    echo "    Generated ${FINAL_MODEL}"
+fi
+CURRENT_MODEL=${FINAL_MODEL}
+
+# Build Engine
+echo Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/yolov8n_int8.engine
+if [ -f "$ENGINE_FILE" ]; then
+    echo "    Skip building engine, $ENGINE_FILE already exists"
+else
+    python3 ${RUN_DIR}/build_engine.py \
+        --precision int8 \
+        --model ${CURRENT_MODEL} \
+        --engine ${ENGINE_FILE}
+    echo "    Generated engine ${ENGINE_FILE}"
+fi
+
+# Inference
+echo Inference
+RUN_BATCH_SIZE=32
+python3 ${RUN_DIR}/inference.py \
+    --model_engine ${ENGINE_FILE} \
+    --warm_up 2 \
+    --bsz ${RUN_BATCH_SIZE} \
+    --imgsz 640 \
+    --datasets ${DATASETS_DIR} \
+    --perf_only true \
+    --fps_target 0.0
+check_status
+exit ${EXIT_STATUS}
\ No newline at end of file