diff --git a/models/cv/detection/fcos/ixrt/README.md b/models/cv/detection/fcos/ixrt/README.md
new file mode 100755
index 0000000000000000000000000000000000000000..e0091c48e62f0f2b81ee37d759b8af5c7b096cf4
--- /dev/null
+++ b/models/cv/detection/fcos/ixrt/README.md
@@ -0,0 +1,102 @@
+# FCOS
+
+## Description
+
+FCOS is an anchor-free model based on the fully convolutional network (FCN) architecture for pixel-wise object detection. It implements a proposal-free solution and introduces the concept of centerness.
+For more details, please refer to the [report on arXiv](https://arxiv.org/abs/1904.01355).
+
+## Setup
+
+### Install
+
+```bash
+yum install mesa-libGL
+pip3 install tqdm
+pip3 install onnx
+pip3 install onnxsim
+pip3 install ultralytics
+pip3 install pycocotools
+pip3 install addict
+pip3 install yapf
+pip3 install pycuda
+pip3 install mmdet==2.28.2
+pip3 install opencv-python==4.6.0.66
+```
+
+### Dependency
+
+Inference with the FCOS model depends on an adapted build of the mmcv v1.7.0 library. Please contact the support staff to obtain it, then build and install it:
+
+```bash
+cd mmcv
+sh build_mmcv.sh
+sh install_mmcv.sh
+```
+
+### Download
+
+Pretrained model: the MMDetection FCOS checkpoint referenced in the conversion command below.
+
+- For COCO2017 dataset preparation, refer to: https://cocodataset.org/
+  - Images directory: Path/To/val2017/*.jpg
+  - Annotations file: Path/To/annotations/instances_val2017.json
+
+### Model Conversion
+
+MMDetection is an open-source object detection toolbox based on PyTorch and part of the OpenMMLab project; it is used here for model conversion. Clone it, create a `checkpoints` directory in the project root (`mkdir checkpoints`), place the pretrained weights there, and run the conversion command:
+
+```bash
+git clone -b v2.25.0 https://github.com/open-mmlab/mmdetection.git
+cd mmdetection
+mkdir checkpoints
+python3 tools/deployment/pytorch2onnx.py \
+    /Path/to/fcos/ixrt/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco.py \
+    checkpoints/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco-0a0d75a8.pth \
+    --output-file /Path/To/ixrt/data/checkpoints/r50_fcos.onnx \
+    --input-img demo/demo.jpg \
+    --test-img tests/data/color.jpg \
+    --shape 800 800 \
+    --show \
+    --verify \
+    --skip-postprocess \
+    --dynamic-export \
+    --cfg-options \
+    model.test_cfg.deploy_nms_pre=-1
+```
+
+If model export fails with an input-parameter mismatch, the installed ONNX version may be the cause. To work around it, delete the last parameter (`dynamic_slice`) from the return value of the `_slice_helper` function in `/usr/local/lib/python3.10/site-packages/mmcv/onnx/onnx_utils/symbolic_helper.py`.
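+
+Optionally, sanity-check the exported ONNX file before building an engine. This is a minimal sketch using the `onnx` package installed above; adjust the path to wherever you wrote `r50_fcos.onnx`:
+
+```python
+import onnx
+
+# Load the exported model and run ONNX's structural checker.
+model = onnx.load("/Path/To/ixrt/data/checkpoints/r50_fcos.onnx")
+onnx.checker.check_model(model)
+print([i.name for i in model.graph.input], "->", [o.name for o in model.graph.output])
+```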
+
+## Inference
+
+```bash
+export PROJ_DIR=./
+export DATASETS_DIR=/Path/to/coco/
+export CHECKPOINTS_DIR=/Path/to/checkpoints
+export RUN_DIR=./
+```
+
+### FP16
+
+```bash
+# Accuracy
+bash scripts/infer_fcos_fp16_accuracy.sh
+# Performance
+bash scripts/infer_fcos_fp16_performance.sh
+```
+
+## Results
+
+| Model | BatchSize | Precision | FPS   | mAP@0.5 | mAP@0.5:0.95 |
+|-------|-----------|-----------|-------|---------|--------------|
+| FCOS  | 1         | FP16      | 51.62 | 0.546   | 0.360        |
\ No newline at end of file
diff --git a/models/cv/detection/fcos/ixrt/build_engine.py b/models/cv/detection/fcos/ixrt/build_engine.py
new file mode 100755
index 0000000000000000000000000000000000000000..af649916756a27bde0aea18b9f3572a430a424d9
--- /dev/null
+++ b/models/cv/detection/fcos/ixrt/build_engine.py
@@ -0,0 +1,102 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import os
+import cv2
+import argparse
+import numpy as np
+
+import torch
+import tensorrt
+from calibration_dataset import getdataloader
+import cuda.cudart as cudart
+
+def assertSuccess(err):
+    assert(err == cudart.cudaError_t.cudaSuccess)
+
+class EngineCalibrator(tensorrt.IInt8EntropyCalibrator2):
+
+    def __init__(self, cache_file, datasets_dir, loop_count=10, bsz=1, img_sz=800):
+        super().__init__()
+        self.cache_file = cache_file
+        self.image_batcher = getdataloader(datasets_dir, loop_count, batch_size=bsz, img_sz=img_sz)
+        self.batch_generator = iter(self.image_batcher)
+        # Device buffer for one batch of float32 CHW images (element size in bytes included).
+        size = img_sz * img_sz * 3 * bsz * np.dtype(np.float32).itemsize
+        err, self.batch_allocation = cudart.cudaMalloc(size)
+        assertSuccess(err)
+
+    def __del__(self):
+        err, = cudart.cudaFree(self.batch_allocation)
+        assertSuccess(err)
+
+    def get_batch_size(self):
+        return self.image_batcher.batch_size
+
+    def get_batch(self, names):
+        try:
+            batch, _ = next(self.batch_generator)
+            batch = batch.numpy()
+            cudart.cudaMemcpy(self.batch_allocation,
+                              np.ascontiguousarray(batch),
+                              batch.nbytes,
+                              cudart.cudaMemcpyKind.cudaMemcpyHostToDevice)
+            return [int(self.batch_allocation)]
+        except StopIteration:
+            return None
+
+    def read_calibration_cache(self):
+        if os.path.exists(self.cache_file):
+            with open(self.cache_file, "rb") as f:
+                return f.read()
+
+    def write_calibration_cache(self, cache):
+        with open(self.cache_file, "wb") as f:
+            f.write(cache)
+
+def main(config):
+    IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.VERBOSE)
+    builder = tensorrt.Builder(IXRT_LOGGER)
+    EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
+    network = builder.create_network(EXPLICIT_BATCH)
+    build_config = builder.create_builder_config()
+    parser = tensorrt.OnnxParser(network, IXRT_LOGGER)
+    parser.parse_from_file(config.model)
+
+    precision = tensorrt.BuilderFlag.FP16
+    print("precision : ", precision)
+    build_config.set_flag(precision)
+
+    plan = builder.build_serialized_network(network, build_config)
+    engine_file_path = config.engine
+    with open(engine_file_path, "wb") as f:
+        f.write(plan)
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--model", type=str)
+    parser.add_argument("--engine", type=str, default=None)
+    parser.add_argument(
+        "--datasets_dir",
+        type=str,
+        default="",
+        help="ImageNet dir",
+    )
+    args = parser.parse_args()
+    return args
+
+if __name__ == "__main__":
+    args = parse_args()
+    main(args)
\ No newline at end of file
diff --git a/models/cv/detection/fcos/ixrt/calibration_dataset.py b/models/cv/detection/fcos/ixrt/calibration_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..d7525d5136168cc8fb1d24a28f1b71b85ce4cc92
--- /dev/null
+++ b/models/cv/detection/fcos/ixrt/calibration_dataset.py
@@ -0,0 +1,117 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import os
+
+import torch
+import torchvision.datasets
+from torch.utils.data import DataLoader
+from torchvision import models
+from torchvision import transforms as T
+
+
+class CalibrationImageNet(torchvision.datasets.ImageFolder):
+    def __init__(self, *args, **kwargs):
+        super(CalibrationImageNet, self).__init__(*args, **kwargs)
+        img2label_path = os.path.join(self.root, "val_map.txt")
+        if not os.path.exists(img2label_path):
+            raise FileNotFoundError(f"Label file `{img2label_path}` not found.")
+
+        self.img2label_map = self.make_img2label_map(img2label_path)
+
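+    # val_map.txt maps image file names to integer class labels, one
+    # tab-separated pair per line, e.g. (illustrative file names):
+    #   ILSVRC2012_val_00000001.JPEG	65
+    #   ILSVRC2012_val_00000002.JPEG	970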
+    def make_img2label_map(self, path):
+        with open(path) as f:
+            lines = f.readlines()
+
+        img2label_map = dict()
+        for line in lines:
+            line = line.lstrip().rstrip().split("\t")
+            if len(line) != 2:
+                continue
+            img_name, label = line
+            img_name = img_name.strip()
+            if img_name in [None, ""]:
+                continue
+            label = int(label.strip())
+            img2label_map[img_name] = label
+        return img2label_map
+
+    def __getitem__(self, index):
+        path, target = self.samples[index]
+        sample = self.loader(path)
+        if self.transform is not None:
+            sample = self.transform(sample)
+        # if self.target_transform is not None:
+        #     target = self.target_transform(target)
+        img_name = os.path.basename(path)
+        target = self.img2label_map[img_name]
+
+        return sample, target
+
+
+def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0):
+    dataset = CalibrationImageNet(
+        data_path,
+        transform=T.Compose(
+            [
+                T.Resize(256),
+                T.CenterCrop(img_sz),
+                T.ToTensor(),
+                T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+            ]
+        ),
+    )
+
+    calibration_dataset = dataset
+    if num_samples is not None:
+        calibration_dataset = torch.utils.data.Subset(
+            dataset, indices=range(num_samples)
+        )
+
+    calibration_dataloader = DataLoader(
+        calibration_dataset,
+        shuffle=False,
+        batch_size=batch_size,
+        drop_last=False,
+        num_workers=workers,
+    )
+
+    verify_dataloader = DataLoader(
+        dataset,
+        shuffle=False,
+        batch_size=batch_size,
+        drop_last=False,
+        num_workers=workers,
+    )
+
+    return calibration_dataloader, verify_dataloader
+
+
+def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000):
+    num_samples = min(total_sample, step * batch_size)
+    if step < 0:
+        num_samples = None
+    calibration_dataloader, _ = create_dataloaders(
+        dataset_dir,
+        img_sz=img_sz,
+        batch_size=batch_size,
+        workers=workers,
+        num_samples=num_samples,
+    )
+    return calibration_dataloader
\ No newline at end of file
diff --git a/models/cv/detection/fcos/ixrt/common.py b/models/cv/detection/fcos/ixrt/common.py
new file mode 100644
index 0000000000000000000000000000000000000000..b18a24394c934c40f1f1ab761ff946edbf69f53a
--- /dev/null
+++ b/models/cv/detection/fcos/ixrt/common.py
@@ -0,0 +1,97 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import numpy as np +from tqdm import tqdm + +import tensorrt +import pycuda.driver as cuda + +# input : [bsz, box_num, 5(cx, cy, w, h, conf) + class_num(prob[0], prob[1], ...)] +# output : [bsz, box_num, 6(left_top_x, left_top_y, right_bottom_x, right_bottom_y, class_id, max_prob*conf)] +def box_class85to6(input): + center_x_y = input[:, :2] + side = input[:, 2:4] + conf = input[:, 4:5] + class_id = np.argmax(input[:, 5:], axis = -1) + class_id = class_id.astype(np.float32).reshape(-1, 1) + 1 + max_prob = np.max(input[:, 5:], axis = -1).reshape(-1, 1) + x1_y1 = center_x_y - 0.5 * side + x2_y2 = center_x_y + 0.5 * side + nms_input = np.concatenate([x1_y1, x2_y2, class_id, max_prob*conf], axis = -1) + return nms_input + +def save2json(batch_img_id, pred_boxes, json_result): + for i, boxes in enumerate(pred_boxes): + image_id = int(batch_img_id) + if boxes is not None: + x, y, w, h, c, p = boxes + if image_id!=-1: + + x, y, w, h, p = float(x), float(y), float(w), float(h), float(p) + c = int(c) + json_result.append( + { + "image_id": image_id, + "category_id": c, + "bbox": [x, y, w, h], + "score": p, + } + ) + +def create_engine_context(engine_path, logger): + with open(engine_path, "rb") as f: + runtime = tensorrt.Runtime(logger) + assert runtime + engine = runtime.deserialize_cuda_engine(f.read()) + assert engine + context = engine.create_execution_context() + assert context + + return engine, context + +def get_io_bindings(engine): + # Setup I/O bindings + inputs = [] + outputs = [] + allocations = [] + + for i in range(engine.num_bindings): + is_input = False + if engine.binding_is_input(i): + is_input = True + name = engine.get_binding_name(i) + dtype = engine.get_binding_dtype(i) + shape = engine.get_binding_shape(i) + if is_input: + batch_size = shape[0] + size = np.dtype(tensorrt.nptype(dtype)).itemsize + for s in shape: + size *= s + allocation = cuda.mem_alloc(size) + binding = { + "index": i, + "name": name, + "dtype": np.dtype(tensorrt.nptype(dtype)), + "shape": list(shape), + "allocation": allocation, + } + # print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") + allocations.append(allocation) + if engine.binding_is_input(i): + inputs.append(binding) + else: + outputs.append(binding) + return inputs, outputs, allocations \ No newline at end of file diff --git a/models/cv/detection/fcos/ixrt/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco.py b/models/cv/detection/fcos/ixrt/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..72d17de86f01b3f4b1b39e8ea6fb0dfa32abfe0a --- /dev/null +++ b/models/cv/detection/fcos/ixrt/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco.py @@ -0,0 +1,69 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +_base_ = 'fcos_r50_caffe_fpn_gn-head_1x_coco.py' + +model = dict( + backbone=dict( + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron2/resnet50_caffe')), + bbox_head=dict( + norm_on_bbox=True, + centerness_on_reg=True, + dcn_on_last_conv=False, + center_sampling=True, + conv_bias=True, + loss_bbox=dict(type='GIoULoss', loss_weight=1.0)), + # training and testing settings + test_cfg=dict(nms=dict(type='nms', iou_threshold=0.6))) + +# dataset settings +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +optimizer_config = dict(_delete_=True, grad_clip=None) + +lr_config = dict(warmup='linear') \ No newline at end of file diff --git a/models/cv/detection/fcos/ixrt/fcos_ixrt_inference.py b/models/cv/detection/fcos/ixrt/fcos_ixrt_inference.py new file mode 100644 index 0000000000000000000000000000000000000000..a5bdf3e79352dd1a132c6b825ecd56129b291288 --- /dev/null +++ b/models/cv/detection/fcos/ixrt/fcos_ixrt_inference.py @@ -0,0 +1,198 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+
+import os
+import sys
+import time
+import argparse
+
+import numpy as np
+from tqdm import tqdm
+
+import torch
+import mmcv
+from mmdet.datasets import build_dataloader, build_dataset
+from mmdet.models import build_detector
+from mmdet.core import bbox2result
+
+from common import create_engine_context, get_io_bindings
+import pycuda.autoinit
+import pycuda.driver as cuda
+import tensorrt
+
+def check_target(inference, target):
+    # Use >= so the exit status agrees with the Pass/Fail message printed below.
+    return inference >= target
+
+def get_dataloader(args):
+    cfg_path = args.cfg_file
+    cfg = mmcv.Config.fromfile(cfg_path)
+    datasets_path = args.data_path
+    cfg['data']['val']['img_prefix'] = os.path.join(datasets_path, 'val2017')
+    cfg['data']['val']['ann_file'] = os.path.join(datasets_path, 'annotations/instances_val2017.json')
+    dataset = build_dataset(cfg.data.val)
+    data_loader = build_dataloader(dataset, samples_per_gpu=args.batch_size, workers_per_gpu=args.num_workers, shuffle=False)
+    model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
+    return dataset, data_loader, model
+
+def eval_coco(args, inputs, outputs, allocations, context):
+    dataset, dataloader, model = get_dataloader(args)
+
+    # Prepare the output data
+    outputs_651 = np.zeros(outputs[0]["shape"], outputs[0]["dtype"])
+    outputs_766 = np.zeros(outputs[1]["shape"], outputs[1]["dtype"])
+    outputs_881 = np.zeros(outputs[2]["shape"], outputs[2]["dtype"])
+    outputs_996 = np.zeros(outputs[3]["shape"], outputs[3]["dtype"])
+    outputs_1111 = np.zeros(outputs[4]["shape"], outputs[4]["dtype"])
+    outputs_713 = np.zeros(outputs[5]["shape"], outputs[5]["dtype"])
+    outputs_828 = np.zeros(outputs[6]["shape"], outputs[6]["dtype"])
+    outputs_943 = np.zeros(outputs[7]["shape"], outputs[7]["dtype"])
+    outputs_1058 = np.zeros(outputs[8]["shape"], outputs[8]["dtype"])
+    outputs_1173 = np.zeros(outputs[9]["shape"], outputs[9]["dtype"])
+    outputs_705 = np.zeros(outputs[10]["shape"], outputs[10]["dtype"])
+    outputs_820 = np.zeros(outputs[11]["shape"], outputs[11]["dtype"])
+    outputs_935 = np.zeros(outputs[12]["shape"], outputs[12]["dtype"])
+    outputs_1050 = np.zeros(outputs[13]["shape"], outputs[13]["dtype"])
+    outputs_1165 = np.zeros(outputs[14]["shape"], outputs[14]["dtype"])
+
+    preds = []
+    for batch in tqdm(dataloader):
+        image = batch['img'][0].data.numpy()
+        image = image.astype(inputs[0]["dtype"])
+        # Set input
+        image = np.ascontiguousarray(image)
+        cuda.memcpy_htod(inputs[0]["allocation"], image)
+        context.execute_v2(allocations)
+        # Fetch output
+        cuda.memcpy_dtoh(outputs_651, outputs[0]["allocation"])
+        cuda.memcpy_dtoh(outputs_766, outputs[1]["allocation"])
+        cuda.memcpy_dtoh(outputs_881, outputs[2]["allocation"])
+        cuda.memcpy_dtoh(outputs_996, outputs[3]["allocation"])
+        cuda.memcpy_dtoh(outputs_1111, outputs[4]["allocation"])
+        cuda.memcpy_dtoh(outputs_713, outputs[5]["allocation"])
+        cuda.memcpy_dtoh(outputs_828, outputs[6]["allocation"])
+        cuda.memcpy_dtoh(outputs_943, outputs[7]["allocation"])
+        cuda.memcpy_dtoh(outputs_1058, outputs[8]["allocation"])
+        cuda.memcpy_dtoh(outputs_1173, outputs[9]["allocation"])
+        cuda.memcpy_dtoh(outputs_705, outputs[10]["allocation"])
+        cuda.memcpy_dtoh(outputs_820, outputs[11]["allocation"])
+        cuda.memcpy_dtoh(outputs_935, outputs[12]["allocation"])
+        cuda.memcpy_dtoh(outputs_1050, outputs[13]["allocation"])
+        cuda.memcpy_dtoh(outputs_1165, outputs[14]["allocation"])
+
+        cls_score = []
+        box_reg = []
+        score_factors = []
+        cls_score.append(torch.from_numpy(outputs_651))
+        cls_score.append(torch.from_numpy(outputs_766))
+        cls_score.append(torch.from_numpy(outputs_881))
+        cls_score.append(torch.from_numpy(outputs_996))
+        cls_score.append(torch.from_numpy(outputs_1111))
+
+        box_reg.append(torch.from_numpy(outputs_713))
+        box_reg.append(torch.from_numpy(outputs_828))
+        box_reg.append(torch.from_numpy(outputs_943))
+        box_reg.append(torch.from_numpy(outputs_1058))
+        box_reg.append(torch.from_numpy(outputs_1173))
+
+        score_factors.append(torch.from_numpy(outputs_705))
+        score_factors.append(torch.from_numpy(outputs_820))
+        score_factors.append(torch.from_numpy(outputs_935))
+        score_factors.append(torch.from_numpy(outputs_1050))
+        score_factors.append(torch.from_numpy(outputs_1165))
+
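+        # The engine can report its outputs in an arbitrary order, so sort each
+        # branch by feature-map width (shape[3], descending) to restore the FPN
+        # level order that bbox_head.get_bboxes expects (largest level first).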
+        cls_score.sort(key=lambda x: x.shape[3], reverse=True)
+        box_reg.sort(key=lambda x: x.shape[3], reverse=True)
+        score_factors.sort(key=lambda x: x.shape[3], reverse=True)
+
+        pred = model.bbox_head.get_bboxes(cls_score, box_reg, score_factors=score_factors, img_metas=batch['img_metas'][0].data[0], rescale=True)
+        bbox_results = [
+            bbox2result(det_bboxes, det_labels, model.bbox_head.num_classes)
+            for det_bboxes, det_labels in pred
+        ]
+        preds.extend(bbox_results)
+    eval_results = dataset.evaluate(preds, metric=['bbox'])
+    print(eval_results)
+
+    map50 = eval_results['bbox_mAP_50']
+    return map50
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    # engine args
+    parser.add_argument("--engine", type=str, default="./r50_fcos.engine")
+    parser.add_argument("--cfg_file", type=str, default="fcos_r50_caffe_fpn_gn-head_1x_coco.py")
+    parser.add_argument("--data_path", type=str, default="/home/datasets/cv/coco")
+    parser.add_argument("--batch_size", type=int, default=16)
+    parser.add_argument("--num_workers", type=int, default=4)
+    parser.add_argument("--image_file", type=str, default="/home/fangjian.hu/workspace/ixrt/data/fcos_test/test_800.jpg")
+    parser.add_argument("--warm_up", type=int, default=40)
+    parser.add_argument("--loop_count", type=int, default=50)
+
+    parser.add_argument("--target_map", default=0.56, type=float, help="target mAP@0.5")
+    parser.add_argument("--target_fps", default=50, type=float, help="target fps")
+    parser.add_argument("--task", default="precision", type=str, help="precision or perf")
+
+    args = parser.parse_args()
+    return args
+
+def main():
+    args = parse_args()
+    logger = tensorrt.Logger(tensorrt.Logger.ERROR)
+
+    # Load Engine
+    engine, context = create_engine_context(args.engine, logger)
+    inputs, outputs, allocations = get_io_bindings(engine)
+
+    if args.task == "precision":
+        map50 = eval_coco(args, inputs, outputs, allocations, context)
+
+        print("=" * 40)
+        print("MAP50:{0}".format(round(map50, 3)))
+        print("=" * 40)
+        print(f"Check MAP50 Test : {round(map50, 3)} Target:{args.target_map} State : {'Pass' if round(map50, 3) >= args.target_map else 'Fail'}")
+        status_map = check_target(map50, args.target_map)
+        sys.exit(int(not (status_map)))
+
+    else:
+        torch.cuda.synchronize()
+        start_time = time.time()
+        for i in range(args.loop_count):
+            context.execute_v2(allocations)
+        torch.cuda.synchronize()
+        end_time = time.time()
+        forward_time = end_time - start_time
+        fps = args.loop_count * args.batch_size / forward_time
+        print("=" * 40)
+        print("fps:{0}".format(round(fps, 2)))
+        print("=" * 40)
+        print(f"Check fps Test : {round(fps, 3)} Target:{args.target_fps} State : {'Pass' if fps >= args.target_fps else 'Fail'}")
+        status_fps = 
check_target(fps, args.target_fps) + sys.exit(int(not (status_fps))) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/detection/fcos/ixrt/fcos_r50_caffe_fpn_gn-head_1x_coco.py b/models/cv/detection/fcos/ixrt/fcos_r50_caffe_fpn_gn-head_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..758d1d88571ac49ae26bc124f0052716bbf761d4 --- /dev/null +++ b/models/cv/detection/fcos/ixrt/fcos_r50_caffe_fpn_gn-head_1x_coco.py @@ -0,0 +1,103 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +# model settings +model = dict( + type='FCOS', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=False), + norm_eval=True, + style='caffe', + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron/resnet50_caffe')), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + add_extra_convs='on_output', # use P5 + num_outs=5, + relu_before_extra_convs=True), + bbox_head=dict( + type='FCOSHead', + num_classes=80, + in_channels=256, + stacked_convs=4, + feat_channels=256, + strides=[8, 16, 32, 64, 128], + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox=dict(type='IoULoss', loss_weight=1.0), + loss_centerness=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)), + # training and testing settings + train_cfg=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.4, + min_pos_iou=0, + ignore_iof_thr=-1), + allowed_border=-1, + pos_weight=-1, + debug=False), + test_cfg=dict( + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.5), + max_per_img=100)) + +# dataset settings +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) + +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(800, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=(800, 800)), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=32, + workers_per_gpu=1, + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'images/val2017/', + pipeline=test_pipeline) + ) +evaluation = dict(interval=1, metric='bbox') \ No newline at end of file diff --git a/models/cv/detection/fcos/ixrt/scripts/infer_fcos_fp16_accuracy.sh b/models/cv/detection/fcos/ixrt/scripts/infer_fcos_fp16_accuracy.sh new file mode 100755 index 0000000000000000000000000000000000000000..b6ccfe626f4a0f1ab6247aac4c4dc14f1998d3cb --- /dev/null +++ 
b/models/cv/detection/fcos/ixrt/scripts/infer_fcos_fp16_accuracy.sh
@@ -0,0 +1,96 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+#!/bin/bash
+
+EXIT_STATUS=0
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then
+        EXIT_STATUS=1
+    fi
+}
+
+# Run parameters
+BSZ=1
+WARM_UP=-1
+TGT=-1
+LOOP_COUNT=-1
+RUN_MODE=MAP
+PRECISION=float16
+
+# Update arguments
+index=0
+options=$@
+arguments=($options)
+for argument in $options
+do
+    index=`expr $index + 1`
+    case $argument in
+      --bs) BSZ=${arguments[index]};;
+      --tgt) TGT=${arguments[index]};;
+    esac
+done
+
+MODEL_NAME="r50_fcos"
+
+echo PROJ_DIR : ${PROJ_DIR}
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+
+step=0
+
+# Simplify Model
+let step++
+echo;
+echo [STEP ${step}] : Simplify Model
+SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
+if [ -f ${SIM_MODEL} ];then
+    echo "    "Simplify Model Skipped, ${SIM_MODEL} already exists
+else
+    python3 ${RUN_DIR}/simplify_model.py \
+        --origin_model ${CHECKPOINTS_DIR}/${MODEL_NAME}.onnx \
+        --output_model ${SIM_MODEL}
+    echo "    "Generated ${SIM_MODEL}
+fi
+
+
+# Build Engine
+let step++
+echo;
+echo [STEP ${step}] : Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}.engine
+if [ -f $ENGINE_FILE ];then
+    echo "    "Build Engine Skipped, $ENGINE_FILE already exists
+else
+    python3 ${RUN_DIR}/build_engine.py \
+        --model ${SIM_MODEL} \
+        --engine ${ENGINE_FILE}
+    echo "    "Generated Engine ${ENGINE_FILE}
+fi
+
+# Inference
+let step++
+echo;
+echo [STEP ${step}] : Inference
+python3 ${RUN_DIR}/fcos_ixrt_inference.py \
+    --engine ${ENGINE_FILE} \
+    --cfg_file ${RUN_DIR}/fcos_r50_caffe_fpn_gn-head_1x_coco.py \
+    --task "precision" \
+    --data_path ${DATASETS_DIR} \
+    --batch_size 1 \
+    --target_map 0.54; check_status
+exit ${EXIT_STATUS}
\ No newline at end of file
diff --git a/models/cv/detection/fcos/ixrt/scripts/infer_fcos_fp16_performance.sh b/models/cv/detection/fcos/ixrt/scripts/infer_fcos_fp16_performance.sh
new file mode 100755
index 0000000000000000000000000000000000000000..2bcf4d56b3fde440ecfdefd56be02cb1b673e428
--- /dev/null
+++ b/models/cv/detection/fcos/ixrt/scripts/infer_fcos_fp16_performance.sh
@@ -0,0 +1,95 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+#!/bin/bash
+
+EXIT_STATUS=0
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then
+        EXIT_STATUS=1
+    fi
+}
+
+# Run parameters
+BSZ=1
+WARM_UP=-1
+TGT=-1
+LOOP_COUNT=-1
+RUN_MODE=MAP
+PRECISION=float16
+
+# Update arguments
+index=0
+options=$@
+arguments=($options)
+for argument in $options
+do
+    index=`expr $index + 1`
+    case $argument in
+      --bs) BSZ=${arguments[index]};;
+      --tgt) TGT=${arguments[index]};;
+    esac
+done
+
+MODEL_NAME="r50_fcos"
+
+echo PROJ_DIR : ${PROJ_DIR}
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+
+step=0
+
+# Simplify Model
+let step++
+echo;
+echo [STEP ${step}] : Simplify Model
+SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
+if [ -f ${SIM_MODEL} ];then
+    echo "    "Simplify Model Skipped, ${SIM_MODEL} already exists
+else
+    python3 ${RUN_DIR}/simplify_model.py \
+        --origin_model ${CHECKPOINTS_DIR}/${MODEL_NAME}.onnx \
+        --output_model ${SIM_MODEL}
+    echo "    "Generated ${SIM_MODEL}
+fi
+
+
+# Build Engine
+let step++
+echo;
+echo [STEP ${step}] : Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}.engine
+if [ -f $ENGINE_FILE ];then
+    echo "    "Build Engine Skipped, $ENGINE_FILE already exists
+else
+    python3 ${RUN_DIR}/build_engine.py \
+        --model ${SIM_MODEL} \
+        --engine ${ENGINE_FILE}
+    echo "    "Generated Engine ${ENGINE_FILE}
+fi
+
+# Inference
+let step++
+echo;
+echo [STEP ${step}] : Inference
+python3 ${RUN_DIR}/fcos_ixrt_inference.py \
+    --engine ${ENGINE_FILE} \
+    --cfg_file ${RUN_DIR}/fcos_r50_caffe_fpn_gn-head_1x_coco.py \
+    --task "perf" \
+    --batch_size 1 \
+    --target_fps 40; check_status
exit ${EXIT_STATUS}
\ No newline at end of file
diff --git a/models/cv/detection/fcos/ixrt/simplify_model.py b/models/cv/detection/fcos/ixrt/simplify_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..1400fd81ddb4b3fae1b20d0fd35082a692f5d292
--- /dev/null
+++ b/models/cv/detection/fcos/ixrt/simplify_model.py
@@ -0,0 +1,38 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import onnx
+import argparse
+from onnxsim import simplify
+
+# Simplify
+def simplify_model(args):
+    onnx_model = onnx.load(args.origin_model)
+    model_simp, check = simplify(onnx_model)
+    assert check, "Simplified ONNX model could not be validated"
+    model_simp = onnx.shape_inference.infer_shapes(model_simp)
+    onnx.save(model_simp, args.output_model)
+    print("  Simplify onnx Done.")
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--origin_model", type=str)
+    parser.add_argument("--output_model", type=str)
+    args = parser.parse_args()
+    return args
+
+if __name__ == "__main__":
+    args = parse_args()
+    simplify_model(args)
\ No newline at end of file