diff --git a/models/cv/detection/hrnet/ixrt/README.md b/models/cv/detection/hrnet/ixrt/README.md new file mode 100644 index 0000000000000000000000000000000000000000..cade087c4e00dac7b92db9943932facf97ba04e4 --- /dev/null +++ b/models/cv/detection/hrnet/ixrt/README.md @@ -0,0 +1,60 @@ +# HRNet + +## Description + +HRNet is an advanced deep learning architecture for human pose estimation. It maintains high-resolution representations throughout the entire network, thereby avoiding the low-to-high resolution recovery step typical of traditional models. The network features parallel multi-resolution subnetworks and enriches feature representation through repeated multi-scale fusion, which enhances the accuracy of keypoint detection. Additionally, HRNet offers computational efficiency and has demonstrated superior performance over previous methods on several standard datasets. In this example, HRNet is used as the backbone of an FCOS object detector evaluated on COCO. + +## Setup + +### Install + +```bash +# Install libGL +## CentOS +yum install -y mesa-libGL +## Ubuntu +apt install -y libgl1-mesa-dev + +pip3 install -r requirements.txt +``` + +### Download + +Pretrained model: + +Dataset: download the COCO 2017 validation dataset. 
+ +### Model Conversion + +```bash +# export onnx model +python3 export.py --weight fcos_hrnetv2p_w18_gn-head_4x4_1x_coco_20201212_100710-4ad151de.pth --cfg fcos_hrnetv2p-w18-gn-head_4xb4-1x_coco.py --output hrnet.onnx + +# Use onnxsim optimize onnx model +onnxsim hrnet.onnx hrnet_opt.onnx +``` + +## Inference + +```bash +export DATASETS_DIR=/Path/to/coco/ +``` + +### FP16 + +```bash +# Accuracy +bash scripts/infer_hrnet_fp16_accuracy.sh +# Performance +bash scripts/infer_hrnet_fp16_performance.sh +``` + +## Results + +Model |BatchSize |Precision |FPS |IOU@0.5 |IOU@0.5:0.95 | +-------|-----------|----------|----------|----------|---------------| +HRNet | 32 | FP16 | 75.199 | 0.491 | 0.327 | + +## Reference + +mmdetection: diff --git a/models/cv/detection/hrnet/ixrt/build_engine.py b/models/cv/detection/hrnet/ixrt/build_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..61834f4fe79fe2b9bb4cfc01561b01f177098f69 --- /dev/null +++ b/models/cv/detection/hrnet/ixrt/build_engine.py @@ -0,0 +1,61 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
def main(config):
    """Build an engine from an ONNX model and write it to disk.

    Args:
        config: Parsed command-line namespace. Reads ``config.model`` (ONNX
            file to parse), ``config.precision`` (``"int8"`` selects the INT8
            builder flag, every other value selects FP16) and
            ``config.engine`` (path the serialized engine is written to).

    Raises:
        RuntimeError: If the ONNX file cannot be parsed or the builder
            returns no serialized engine.
    """
    # The engine is built for one fixed input shape: the min, opt and max
    # entries of the optimization profile are all the same dimensions.
    input_shape = [32, 3, 800, 800]

    logger = tensorrt.Logger(tensorrt.Logger.WARNING)
    builder = tensorrt.Builder(logger)
    explicit_batch = 1 << int(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    network = builder.create_network(explicit_batch)
    build_config = builder.create_builder_config()

    profile = builder.create_optimization_profile()
    profile.set_shape("input", Dims(input_shape), Dims(input_shape), Dims(input_shape))
    build_config.add_optimization_profile(profile)

    parser = tensorrt.OnnxParser(network, logger)
    # Fail here rather than building an engine from an empty network.
    if not parser.parse_from_file(config.model):
        raise RuntimeError(f"Failed to parse ONNX model: {config.model}")

    # NOTE(review): "float32" is an accepted --precision choice but is still
    # built with the FP16 flag, exactly as before -- confirm this is intended.
    if config.precision == "int8":
        precision = tensorrt.BuilderFlag.INT8
    else:
        precision = tensorrt.BuilderFlag.FP16
    build_config.set_flag(precision)

    for i in range(network.num_inputs):
        network.get_input(i).shape = Dims(input_shape)

    plan = builder.build_serialized_network(network, build_config)
    if plan is None:
        raise RuntimeError(f"Failed to build engine from: {config.model}")

    with open(config.engine, "wb") as f:
        f.write(plan)


def parse_args():
    """Parse the command-line options of the engine builder.

    Returns:
        argparse.Namespace with ``model``, ``precision`` and ``engine``.
    """
    parser = argparse.ArgumentParser()
    # Both paths are mandatory: without them main() would fail later with an
    # opaque error on a None path.
    parser.add_argument("--model", type=str, required=True)
    parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="float16",
                        help="The precision of datatype")
    parser.add_argument("--engine", type=str, required=True)
    return parser.parse_args()


if __name__ == "__main__":
    args = parse_args()
    main(args)
def create_engine_context(engine_path, logger):
    """Deserialize an engine file and create an execution context for it.

    Args:
        engine_path: Path of the serialized engine file.
        logger: Logger instance handed to ``tensorrt.Runtime``.

    Returns:
        Tuple ``(engine, context)``.

    Raises:
        AssertionError: If the runtime, engine or context is falsy.
    """
    with open(engine_path, "rb") as f:
        runtime = tensorrt.Runtime(logger)
        assert runtime
        engine = runtime.deserialize_cuda_engine(f.read())
        assert engine
        context = engine.create_execution_context()
        assert context

    return engine, context


def get_io_bindings(engine):
    """Allocate one device buffer per engine binding and describe each one.

    Args:
        engine: Deserialized engine exposing ``num_bindings`` and the
            ``binding_is_input`` / ``get_binding_*`` accessors.

    Returns:
        Tuple ``(inputs, outputs, allocations)``. ``inputs`` and ``outputs``
        are lists of dicts with keys ``index``, ``name``, ``dtype``,
        ``shape``, ``allocation`` and ``nbytes``; ``allocations`` holds the
        ``cudaMalloc`` results of all bindings in binding order.

    Raises:
        AssertionError: If ``cudaMalloc`` does not report success.
    """
    inputs = []
    outputs = []
    allocations = []

    for i in range(engine.num_bindings):
        is_input = engine.binding_is_input(i)
        name = engine.get_binding_name(i)
        np_dtype = np.dtype(tensorrt.nptype(engine.get_binding_dtype(i)))
        shape = list(engine.get_binding_shape(i))

        # Buffer size in bytes: element size times every dimension.
        size = np_dtype.itemsize
        for dim in shape:
            size *= dim

        err, allocation = cudart.cudaMalloc(size)
        assert err == cudart.cudaError_t.cudaSuccess

        binding = {
            "index": i,
            "name": name,
            "dtype": np_dtype,
            "shape": shape,
            "allocation": allocation,
            "nbytes": size,
        }
        print(f"binding {i}, name : {name} dtype : {np_dtype} shape : {shape}")
        allocations.append(allocation)
        if is_input:
            inputs.append(binding)
        else:
            outputs.append(binding)
    return inputs, outputs, allocations
# Deployment configuration; export.py passes this file to mmdeploy's
# load_config together with the model config.

# ONNX export options.
onnx_config = {
    'type': 'onnx',
    'export_params': True,
    'keep_initializers_as_inputs': False,
    'opset_version': 11,
    'save_file': 'end2end.onnx',
    'input_names': ['input'],
    'output_names': ['output'],
    'input_shape': None,
    'optimize': True,
}

# Codebase (mmdet) options, including the post-processing thresholds.
codebase_config = {
    'type': 'mmdet',
    'task': 'ObjectDetection',
    'model_type': 'end2end',
    'post_processing': {
        'score_threshold': 0.05,
        'confidence_threshold': 0.005,
        'iou_threshold': 0.5,
        'max_output_boxes_per_class': 200,
        'pre_top_k': 5000,
        'keep_top_k': 100,
        'background_label_id': -1,
    },
}

# Backend selection.
backend_config = {'type': 'onnxruntime'}
def parse_args():
    """Parse the three mandatory export options.

    Returns:
        argparse.Namespace with ``weight``, ``cfg`` and ``output``.
    """
    parser = argparse.ArgumentParser()

    required_options = (
        ("--weight", "pytorch model weight."),
        ("--cfg", "model config file."),
        ("--output", "export onnx model path."),
    )
    for flag, description in required_options:
        parser.add_argument(flag, type=str, required=True, help=description)

    return parser.parse_args()


def main():
    """Load the checkpoint through mmdeploy and export it to ONNX.

    The deploy config is read from ``deploy_default.py`` in the working
    directory; the model is built on CPU and traced with a random
    ``1x3x800x800`` tensor, with axis 0 of ``input`` and ``output`` declared
    dynamic.
    """
    cli = parse_args()

    deploy_cfg, model_cfg = load_config('deploy_default.py', cli.cfg)
    processor = build_task_processor(model_cfg, deploy_cfg, device='cpu')
    net = processor.build_pytorch_model(cli.weight)

    export_options = {
        'input_names': ['input'],
        'output_names': ['output'],
        'dynamic_axes': {'input': {0: '-1'}, 'output': {0: '-1'}},
        'opset_version': 13,
    }
    sample = torch.randn(1, 3, 800, 800)

    torch.onnx.export(net, sample, cli.output, **export_options)

    print("Export onnx model successfully! ")


if __name__ == '__main__':
    main()
# Test-time data loading and evaluation settings.
# The dataset paths use the same relative 'data/coco/' layout as the
# val_* settings instead of a developer-specific absolute path.
# inference.py overwrites batch_size, data_root, data_prefix['img'] and
# ann_file from its command-line arguments before building the runner.
test_dataloader = dict(
    batch_size=32,
    dataset=dict(
        ann_file='annotations/instances_val2017.json',
        backend_args=None,
        data_prefix=dict(img='val2017/'),
        data_root='data/coco/',
        pipeline=[
            dict(backend_args=None, type='LoadImageFromFile'),
            dict(keep_ratio=True, scale=(
                800,
                800,
            ), type='Resize'),
            dict(type='LoadAnnotations', with_bbox=True),
            dict(
                meta_keys=(
                    'img_id',
                    'img_path',
                    'ori_shape',
                    'img_shape',
                    'scale_factor',
                ),
                type='PackDetInputs'),
        ],
        test_mode=True,
        type='CocoDataset'),
    drop_last=False,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(shuffle=False, type='DefaultSampler'))
test_evaluator = dict(
    ann_file='data/coco/annotations/instances_val2017.json',
    backend_args=None,
    format_only=False,
    metric='bbox',
    type='CocoMetric')
# COCO bbox metric settings for validation.
val_evaluator = {
    'ann_file': 'data/coco/annotations/instances_val2017.json',
    'backend_args': None,
    'format_only': False,
    'metric': 'bbox',
    'type': 'CocoMetric',
}

# Visualization backends and the visualizer that uses them.
vis_backends = [
    {'type': 'LocalVisBackend'},
]
visualizer = {
    'name': 'visualizer',
    'type': 'DetLocalVisualizer',
    'vis_backends': [
        {'type': 'LocalVisBackend'},
    ],
}

work_dir = './'
def _str2bool(value):
    """Convert a command-line token such as ``"True"`` or ``"false"`` to bool.

    ``type=bool`` cannot be used with argparse because every non-empty
    string, including ``"False"``, is truthy.
    """
    if isinstance(value, bool):
        return value
    text = value.strip().lower()
    if text in ("true", "t", "yes", "y", "1"):
        return True
    if text in ("false", "f", "no", "n", "0", ""):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


def parse_args():
    """Parse the command-line options of the inference script.

    Returns:
        argparse.Namespace with ``engine``, ``batchsize``, ``datasets``,
        ``input_name``, ``warmup``, ``acc_target``, ``fps_target`` and
        ``perf_only``.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("--engine",
                        type=str,
                        required=True,
                        help="igie engine path.")

    parser.add_argument("--batchsize",
                        type=int,
                        required=True,
                        help="inference batch size.")

    parser.add_argument("--datasets",
                        type=str,
                        required=True,
                        help="datasets path.")

    parser.add_argument("--input_name",
                        type=str,
                        required=True,
                        help="input name of the model.")

    parser.add_argument("--warmup",
                        type=int,
                        default=3,
                        help="number of warmup before test.")

    parser.add_argument("--acc_target",
                        type=float,
                        default=None,
                        help="Model inference Accuracy target.")

    parser.add_argument("--fps_target",
                        type=float,
                        default=None,
                        help="Model inference FPS target.")

    parser.add_argument("--perf_only",
                        type=_str2bool,
                        default=False,
                        help="Run performance test only")

    return parser.parse_args()


def _run_performance_test(context, allocations, batch_size, fps_target, num_runs=10):
    """Time ``num_runs`` engine executions and compare the FPS to the target.

    Raises:
        SystemExit: With status 1 when a target is given and not reached.
    """
    torch.cuda.synchronize()
    start_time = time.time()

    for _ in range(num_runs):
        context.execute_v2(allocations)

    torch.cuda.synchronize()
    forward_time = time.time() - start_time
    fps = num_runs * batch_size / forward_time

    print("FPS : ", fps)
    # Without a target there is nothing to compare against; comparing a float
    # with None would raise TypeError.
    if fps_target is None:
        return

    print(f"Performance Check : Test {fps} >= target {fps_target}")
    if fps >= fps_target:
        print("pass!")
        return
    print("failed!")
    raise SystemExit(1)


def _run_accuracy_test(context, inputs, outputs, allocations, batch_size, datasets):
    """Run the engine over the test dataloader and evaluate with the runner.

    Returns:
        The value returned by ``runner.test_evaluator.evaluate``.
    """
    # Runner config
    cfg = Config.fromfile("fcos_hrnetv2p-w18-gn-head_4xb4-1x_coco.py")
    cfg.work_dir = "./"

    cfg['test_dataloader']['batch_size'] = batch_size
    cfg['test_dataloader']['dataset']['data_root'] = datasets
    cfg['test_dataloader']['dataset']['data_prefix']['img'] = 'images/val2017/'
    cfg['test_evaluator']['ann_file'] = os.path.join(datasets, 'annotations/instances_val2017.json')
    cfg['log_level'] = 'ERROR'

    runner = RUNNERS.build(cfg)

    for input_data in tqdm(runner.test_dataloader):
        input_data = runner.model.data_preprocessor(input_data, False)
        image = input_data['inputs'].cpu().numpy().astype(inputs[0]["dtype"])

        # A batch that does not match the engine batch size (e.g. the last,
        # shorter one) is padded to it; the padded results are cut off below.
        origin_size = len(image)
        pad_batch = origin_size != batch_size
        if pad_batch:
            image = np.resize(image, (batch_size, *image.shape[1:]))
        image = np.ascontiguousarray(image)

        (err,) = cudart.cudaMemcpy(
            inputs[0]["allocation"],
            image,
            image.nbytes,
            cudart.cudaMemcpyKind.cudaMemcpyHostToDevice,
        )
        assert err == cudart.cudaError_t.cudaSuccess

        context.execute_v2(allocations)

        cls_score = []
        box_reg = []
        score_factors = []
        for binding in outputs:
            output = np.zeros(binding["shape"], binding["dtype"])
            (err,) = cudart.cudaMemcpy(
                output,
                binding["allocation"],
                binding["nbytes"],
                cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost,
            )
            assert err == cudart.cudaError_t.cudaSuccess

            if pad_batch:
                output = output[:origin_size]

            output = torch.from_numpy(output)

            # Outputs are sorted by the size of dimension 1: 80 goes to the
            # class scores, 4 to the box regression, the rest to score factors.
            if output.shape[1] == 80:
                cls_score.append(output)
            elif output.shape[1] == 4:
                box_reg.append(output)
            else:
                score_factors.append(output)

        batch_img_metas = [
            data_samples.metainfo for data_samples in input_data['data_samples']
        ]

        results_list = runner.model.bbox_head.predict_by_feat(
            cls_score, box_reg, score_factors,
            batch_img_metas=batch_img_metas, rescale=True)

        batch_data_samples = runner.model.add_pred_to_datasample(
            input_data['data_samples'], results_list)

        runner.test_evaluator.process(data_samples=batch_data_samples, data_batch=input_data)

    return runner.test_evaluator.evaluate(len(runner.test_dataloader.dataset))


def main():
    """Load the engine, warm it up, then run the performance or accuracy test."""
    args = parse_args()

    batch_size = args.batchsize
    logger = tensorrt.Logger(tensorrt.Logger.ERROR)

    # Load Engine && I/O bindings
    engine, context = create_engine_context(args.engine, logger)
    inputs, outputs, allocations = get_io_bindings(engine)

    if args.warmup > 0:
        print("\nWarm Start.")
        for _ in range(args.warmup):
            context.execute_v2(allocations)
        print("Warm Done.")

    if args.perf_only:
        _run_performance_test(context, allocations, batch_size, args.fps_target)
    else:
        # NOTE(review): --acc_target is parsed but never compared with the
        # evaluation result, as before -- confirm whether a check is wanted.
        _run_accuracy_test(context, inputs, outputs, allocations, batch_size, args.datasets)


if __name__ == "__main__":
    main()
# Abort on the first failing command so that a failed engine build never runs
# inference against a stale or missing hrnet.engine.
set -e

batchsize=32
model_path="hrnet_opt.onnx"
# inference.py requires --datasets even though the performance-only path does
# not read it; the quoted expansion below keeps the argument in place when
# DATASETS_DIR is unset.
datasets_path="${DATASETS_DIR}"

# build engine
python3 build_engine.py \
    --model "${model_path}" \
    --precision float16 \
    --engine hrnet.engine


# inference
python3 inference.py \
    --engine hrnet.engine \
    --batchsize "${batchsize}" \
    --input_name input \
    --datasets "${datasets_path}" \
    --perf_only True \
    --fps_target 70