diff --git a/models/cv/classification/convnext_base/igie/README.md b/models/cv/classification/convnext_base/igie/README.md new file mode 100644 index 0000000000000000000000000000000000000000..442ad963509c53b5dc0c4cd8ae9e5aa1081e0370 --- /dev/null +++ b/models/cv/classification/convnext_base/igie/README.md @@ -0,0 +1,47 @@ +# ConvNext Base + +## Description + +The ConvNeXt Base model represents a significant stride in the evolution of convolutional neural networks (CNNs), introduced by researchers at Facebook AI Research (FAIR) and UC Berkeley. It is part of the ConvNeXt family, which challenges the dominance of Vision Transformers (ViTs) in the realm of visual recognition tasks. + +## Setup + +### Install + +```bash +pip3 install onnx +pip3 install tqdm +``` + +### Download + +Pretrained model: + +Dataset: to download the validation dataset. + +### Model Conversion + +```bash +python3 export.py --weight convnext_base-6075fbad.pth --output convnext_base.onnx +``` + +## Inference + +```bash +export DATASETS_DIR=/Path/to/imagenet_val/ +``` + +### FP16 + +```bash +# Accuracy +bash scripts/infer_convnext_base_fp16_accuracy.sh +# Performance +bash scripts/infer_convnext_base_fp16_performance.sh +``` + +## Results + +| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) | +| -------------- | --------- | --------- | ------- | -------- | -------- | +| ConvNext Base | 32 | FP16 | 589.669 | 83.661 | 96.699 | diff --git a/models/cv/classification/convnext_base/igie/build_engine.py b/models/cv/classification/convnext_base/igie/build_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..d3626ae76cc9781d9a01ec3d3e2afbdbca409ff5 --- /dev/null +++ b/models/cv/classification/convnext_base/igie/build_engine.py @@ -0,0 +1,73 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import tvm +import argparse +from tvm import relay +from tvm.relay.import_model import import_model_to_igie + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--model_path", + type=str, + required=True, + help="original model path.") + + parser.add_argument("--engine_path", + type=str, + required=True, + help="igie export engine path.") + + parser.add_argument("--input", + type=str, + required=True, + help=""" + input info of the model, format should be: + input_name:input_shape + eg: --input input:1,3,224,224. 
+ """) + + parser.add_argument("--precision", + type=str, + choices=["fp32", "fp16", "int8"], + required=True, + help="model inference precision.") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + # get input valueinfo + input_name, input_shape = args.input.split(":") + shape = tuple([int(s) for s in input_shape.split(",")]) + input_dict = {input_name: shape} + + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + + mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie") + + # build engine + lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision) + + # export engine + lib.export_library(args.engine_path) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/classification/convnext_base/igie/export.py b/models/cv/classification/convnext_base/igie/export.py new file mode 100644 index 0000000000000000000000000000000000000000..d9a2fe01e69085ca3c53782d32080c54cbbd1e5a --- /dev/null +++ b/models/cv/classification/convnext_base/igie/export.py @@ -0,0 +1,61 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import torch +import torchvision +import argparse + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--weight", + type=str, + required=True, + help="pytorch model weight.") + + parser.add_argument("--output", + type=str, + required=True, + help="export onnx model path.") + + args = parser.parse_args() + return args + +def main(): + args = parse_args() + + model = torchvision.models.convnext_base() + model.load_state_dict(torch.load(args.weight)) + model.eval() + + input_names = ['input'] + output_names = ['output'] + dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}} + dummy_input = torch.randn(1, 3, 224, 224) + + torch.onnx.export( + model, + dummy_input, + args.output, + input_names = input_names, + dynamic_axes = dynamic_axes, + output_names = output_names, + opset_version=13 + ) + + print("Export onnx model successfully! ") + +if __name__ == "__main__": + main() diff --git a/models/cv/classification/convnext_base/igie/inference.py b/models/cv/classification/convnext_base/igie/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..3aef3ec70fa7e88917c54aeb8242fc73a910c696 --- /dev/null +++ b/models/cv/classification/convnext_base/igie/inference.py @@ -0,0 +1,186 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import sys +import argparse +import tvm +import torch +import torchvision +import numpy as np +from tvm import relay +from tqdm import tqdm +from torchvision import transforms +from torchvision.transforms.functional import InterpolationMode + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--engine", + type=str, + required=True, + help="igie engine path.") + + parser.add_argument("--batchsize", + type=int, + required=True, + help="inference batch size.") + + parser.add_argument("--datasets", + type=str, + required=True, + help="datasets path.") + + parser.add_argument("--input_name", + type=str, + required=True, + help="input name of the model.") + + parser.add_argument("--warmup", + type=int, + default=3, + help="number of warmup before test.") + + parser.add_argument("--num_workers", + type=int, + default=16, + help="number of workers used in pytorch dataloader.") + + parser.add_argument("--acc_target", + type=float, + default=None, + help="Model inference Accuracy target.") + + parser.add_argument("--fps_target", + type=float, + default=None, + help="Model inference FPS target.") + + parser.add_argument("--perf_only", + type=bool, + default=False, + help="Run performance test only") + + args = parser.parse_args() + + return args + +def get_dataloader(data_path, batch_size, num_workers): + dataset = torchvision.datasets.ImageFolder( + data_path, + transforms.Compose( + [ + transforms.Resize(256, interpolation=InterpolationMode.BILINEAR), + transforms.CenterCrop(224), + transforms.PILToTensor(), + transforms.ConvertImageDtype(torch.float), + transforms.Normalize( + mean=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225) + ) + ] + ) + ) + + dataloader = torch.utils.data.DataLoader(dataset, batch_size, num_workers=num_workers) + + return dataloader + +def get_topk_accuracy(pred, label): + if isinstance(pred, np.ndarray): + pred = torch.from_numpy(pred) + + if isinstance(label, np.ndarray): + label = torch.from_numpy(label) + + top1_acc = 0 + top5_acc = 0 + for idx in range(len(label)): + label_value = label[idx] + if label_value == torch.topk(pred[idx].float(), 1).indices.data: + top1_acc += 1 + top5_acc += 1 + + elif label_value in torch.topk(pred[idx].float(), 5).indices.data: + top5_acc += 1 + + return top1_acc, top5_acc + +def main(): + args = parse_args() + + batch_size = args.batchsize + + # create iluvatar target & device + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + device = tvm.device(target.kind.name, 0) + + # load engine + lib = tvm.runtime.load_module(args.engine) + + # create runtime from engine + module = tvm.contrib.graph_executor.GraphModule(lib["default"](device)) + + # just run perf test + if args.perf_only: + ftimer = module.module.time_evaluator("run", device, number=100, repeat=1) + prof_res = np.array(ftimer().results) * 1000 + fps = batch_size * 1000 / np.mean(prof_res) + print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}") + else: + # warm up + for _ in range(args.warmup): + module.run() + + # get dataloader + dataloader = get_dataloader(args.datasets, 
batch_size, args.num_workers) + + top1_acc = 0 + top5_acc = 0 + total_num = 0 + + for image, label in tqdm(dataloader): + + # pad the last batch + pad_batch = len(image) != batch_size + + if pad_batch: + origin_size = len(image) + image = np.resize(image, (batch_size, *image.shape[1:])) + + module.set_input(args.input_name, tvm.nd.array(image, device)) + + # run inference + module.run() + + pred = module.get_output(0).asnumpy() + + if pad_batch: + pred = pred[:origin_size] + + # get batch accuracy + batch_top1_acc, batch_top5_acc = get_topk_accuracy(pred, label) + + top1_acc += batch_top1_acc + top5_acc += batch_top5_acc + total_num += batch_size + + result_stat = {} + result_stat["acc@1"] = round(top1_acc / total_num * 100.0, 3) + result_stat["acc@5"] = round(top5_acc / total_num * 100.0, 3) + + print(f"\n* Top1 acc: {result_stat['acc@1']} %, Top5 acc: {result_stat['acc@5']} %") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/detection/foveabox/igie/base/default_runtime.py b/models/cv/classification/convnext_base/igie/scripts/infer_convnext_base_fp16_accuracy.sh similarity index 47% rename from models/cv/detection/foveabox/igie/base/default_runtime.py rename to models/cv/classification/convnext_base/igie/scripts/infer_convnext_base_fp16_accuracy.sh index 609d80377b6cbe907ac30e741f1f28608fb6f2b1..42575772c1d678ecc0b9b0f51a6f827480ab1dd2 100644 --- a/models/cv/detection/foveabox/igie/base/default_runtime.py +++ b/models/cv/classification/convnext_base/igie/scripts/infer_convnext_base_fp16_accuracy.sh @@ -1,3 +1,5 @@ +#!/bin/bash + # Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. # All Rights Reserved. # @@ -13,27 +15,21 @@ # License for the specific language governing permissions and limitations # under the License. 
-default_scope = 'mmdet' - -default_hooks = dict( - timer=dict(type='IterTimerHook'), - logger=dict(type='LoggerHook', interval=50), - param_scheduler=dict(type='ParamSchedulerHook'), - checkpoint=dict(type='CheckpointHook', interval=1), - sampler_seed=dict(type='DistSamplerSeedHook'), - visualization=dict(type='DetVisualizationHook')) +batchsize=32 +model_path="convnext_base.onnx" +datasets_path=${DATASETS_DIR} -env_cfg = dict( - cudnn_benchmark=False, - mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), - dist_cfg=dict(backend='nccl'), -) +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path convnext_base_bs_${batchsize}_fp16.so -vis_backends = [dict(type='LocalVisBackend')] -visualizer = dict( - type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer') -log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True) -log_level = 'INFO' -load_from = None -resume = False +# inference +python3 inference.py \ + --engine convnext_base_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ No newline at end of file diff --git a/models/cv/detection/hrnet/igie/base/default_runtime.py b/models/cv/classification/convnext_base/igie/scripts/infer_convnext_base_fp16_performance.sh similarity index 47% rename from models/cv/detection/hrnet/igie/base/default_runtime.py rename to models/cv/classification/convnext_base/igie/scripts/infer_convnext_base_fp16_performance.sh index 609d80377b6cbe907ac30e741f1f28608fb6f2b1..d5ae06496b606011a35726082ac331df296195ae 100644 --- a/models/cv/detection/hrnet/igie/base/default_runtime.py +++ b/models/cv/classification/convnext_base/igie/scripts/infer_convnext_base_fp16_performance.sh @@ -1,3 +1,5 @@ +#!/bin/bash + # Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. # All Rights Reserved. # @@ -13,27 +15,22 @@ # License for the specific language governing permissions and limitations # under the License. 
-default_scope = 'mmdet' - -default_hooks = dict( - timer=dict(type='IterTimerHook'), - logger=dict(type='LoggerHook', interval=50), - param_scheduler=dict(type='ParamSchedulerHook'), - checkpoint=dict(type='CheckpointHook', interval=1), - sampler_seed=dict(type='DistSamplerSeedHook'), - visualization=dict(type='DetVisualizationHook')) +batchsize=32 +model_path="convnext_base.onnx" +datasets_path=${DATASETS_DIR} -env_cfg = dict( - cudnn_benchmark=False, - mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), - dist_cfg=dict(backend='nccl'), -) +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path convnext_base_bs_${batchsize}_fp16.so -vis_backends = [dict(type='LocalVisBackend')] -visualizer = dict( - type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer') -log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True) -log_level = 'INFO' -load_from = None -resume = False +# inference +python3 inference.py \ + --engine convnext_base_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ + --perf_only True \ No newline at end of file diff --git a/models/cv/classification/densenet201/igie/README.md b/models/cv/classification/densenet201/igie/README.md new file mode 100644 index 0000000000000000000000000000000000000000..595e338b11adb24d5f6d2e73add13e9dc7fb1fd9 --- /dev/null +++ b/models/cv/classification/densenet201/igie/README.md @@ -0,0 +1,48 @@ +# DenseNet201 + +## Description + +DenseNet201 is a deep convolutional neural network that stands out for its unique dense connection architecture, where each layer integrates features from all previous layers, effectively reusing features and reducing the number of parameters. This design not only enhances the network's information flow and parameter efficiency but also increases the model's regularization effect, helping to prevent overfitting. DenseNet201 consists of multiple dense blocks and transition layers, capable of capturing rich feature representations while maintaining computational efficiency, making it suitable for complex image recognition tasks. + + +## Setup + +### Install + +```bash +pip3 install onnx +pip3 install tqdm +``` + +### Download + +Pretrained model: + +Dataset: to download the validation dataset. + +### Model Conversion + +```bash +python3 export.py --weight densenet201-c1103571.pth --output densenet201.onnx +``` + +## Inference + +```bash +export DATASETS_DIR=/Path/to/imagenet_val/ +``` + +### FP16 + +```bash +# Accuracy +bash scripts/infer_densenet201_fp16_accuracy.sh +# Performance +bash scripts/infer_densenet201_fp16_performance.sh +``` + +## Results + +| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) | +| ----------- | --------- | --------- | -------- | -------- | -------- | +| DenseNet201 | 32 | FP16 | 758.592 | 76.851 | 93.338 | diff --git a/models/cv/classification/densenet201/igie/build_engine.py b/models/cv/classification/densenet201/igie/build_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..d3626ae76cc9781d9a01ec3d3e2afbdbca409ff5 --- /dev/null +++ b/models/cv/classification/densenet201/igie/build_engine.py @@ -0,0 +1,73 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import tvm +import argparse +from tvm import relay +from tvm.relay.import_model import import_model_to_igie + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--model_path", + type=str, + required=True, + help="original model path.") + + parser.add_argument("--engine_path", + type=str, + required=True, + help="igie export engine path.") + + parser.add_argument("--input", + type=str, + required=True, + help=""" + input info of the model, format should be: + input_name:input_shape + eg: --input input:1,3,224,224. + """) + + parser.add_argument("--precision", + type=str, + choices=["fp32", "fp16", "int8"], + required=True, + help="model inference precision.") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + # get input valueinfo + input_name, input_shape = args.input.split(":") + shape = tuple([int(s) for s in input_shape.split(",")]) + input_dict = {input_name: shape} + + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + + mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie") + + # build engine + lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision) + + # export engine + lib.export_library(args.engine_path) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/classification/densenet201/igie/export.py b/models/cv/classification/densenet201/igie/export.py new file mode 100644 index 0000000000000000000000000000000000000000..66019547bf1101889f3fd699581741a114890959 --- /dev/null +++ b/models/cv/classification/densenet201/igie/export.py @@ -0,0 +1,74 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
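+# export.py converts the DenseNet-201 PyTorch checkpoint to ONNX: it rewrites the
+# legacy state-dict keys (e.g. "denselayer1.norm.1.weight" -> "denselayer1.norm1.weight")
+# so the weights load into the current torchvision model definition, then exports the
+# network with a dynamic batch dimension at ONNX opset 13.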
+ +import torch +import torchvision +import argparse +import re + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--weight", + type=str, + required=True, + help="pytorch model weight.") + + parser.add_argument("--output", + type=str, + required=True, + help="export onnx model path.") + + args = parser.parse_args() + return args + +def main(): + args = parse_args() + + model = torchvision.models.densenet201(weights=False) + + state_dict = torch.load(args.weight) + + pattern = re.compile(r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$' + ) + for key in list(state_dict.keys()): + res = pattern.match(key) + if res: + new_key = res.group(1) + res.group(2) + state_dict[new_key] = state_dict[key] + del state_dict[key] + + model.load_state_dict(state_dict) + model.eval() + + input_names = ['input'] + output_names = ['output'] + dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}} + dummy_input = torch.randn(1, 3, 224, 224) + + torch.onnx.export( + model, + dummy_input, + args.output, + input_names = input_names, + dynamic_axes = dynamic_axes, + output_names = output_names, + opset_version=13 + ) + + print("Export onnx model successfully! ") + +if __name__ == "__main__": + main() diff --git a/models/cv/classification/densenet201/igie/inference.py b/models/cv/classification/densenet201/igie/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..3aef3ec70fa7e88917c54aeb8242fc73a910c696 --- /dev/null +++ b/models/cv/classification/densenet201/igie/inference.py @@ -0,0 +1,186 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
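+# inference.py evaluates a compiled IGIE engine on the ImageNet validation set:
+# images are loaded with torchvision.datasets.ImageFolder (Resize 256 / CenterCrop 224 /
+# normalize), the last incomplete batch is padded up to the fixed engine batch size,
+# and Top-1/Top-5 accuracy is reported. With --perf_only the dataset is skipped and
+# only the mean latency / FPS of the engine is measured.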
+ +import sys +import argparse +import tvm +import torch +import torchvision +import numpy as np +from tvm import relay +from tqdm import tqdm +from torchvision import transforms +from torchvision.transforms.functional import InterpolationMode + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--engine", + type=str, + required=True, + help="igie engine path.") + + parser.add_argument("--batchsize", + type=int, + required=True, + help="inference batch size.") + + parser.add_argument("--datasets", + type=str, + required=True, + help="datasets path.") + + parser.add_argument("--input_name", + type=str, + required=True, + help="input name of the model.") + + parser.add_argument("--warmup", + type=int, + default=3, + help="number of warmup before test.") + + parser.add_argument("--num_workers", + type=int, + default=16, + help="number of workers used in pytorch dataloader.") + + parser.add_argument("--acc_target", + type=float, + default=None, + help="Model inference Accuracy target.") + + parser.add_argument("--fps_target", + type=float, + default=None, + help="Model inference FPS target.") + + parser.add_argument("--perf_only", + type=bool, + default=False, + help="Run performance test only") + + args = parser.parse_args() + + return args + +def get_dataloader(data_path, batch_size, num_workers): + dataset = torchvision.datasets.ImageFolder( + data_path, + transforms.Compose( + [ + transforms.Resize(256, interpolation=InterpolationMode.BILINEAR), + transforms.CenterCrop(224), + transforms.PILToTensor(), + transforms.ConvertImageDtype(torch.float), + transforms.Normalize( + mean=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225) + ) + ] + ) + ) + + dataloader = torch.utils.data.DataLoader(dataset, batch_size, num_workers=num_workers) + + return dataloader + +def get_topk_accuracy(pred, label): + if isinstance(pred, np.ndarray): + pred = torch.from_numpy(pred) + + if isinstance(label, np.ndarray): + label = torch.from_numpy(label) + + top1_acc = 0 + top5_acc = 0 + for idx in range(len(label)): + label_value = label[idx] + if label_value == torch.topk(pred[idx].float(), 1).indices.data: + top1_acc += 1 + top5_acc += 1 + + elif label_value in torch.topk(pred[idx].float(), 5).indices.data: + top5_acc += 1 + + return top1_acc, top5_acc + +def main(): + args = parse_args() + + batch_size = args.batchsize + + # create iluvatar target & device + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + device = tvm.device(target.kind.name, 0) + + # load engine + lib = tvm.runtime.load_module(args.engine) + + # create runtime from engine + module = tvm.contrib.graph_executor.GraphModule(lib["default"](device)) + + # just run perf test + if args.perf_only: + ftimer = module.module.time_evaluator("run", device, number=100, repeat=1) + prof_res = np.array(ftimer().results) * 1000 + fps = batch_size * 1000 / np.mean(prof_res) + print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}") + else: + # warm up + for _ in range(args.warmup): + module.run() + + # get dataloader + dataloader = get_dataloader(args.datasets, batch_size, args.num_workers) + + top1_acc = 0 + top5_acc = 0 + total_num = 0 + + for image, label in tqdm(dataloader): + + # pad the last batch + pad_batch = len(image) != batch_size + + if pad_batch: + origin_size = len(image) + image = np.resize(image, (batch_size, *image.shape[1:])) + + module.set_input(args.input_name, tvm.nd.array(image, device)) + + # run inference + module.run() + + pred = 
module.get_output(0).asnumpy() + + if pad_batch: + pred = pred[:origin_size] + + # get batch accuracy + batch_top1_acc, batch_top5_acc = get_topk_accuracy(pred, label) + + top1_acc += batch_top1_acc + top5_acc += batch_top5_acc + total_num += batch_size + + result_stat = {} + result_stat["acc@1"] = round(top1_acc / total_num * 100.0, 3) + result_stat["acc@5"] = round(top5_acc / total_num * 100.0, 3) + + print(f"\n* Top1 acc: {result_stat['acc@1']} %, Top5 acc: {result_stat['acc@5']} %") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/detection/centernet/igie/base/default_runtime.py b/models/cv/classification/densenet201/igie/scripts/infer_densenet201_fp16_accuracy.sh similarity index 47% rename from models/cv/detection/centernet/igie/base/default_runtime.py rename to models/cv/classification/densenet201/igie/scripts/infer_densenet201_fp16_accuracy.sh index 609d80377b6cbe907ac30e741f1f28608fb6f2b1..470b285a4038a285110f5b5d6ecce509062d517b 100644 --- a/models/cv/detection/centernet/igie/base/default_runtime.py +++ b/models/cv/classification/densenet201/igie/scripts/infer_densenet201_fp16_accuracy.sh @@ -1,3 +1,5 @@ +#!/bin/bash + # Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. # All Rights Reserved. # @@ -13,27 +15,21 @@ # License for the specific language governing permissions and limitations # under the License. -default_scope = 'mmdet' - -default_hooks = dict( - timer=dict(type='IterTimerHook'), - logger=dict(type='LoggerHook', interval=50), - param_scheduler=dict(type='ParamSchedulerHook'), - checkpoint=dict(type='CheckpointHook', interval=1), - sampler_seed=dict(type='DistSamplerSeedHook'), - visualization=dict(type='DetVisualizationHook')) +batchsize=32 +model_path="densenet201.onnx" +datasets_path=${DATASETS_DIR} -env_cfg = dict( - cudnn_benchmark=False, - mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), - dist_cfg=dict(backend='nccl'), -) +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path densenet201_bs_${batchsize}_fp16.so -vis_backends = [dict(type='LocalVisBackend')] -visualizer = dict( - type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer') -log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True) -log_level = 'INFO' -load_from = None -resume = False +# inference +python3 inference.py \ + --engine densenet201_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ No newline at end of file diff --git a/models/cv/classification/densenet201/igie/scripts/infer_densenet201_fp16_performance.sh b/models/cv/classification/densenet201/igie/scripts/infer_densenet201_fp16_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..e1ad69b7a3b1bbcb4da314ab792448aea440ea87 --- /dev/null +++ b/models/cv/classification/densenet201/igie/scripts/infer_densenet201_fp16_performance.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="densenet201.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path densenet201_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine densenet201_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ + --perf_only True \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b3/igie/README.md b/models/cv/classification/efficientnet_b3/igie/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1331670b5b784045575f146a22a286c462786f84 --- /dev/null +++ b/models/cv/classification/efficientnet_b3/igie/README.md @@ -0,0 +1,47 @@ +# EfficientNet B3 + +## Description + +EfficientNet B3 is a member of the EfficientNet family, a series of convolutional neural network architectures that are designed to achieve excellent accuracy and efficiency. Introduced by researchers at Google, EfficientNets utilize the compound scaling method, which uniformly scales the depth, width, and resolution of the network to improve accuracy and efficiency. + +## Setup + +### Install + +```bash +pip3 install onnx +pip3 install tqdm +``` + +### Download + +Pretrained model: + +Dataset: to download the validation dataset. + +### Model Conversion + +```bash +python3 export.py --weight efficientnet_b3_rwightman-b3899882.pth --output efficientnet_b3.onnx +``` + +## Inference + +```bash +export DATASETS_DIR=/Path/to/imagenet_val/ +``` + +### FP16 + +```bash +# Accuracy +bash scripts/infer_efficientnet_b3_fp16_accuracy.sh +# Performance +bash scripts/infer_efficientnet_b3_fp16_performance.sh +``` + +## Results + +| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) | +| --------------- | --------- | --------- | -------- | -------- | -------- | +| Efficientnet_b3 | 32 | FP16 | 1144.391 | 78.503 | 94.340 | diff --git a/models/cv/classification/efficientnet_b3/igie/build_engine.py b/models/cv/classification/efficientnet_b3/igie/build_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..d3626ae76cc9781d9a01ec3d3e2afbdbca409ff5 --- /dev/null +++ b/models/cv/classification/efficientnet_b3/igie/build_engine.py @@ -0,0 +1,73 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
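+# build_engine.py compiles an ONNX model into an IGIE engine: the --input argument
+# (name:shape) fixes the input layout, import_model_to_igie converts the model to a
+# Relay module, tvm.relay.build targets the Iluvatar MR backend at the requested
+# precision (fp32/fp16/int8), and the result is exported as a shared library (.so).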
+ +import tvm +import argparse +from tvm import relay +from tvm.relay.import_model import import_model_to_igie + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--model_path", + type=str, + required=True, + help="original model path.") + + parser.add_argument("--engine_path", + type=str, + required=True, + help="igie export engine path.") + + parser.add_argument("--input", + type=str, + required=True, + help=""" + input info of the model, format should be: + input_name:input_shape + eg: --input input:1,3,224,224. + """) + + parser.add_argument("--precision", + type=str, + choices=["fp32", "fp16", "int8"], + required=True, + help="model inference precision.") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + # get input valueinfo + input_name, input_shape = args.input.split(":") + shape = tuple([int(s) for s in input_shape.split(",")]) + input_dict = {input_name: shape} + + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + + mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie") + + # build engine + lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision) + + # export engine + lib.export_library(args.engine_path) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b3/igie/export.py b/models/cv/classification/efficientnet_b3/igie/export.py new file mode 100644 index 0000000000000000000000000000000000000000..f66e2cb063c247300fde0cc0c8e772dd7dbceb79 --- /dev/null +++ b/models/cv/classification/efficientnet_b3/igie/export.py @@ -0,0 +1,61 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import torch +import torchvision +import argparse + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--weight", + type=str, + required=True, + help="pytorch model weight.") + + parser.add_argument("--output", + type=str, + required=True, + help="export onnx model path.") + + args = parser.parse_args() + return args + +def main(): + args = parse_args() + + model = torchvision.models.efficientnet_b3() + model.load_state_dict(torch.load(args.weight)) + model.eval() + + input_names = ['input'] + output_names = ['output'] + dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}} + dummy_input = torch.randn(1, 3, 224, 224) + + torch.onnx.export( + model, + dummy_input, + args.output, + input_names = input_names, + dynamic_axes = dynamic_axes, + output_names = output_names, + opset_version=13 + ) + + print("Export onnx model successfully! 
") + +if __name__ == "__main__": + main() diff --git a/models/cv/classification/efficientnet_b3/igie/inference.py b/models/cv/classification/efficientnet_b3/igie/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..3aef3ec70fa7e88917c54aeb8242fc73a910c696 --- /dev/null +++ b/models/cv/classification/efficientnet_b3/igie/inference.py @@ -0,0 +1,186 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import sys +import argparse +import tvm +import torch +import torchvision +import numpy as np +from tvm import relay +from tqdm import tqdm +from torchvision import transforms +from torchvision.transforms.functional import InterpolationMode + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--engine", + type=str, + required=True, + help="igie engine path.") + + parser.add_argument("--batchsize", + type=int, + required=True, + help="inference batch size.") + + parser.add_argument("--datasets", + type=str, + required=True, + help="datasets path.") + + parser.add_argument("--input_name", + type=str, + required=True, + help="input name of the model.") + + parser.add_argument("--warmup", + type=int, + default=3, + help="number of warmup before test.") + + parser.add_argument("--num_workers", + type=int, + default=16, + help="number of workers used in pytorch dataloader.") + + parser.add_argument("--acc_target", + type=float, + default=None, + help="Model inference Accuracy target.") + + parser.add_argument("--fps_target", + type=float, + default=None, + help="Model inference FPS target.") + + parser.add_argument("--perf_only", + type=bool, + default=False, + help="Run performance test only") + + args = parser.parse_args() + + return args + +def get_dataloader(data_path, batch_size, num_workers): + dataset = torchvision.datasets.ImageFolder( + data_path, + transforms.Compose( + [ + transforms.Resize(256, interpolation=InterpolationMode.BILINEAR), + transforms.CenterCrop(224), + transforms.PILToTensor(), + transforms.ConvertImageDtype(torch.float), + transforms.Normalize( + mean=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225) + ) + ] + ) + ) + + dataloader = torch.utils.data.DataLoader(dataset, batch_size, num_workers=num_workers) + + return dataloader + +def get_topk_accuracy(pred, label): + if isinstance(pred, np.ndarray): + pred = torch.from_numpy(pred) + + if isinstance(label, np.ndarray): + label = torch.from_numpy(label) + + top1_acc = 0 + top5_acc = 0 + for idx in range(len(label)): + label_value = label[idx] + if label_value == torch.topk(pred[idx].float(), 1).indices.data: + top1_acc += 1 + top5_acc += 1 + + elif label_value in torch.topk(pred[idx].float(), 5).indices.data: + top5_acc += 1 + + return top1_acc, top5_acc + +def main(): + args = parse_args() + + batch_size = args.batchsize + + # create iluvatar target & device + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + device = 
tvm.device(target.kind.name, 0) + + # load engine + lib = tvm.runtime.load_module(args.engine) + + # create runtime from engine + module = tvm.contrib.graph_executor.GraphModule(lib["default"](device)) + + # just run perf test + if args.perf_only: + ftimer = module.module.time_evaluator("run", device, number=100, repeat=1) + prof_res = np.array(ftimer().results) * 1000 + fps = batch_size * 1000 / np.mean(prof_res) + print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}") + else: + # warm up + for _ in range(args.warmup): + module.run() + + # get dataloader + dataloader = get_dataloader(args.datasets, batch_size, args.num_workers) + + top1_acc = 0 + top5_acc = 0 + total_num = 0 + + for image, label in tqdm(dataloader): + + # pad the last batch + pad_batch = len(image) != batch_size + + if pad_batch: + origin_size = len(image) + image = np.resize(image, (batch_size, *image.shape[1:])) + + module.set_input(args.input_name, tvm.nd.array(image, device)) + + # run inference + module.run() + + pred = module.get_output(0).asnumpy() + + if pad_batch: + pred = pred[:origin_size] + + # get batch accuracy + batch_top1_acc, batch_top5_acc = get_topk_accuracy(pred, label) + + top1_acc += batch_top1_acc + top5_acc += batch_top5_acc + total_num += batch_size + + result_stat = {} + result_stat["acc@1"] = round(top1_acc / total_num * 100.0, 3) + result_stat["acc@5"] = round(top5_acc / total_num * 100.0, 3) + + print(f"\n* Top1 acc: {result_stat['acc@1']} %, Top5 acc: {result_stat['acc@5']} %") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b3/igie/scripts/infer_efficientnet_b3_fp16_accuracy.sh b/models/cv/classification/efficientnet_b3/igie/scripts/infer_efficientnet_b3_fp16_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..36e9a874f7b3795ebe6aa80378978d384db3f6de --- /dev/null +++ b/models/cv/classification/efficientnet_b3/igie/scripts/infer_efficientnet_b3_fp16_accuracy.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
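+# Builds an FP16 IGIE engine (batch size 32) from efficientnet_b3.onnx, then measures
+# Top-1/Top-5 accuracy on the ImageNet validation set pointed to by ${DATASETS_DIR}.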
+ +batchsize=32 +model_path="efficientnet_b3.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path efficientnet_b3_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine efficientnet_b3_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b3/igie/scripts/infer_efficientnet_b3_fp16_performance.sh b/models/cv/classification/efficientnet_b3/igie/scripts/infer_efficientnet_b3_fp16_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..27b13c90249c3b423d8a3dbb5f2f9d123fc5e9f5 --- /dev/null +++ b/models/cv/classification/efficientnet_b3/igie/scripts/infer_efficientnet_b3_fp16_performance.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="efficientnet_b3.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path efficientnet_b3_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine efficientnet_b3_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ + --perf_only True \ No newline at end of file diff --git a/models/cv/classification/efficientnet_v2_s/igie/README.md b/models/cv/classification/efficientnet_v2_s/igie/README.md new file mode 100644 index 0000000000000000000000000000000000000000..cd4fa6f86e6b91311be8dcaf57aa0ef02771b6b2 --- /dev/null +++ b/models/cv/classification/efficientnet_v2_s/igie/README.md @@ -0,0 +1,47 @@ +# EfficientNet_v2_s + +## Description + +EfficientNetV2 S is an optimized model in the EfficientNetV2 series, which was developed by Google researchers. It continues the legacy of the EfficientNet family, focusing on advancing the state-of-the-art in accuracy and efficiency through advanced scaling techniques and architectural innovations. + +## Setup + +### Install + +```bash +pip3 install onnx +pip3 install tqdm +``` + +### Download + +Pretrained model: + +Dataset: to download the validation dataset. 
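+Note: `inference.py` loads the validation set with `torchvision.datasets.ImageFolder`, so `imagenet_val/` is expected to contain one sub-directory per class (the standard ImageNet validation layout).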
+ +### Model Conversion + +```bash +python3 export.py --weight efficientnet_v2_s-dd5fe13b.pth --output efficientnet_v2_s.onnx +``` + +## Inference + +```bash +export DATASETS_DIR=/Path/to/imagenet_val/ +``` + +### FP16 + +```bash +# Accuracy +bash scripts/infer_efficientnet_v2_s_fp16_accuracy.sh +# Performance +bash scripts/infer_efficientnet_v2_s_fp16_performance.sh +``` + +## Results + +| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) | +| ----------------- | --------- | --------- | -------- | -------- | -------- | +| Efficientnet_v2_s | 32 | FP16 | 2357.457 | 81.290 | 95.242 | diff --git a/models/cv/classification/efficientnet_v2_s/igie/build_engine.py b/models/cv/classification/efficientnet_v2_s/igie/build_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..d3626ae76cc9781d9a01ec3d3e2afbdbca409ff5 --- /dev/null +++ b/models/cv/classification/efficientnet_v2_s/igie/build_engine.py @@ -0,0 +1,73 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import tvm +import argparse +from tvm import relay +from tvm.relay.import_model import import_model_to_igie + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--model_path", + type=str, + required=True, + help="original model path.") + + parser.add_argument("--engine_path", + type=str, + required=True, + help="igie export engine path.") + + parser.add_argument("--input", + type=str, + required=True, + help=""" + input info of the model, format should be: + input_name:input_shape + eg: --input input:1,3,224,224. + """) + + parser.add_argument("--precision", + type=str, + choices=["fp32", "fp16", "int8"], + required=True, + help="model inference precision.") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + # get input valueinfo + input_name, input_shape = args.input.split(":") + shape = tuple([int(s) for s in input_shape.split(",")]) + input_dict = {input_name: shape} + + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + + mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie") + + # build engine + lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision) + + # export engine + lib.export_library(args.engine_path) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/classification/efficientnet_v2_s/igie/export.py b/models/cv/classification/efficientnet_v2_s/igie/export.py new file mode 100644 index 0000000000000000000000000000000000000000..63b1b4c8af7c406299a29d241c78506964011839 --- /dev/null +++ b/models/cv/classification/efficientnet_v2_s/igie/export.py @@ -0,0 +1,61 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import torch +import torchvision +import argparse + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--weight", + type=str, + required=True, + help="pytorch model weight.") + + parser.add_argument("--output", + type=str, + required=True, + help="export onnx model path.") + + args = parser.parse_args() + return args + +def main(): + args = parse_args() + + model = torchvision.models.efficientnet_v2_s() + model.load_state_dict(torch.load(args.weight)) + model.eval() + + input_names = ['input'] + output_names = ['output'] + dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}} + dummy_input = torch.randn(1, 3, 224, 224) + + torch.onnx.export( + model, + dummy_input, + args.output, + input_names = input_names, + dynamic_axes = dynamic_axes, + output_names = output_names, + opset_version=13 + ) + + print("Export onnx model successfully! ") + +if __name__ == "__main__": + main() diff --git a/models/cv/classification/efficientnet_v2_s/igie/inference.py b/models/cv/classification/efficientnet_v2_s/igie/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..3aef3ec70fa7e88917c54aeb8242fc73a910c696 --- /dev/null +++ b/models/cv/classification/efficientnet_v2_s/igie/inference.py @@ -0,0 +1,186 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
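+# Note: --perf_only is declared with argparse type=bool, so any non-empty value
+# (including "False") enables the performance-only path; omit the flag entirely
+# to run the accuracy evaluation.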
+ +import sys +import argparse +import tvm +import torch +import torchvision +import numpy as np +from tvm import relay +from tqdm import tqdm +from torchvision import transforms +from torchvision.transforms.functional import InterpolationMode + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--engine", + type=str, + required=True, + help="igie engine path.") + + parser.add_argument("--batchsize", + type=int, + required=True, + help="inference batch size.") + + parser.add_argument("--datasets", + type=str, + required=True, + help="datasets path.") + + parser.add_argument("--input_name", + type=str, + required=True, + help="input name of the model.") + + parser.add_argument("--warmup", + type=int, + default=3, + help="number of warmup before test.") + + parser.add_argument("--num_workers", + type=int, + default=16, + help="number of workers used in pytorch dataloader.") + + parser.add_argument("--acc_target", + type=float, + default=None, + help="Model inference Accuracy target.") + + parser.add_argument("--fps_target", + type=float, + default=None, + help="Model inference FPS target.") + + parser.add_argument("--perf_only", + type=bool, + default=False, + help="Run performance test only") + + args = parser.parse_args() + + return args + +def get_dataloader(data_path, batch_size, num_workers): + dataset = torchvision.datasets.ImageFolder( + data_path, + transforms.Compose( + [ + transforms.Resize(256, interpolation=InterpolationMode.BILINEAR), + transforms.CenterCrop(224), + transforms.PILToTensor(), + transforms.ConvertImageDtype(torch.float), + transforms.Normalize( + mean=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225) + ) + ] + ) + ) + + dataloader = torch.utils.data.DataLoader(dataset, batch_size, num_workers=num_workers) + + return dataloader + +def get_topk_accuracy(pred, label): + if isinstance(pred, np.ndarray): + pred = torch.from_numpy(pred) + + if isinstance(label, np.ndarray): + label = torch.from_numpy(label) + + top1_acc = 0 + top5_acc = 0 + for idx in range(len(label)): + label_value = label[idx] + if label_value == torch.topk(pred[idx].float(), 1).indices.data: + top1_acc += 1 + top5_acc += 1 + + elif label_value in torch.topk(pred[idx].float(), 5).indices.data: + top5_acc += 1 + + return top1_acc, top5_acc + +def main(): + args = parse_args() + + batch_size = args.batchsize + + # create iluvatar target & device + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + device = tvm.device(target.kind.name, 0) + + # load engine + lib = tvm.runtime.load_module(args.engine) + + # create runtime from engine + module = tvm.contrib.graph_executor.GraphModule(lib["default"](device)) + + # just run perf test + if args.perf_only: + ftimer = module.module.time_evaluator("run", device, number=100, repeat=1) + prof_res = np.array(ftimer().results) * 1000 + fps = batch_size * 1000 / np.mean(prof_res) + print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}") + else: + # warm up + for _ in range(args.warmup): + module.run() + + # get dataloader + dataloader = get_dataloader(args.datasets, batch_size, args.num_workers) + + top1_acc = 0 + top5_acc = 0 + total_num = 0 + + for image, label in tqdm(dataloader): + + # pad the last batch + pad_batch = len(image) != batch_size + + if pad_batch: + origin_size = len(image) + image = np.resize(image, (batch_size, *image.shape[1:])) + + module.set_input(args.input_name, tvm.nd.array(image, device)) + + # run inference + module.run() + + pred = 
module.get_output(0).asnumpy() + + if pad_batch: + pred = pred[:origin_size] + + # get batch accuracy + batch_top1_acc, batch_top5_acc = get_topk_accuracy(pred, label) + + top1_acc += batch_top1_acc + top5_acc += batch_top5_acc + total_num += batch_size + + result_stat = {} + result_stat["acc@1"] = round(top1_acc / total_num * 100.0, 3) + result_stat["acc@5"] = round(top5_acc / total_num * 100.0, 3) + + print(f"\n* Top1 acc: {result_stat['acc@1']} %, Top5 acc: {result_stat['acc@5']} %") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/classification/efficientnet_v2_s/igie/scripts/infer_efficientnet_v2_s_fp16_accuracy.sh b/models/cv/classification/efficientnet_v2_s/igie/scripts/infer_efficientnet_v2_s_fp16_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..2e7d113328c3c605a3609e2b9b562663000f3334 --- /dev/null +++ b/models/cv/classification/efficientnet_v2_s/igie/scripts/infer_efficientnet_v2_s_fp16_accuracy.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="efficientnet_v2_s.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path efficientnet_v2_s_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine efficientnet_v2_s_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ No newline at end of file diff --git a/models/cv/classification/efficientnet_v2_s/igie/scripts/infer_efficientnet_v2_s_fp16_performance.sh b/models/cv/classification/efficientnet_v2_s/igie/scripts/infer_efficientnet_v2_s_fp16_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..4c67dd99f550c7ff7e50181de53231e4b1ca9cb7 --- /dev/null +++ b/models/cv/classification/efficientnet_v2_s/igie/scripts/infer_efficientnet_v2_s_fp16_performance.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
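+# Builds the FP16 engine and then runs inference.py with --perf_only True, which
+# times 100 engine executions and reports mean latency and FPS (no accuracy pass).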
+ +batchsize=32 +model_path="efficientnet_v2_s.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path efficientnet_v2_s_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine efficientnet_v2_s_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ + --perf_only True \ No newline at end of file diff --git a/models/cv/classification/mnasnet0_5/igie/README.md b/models/cv/classification/mnasnet0_5/igie/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3055a18717c820f843000fe0947226791dbd89a5 --- /dev/null +++ b/models/cv/classification/mnasnet0_5/igie/README.md @@ -0,0 +1,48 @@ +# MNASNet0_5 + +## Description + +MNASNet0_5 is a neural network architecture optimized for mobile devices, designed through neural architecture search technology. It is characterized by high efficiency and excellent accuracy, offering 50% higher accuracy than MobileNetV2 while maintaining low latency and memory usage. MNASNet0_5 widely uses depthwise separable convolutions, supports multi-scale inputs, and demonstrates good robustness, making it suitable for real-time image recognition tasks in resource-constrained environments. + + +## Setup + +### Install + +```bash +pip3 install onnx +pip3 install tqdm +``` + +### Download + +Pretrained model: + +Dataset: to download the validation dataset. + +### Model Conversion + +```bash +python3 export.py --weight mnasnet0.5_top1_67.823-3ffadce67e.pth --output mnasnet0_5.onnx +``` + +## Inference + +```bash +export DATASETS_DIR=/Path/to/imagenet_val/ +``` + +### FP16 + +```bash +# Accuracy +bash scripts/infer_mnasnet0_5_fp16_accuracy.sh +# Performance +bash scripts/infer_mnasnet0_5_fp16_performance.sh +``` + +## Results + +| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) | +| ----------------- | --------- | --------- | -------- | -------- | -------- | +| MnasNet0_5 | 32 | FP16 | 7933.980 | 67.748 | 87.452 | diff --git a/models/cv/classification/mnasnet0_5/igie/build_engine.py b/models/cv/classification/mnasnet0_5/igie/build_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..d3626ae76cc9781d9a01ec3d3e2afbdbca409ff5 --- /dev/null +++ b/models/cv/classification/mnasnet0_5/igie/build_engine.py @@ -0,0 +1,73 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
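+# Compile an ONNX model into an IGIE engine (a TVM-built shared library) for
+# the Iluvatar target and export it to --engine_path. Typical invocation, as
+# used by the scripts in this directory:
+#   python3 build_engine.py --model_path mnasnet0_5.onnx \
+#       --input input:32,3,224,224 --precision fp16 \
+#       --engine_path mnasnet0_5_bs_32_fp16.so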
+ +import tvm +import argparse +from tvm import relay +from tvm.relay.import_model import import_model_to_igie + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--model_path", + type=str, + required=True, + help="original model path.") + + parser.add_argument("--engine_path", + type=str, + required=True, + help="igie export engine path.") + + parser.add_argument("--input", + type=str, + required=True, + help=""" + input info of the model, format should be: + input_name:input_shape + eg: --input input:1,3,224,224. + """) + + parser.add_argument("--precision", + type=str, + choices=["fp32", "fp16", "int8"], + required=True, + help="model inference precision.") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + # get input valueinfo + input_name, input_shape = args.input.split(":") + shape = tuple([int(s) for s in input_shape.split(",")]) + input_dict = {input_name: shape} + + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + + mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie") + + # build engine + lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision) + + # export engine + lib.export_library(args.engine_path) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/classification/mnasnet0_5/igie/export.py b/models/cv/classification/mnasnet0_5/igie/export.py new file mode 100644 index 0000000000000000000000000000000000000000..bd48e206871de1b3cc86f267e215d556b3bd3c6f --- /dev/null +++ b/models/cv/classification/mnasnet0_5/igie/export.py @@ -0,0 +1,61 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import torch +import torchvision +import argparse + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--weight", + type=str, + required=True, + help="pytorch model weight.") + + parser.add_argument("--output", + type=str, + required=True, + help="export onnx model path.") + + args = parser.parse_args() + return args + +def main(): + args = parse_args() + + model = torchvision.models.mnasnet0_5() + model.load_state_dict(torch.load(args.weight)) + model.eval() + + input_names = ['input'] + output_names = ['output'] + dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}} + dummy_input = torch.randn(1, 3, 224, 224) + + torch.onnx.export( + model, + dummy_input, + args.output, + input_names = input_names, + dynamic_axes = dynamic_axes, + output_names = output_names, + opset_version=13 + ) + + print("Export onnx model successfully! 
") + +if __name__ == "__main__": + main() diff --git a/models/cv/classification/mnasnet0_5/igie/inference.py b/models/cv/classification/mnasnet0_5/igie/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..3aef3ec70fa7e88917c54aeb8242fc73a910c696 --- /dev/null +++ b/models/cv/classification/mnasnet0_5/igie/inference.py @@ -0,0 +1,186 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import sys +import argparse +import tvm +import torch +import torchvision +import numpy as np +from tvm import relay +from tqdm import tqdm +from torchvision import transforms +from torchvision.transforms.functional import InterpolationMode + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--engine", + type=str, + required=True, + help="igie engine path.") + + parser.add_argument("--batchsize", + type=int, + required=True, + help="inference batch size.") + + parser.add_argument("--datasets", + type=str, + required=True, + help="datasets path.") + + parser.add_argument("--input_name", + type=str, + required=True, + help="input name of the model.") + + parser.add_argument("--warmup", + type=int, + default=3, + help="number of warmup before test.") + + parser.add_argument("--num_workers", + type=int, + default=16, + help="number of workers used in pytorch dataloader.") + + parser.add_argument("--acc_target", + type=float, + default=None, + help="Model inference Accuracy target.") + + parser.add_argument("--fps_target", + type=float, + default=None, + help="Model inference FPS target.") + + parser.add_argument("--perf_only", + type=bool, + default=False, + help="Run performance test only") + + args = parser.parse_args() + + return args + +def get_dataloader(data_path, batch_size, num_workers): + dataset = torchvision.datasets.ImageFolder( + data_path, + transforms.Compose( + [ + transforms.Resize(256, interpolation=InterpolationMode.BILINEAR), + transforms.CenterCrop(224), + transforms.PILToTensor(), + transforms.ConvertImageDtype(torch.float), + transforms.Normalize( + mean=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225) + ) + ] + ) + ) + + dataloader = torch.utils.data.DataLoader(dataset, batch_size, num_workers=num_workers) + + return dataloader + +def get_topk_accuracy(pred, label): + if isinstance(pred, np.ndarray): + pred = torch.from_numpy(pred) + + if isinstance(label, np.ndarray): + label = torch.from_numpy(label) + + top1_acc = 0 + top5_acc = 0 + for idx in range(len(label)): + label_value = label[idx] + if label_value == torch.topk(pred[idx].float(), 1).indices.data: + top1_acc += 1 + top5_acc += 1 + + elif label_value in torch.topk(pred[idx].float(), 5).indices.data: + top5_acc += 1 + + return top1_acc, top5_acc + +def main(): + args = parse_args() + + batch_size = args.batchsize + + # create iluvatar target & device + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + device = tvm.device(target.kind.name, 0) + + # 
load engine + lib = tvm.runtime.load_module(args.engine) + + # create runtime from engine + module = tvm.contrib.graph_executor.GraphModule(lib["default"](device)) + + # just run perf test + if args.perf_only: + ftimer = module.module.time_evaluator("run", device, number=100, repeat=1) + prof_res = np.array(ftimer().results) * 1000 + fps = batch_size * 1000 / np.mean(prof_res) + print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}") + else: + # warm up + for _ in range(args.warmup): + module.run() + + # get dataloader + dataloader = get_dataloader(args.datasets, batch_size, args.num_workers) + + top1_acc = 0 + top5_acc = 0 + total_num = 0 + + for image, label in tqdm(dataloader): + + # pad the last batch + pad_batch = len(image) != batch_size + + if pad_batch: + origin_size = len(image) + image = np.resize(image, (batch_size, *image.shape[1:])) + + module.set_input(args.input_name, tvm.nd.array(image, device)) + + # run inference + module.run() + + pred = module.get_output(0).asnumpy() + + if pad_batch: + pred = pred[:origin_size] + + # get batch accuracy + batch_top1_acc, batch_top5_acc = get_topk_accuracy(pred, label) + + top1_acc += batch_top1_acc + top5_acc += batch_top5_acc + total_num += batch_size + + result_stat = {} + result_stat["acc@1"] = round(top1_acc / total_num * 100.0, 3) + result_stat["acc@5"] = round(top5_acc / total_num * 100.0, 3) + + print(f"\n* Top1 acc: {result_stat['acc@1']} %, Top5 acc: {result_stat['acc@5']} %") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/classification/mnasnet0_5/igie/scripts/infer_mnasnet0_5_fp16_accuracy.sh b/models/cv/classification/mnasnet0_5/igie/scripts/infer_mnasnet0_5_fp16_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..a1c3b37a6198e500753759a7b437fb8c84399239 --- /dev/null +++ b/models/cv/classification/mnasnet0_5/igie/scripts/infer_mnasnet0_5_fp16_accuracy.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
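+# Accuracy run: DATASETS_DIR must point to the ImageNet validation set in
+# torchvision ImageFolder layout (one sub-directory per class). The script
+# builds a batch-size-32 FP16 engine and then reports Top-1/Top-5 accuracy.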
+ +batchsize=32 +model_path="mnasnet0_5.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path mnasnet0_5_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine mnasnet0_5_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ No newline at end of file diff --git a/models/cv/classification/mnasnet0_5/igie/scripts/infer_mnasnet0_5_fp16_performance.sh b/models/cv/classification/mnasnet0_5/igie/scripts/infer_mnasnet0_5_fp16_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..89271d3380b9988acb3374b5ffaaa691b0496b3a --- /dev/null +++ b/models/cv/classification/mnasnet0_5/igie/scripts/infer_mnasnet0_5_fp16_performance.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="mnasnet0_5.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path mnasnet0_5_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine mnasnet0_5_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ + --perf_only True \ No newline at end of file diff --git a/models/cv/classification/mvitv2_base/igie/README.md b/models/cv/classification/mvitv2_base/igie/README.md new file mode 100644 index 0000000000000000000000000000000000000000..199a9597ae942fe578fbb75790d989f4d2980761 --- /dev/null +++ b/models/cv/classification/mvitv2_base/igie/README.md @@ -0,0 +1,68 @@ +# MViTv2-base + +## Description + +MViTv2_base is an efficient multi-scale vision Transformer model designed specifically for image classification tasks. By employing a multi-scale structure and hierarchical representation, it effectively captures both global and local image features while maintaining computational efficiency. The MViTv2_base has demonstrated excellent performance on multiple standard datasets and is suitable for a variety of visual recognition tasks. + +## Setup + +### Install + +```bash +# Install libGL +## CentOS +yum install -y mesa-libGL +## Ubuntu +apt install -y libgl1-mesa-dev + +pip3 install onnx +pip3 install tqdm +pip3 install onnxsim +pip3 install mmcv==1.5.3 +pip3 install mmcls +``` + +### Download + +Pretrained model: + +Dataset: to download the validation dataset. 
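+
+The inference script loads the validation set with `torchvision.datasets.ImageFolder`, so `imagenet_val/` should contain one sub-directory per class. The layout below is only illustrative:
+
+```bash
+imagenet_val/
+    n01440764/
+        ILSVRC2012_val_*.JPEG
+    n01443537/
+        ILSVRC2012_val_*.JPEG
+    ...
+```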
+ +### Model Conversion + +```bash +# git clone mmpretrain +git clone -b v0.24.0 https://github.com/open-mmlab/mmpretrain.git + +# export onnx model +python3 export.py --cfg mmpretrain/configs/mvit/mvitv2-base_8xb256_in1k.py --weight mvitv2-base_3rdparty_in1k_20220722-9c4f0a17.pth --output mvitv2_base.onnx + +# Use onnxsim optimize onnx model +onnxsim mvitv2_base.onnx mvitv2_base_opt.onnx + +``` + +## Inference + +```bash +export DATASETS_DIR=/Path/to/imagenet_val/ +``` + +### FP16 + +```bash +# Accuracy +bash scripts/infer_mvitv2_base_fp16_accuracy.sh +# Performance +bash scripts/infer_mvitv2_base_fp16_performance.sh +``` + +## Results + +| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) | +| ----------- | --------- | --------- | -------- | -------- | -------- | +| MViTv2-base | 16 | FP16 | 58.76 | 84.226 | 96.848 | + +## Reference + +MViTv2-base: diff --git a/models/cv/classification/mvitv2_base/igie/build_engine.py b/models/cv/classification/mvitv2_base/igie/build_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..d3626ae76cc9781d9a01ec3d3e2afbdbca409ff5 --- /dev/null +++ b/models/cv/classification/mvitv2_base/igie/build_engine.py @@ -0,0 +1,73 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import tvm +import argparse +from tvm import relay +from tvm.relay.import_model import import_model_to_igie + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--model_path", + type=str, + required=True, + help="original model path.") + + parser.add_argument("--engine_path", + type=str, + required=True, + help="igie export engine path.") + + parser.add_argument("--input", + type=str, + required=True, + help=""" + input info of the model, format should be: + input_name:input_shape + eg: --input input:1,3,224,224. 
+ """) + + parser.add_argument("--precision", + type=str, + choices=["fp32", "fp16", "int8"], + required=True, + help="model inference precision.") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + # get input valueinfo + input_name, input_shape = args.input.split(":") + shape = tuple([int(s) for s in input_shape.split(",")]) + input_dict = {input_name: shape} + + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + + mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie") + + # build engine + lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision) + + # export engine + lib.export_library(args.engine_path) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/classification/mvitv2_base/igie/export.py b/models/cv/classification/mvitv2_base/igie/export.py new file mode 100644 index 0000000000000000000000000000000000000000..d78b898b57ab44fe968882f2beb27e5d655c509f --- /dev/null +++ b/models/cv/classification/mvitv2_base/igie/export.py @@ -0,0 +1,74 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +import argparse + +import torch +from mmcls.apis import init_model + +class Model(torch.nn.Module): + def __init__(self, config_file, checkpoint_file): + super().__init__() + self.model = init_model(config_file, checkpoint_file, device="cpu") + + def forward(self, x): + head = self.model.simple_test(x) + return head + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--weight", + type=str, + required=True, + help="pytorch model weight.") + + parser.add_argument("--cfg", + type=str, + required=True, + help="model config file.") + + parser.add_argument("--output", + type=str, + required=True, + help="export onnx model path.") + + args = parser.parse_args() + return args + +def main(): + args = parse_args() + + config_file = args.cfg + checkpoint_file = args.weight + model = Model(config_file, checkpoint_file).eval() + + input_names = ['input'] + output_names = ['output'] + dummy_input = torch.randn(16, 3, 224, 224) + + torch.onnx.export( + model, + dummy_input, + args.output, + input_names = input_names, + output_names = output_names, + opset_version=13 + ) + + print("Export onnx model successfully! ") + +if __name__ == '__main__': + main() + diff --git a/models/cv/classification/mvitv2_base/igie/inference.py b/models/cv/classification/mvitv2_base/igie/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..c42cf871274e316f7175ea3ad77c5468d2c9b936 --- /dev/null +++ b/models/cv/classification/mvitv2_base/igie/inference.py @@ -0,0 +1,185 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import sys +import argparse +import tvm +import torch +import torchvision +import numpy as np +from tvm import relay +from tqdm import tqdm +from torchvision import transforms + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--engine", + type=str, + required=True, + help="igie engine path.") + + parser.add_argument("--batchsize", + type=int, + required=True, + help="inference batch size.") + + parser.add_argument("--datasets", + type=str, + required=True, + help="datasets path.") + + parser.add_argument("--input_name", + type=str, + required=True, + help="input name of the model.") + + parser.add_argument("--warmup", + type=int, + default=3, + help="number of warmup before test.") + + parser.add_argument("--num_workers", + type=int, + default=16, + help="number of workers used in pytorch dataloader.") + + parser.add_argument("--acc_target", + type=float, + default=None, + help="Model inference Accuracy target.") + + parser.add_argument("--fps_target", + type=float, + default=None, + help="Model inference FPS target.") + + parser.add_argument("--perf_only", + type=bool, + default=False, + help="Run performance test only") + + args = parser.parse_args() + + return args + +def get_dataloader(data_path, batch_size, num_workers): + dataset = torchvision.datasets.ImageFolder( + data_path, + transforms.Compose( + [ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.PILToTensor(), + transforms.ConvertImageDtype(torch.float), + transforms.Normalize( + mean=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225) + ) + ] + ) + ) + + dataloader = torch.utils.data.DataLoader(dataset, batch_size, num_workers=num_workers) + + return dataloader + +def get_topk_accuracy(pred, label): + if isinstance(pred, np.ndarray): + pred = torch.from_numpy(pred) + + if isinstance(label, np.ndarray): + label = torch.from_numpy(label) + + top1_acc = 0 + top5_acc = 0 + for idx in range(len(label)): + label_value = label[idx] + if label_value == torch.topk(pred[idx].float(), 1).indices.data: + top1_acc += 1 + top5_acc += 1 + + elif label_value in torch.topk(pred[idx].float(), 5).indices.data: + top5_acc += 1 + + return top1_acc, top5_acc + +def main(): + args = parse_args() + + batch_size = args.batchsize + + # create iluvatar target & device + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + device = tvm.device(target.kind.name, 0) + + # load engine + lib = tvm.runtime.load_module(args.engine) + + # create runtime from engine + module = tvm.contrib.graph_executor.GraphModule(lib["default"](device)) + + # just run perf test + if args.perf_only: + ftimer = module.module.time_evaluator("run", device, number=100, repeat=1) + prof_res = np.array(ftimer().results) * 1000 + fps = batch_size * 1000 / np.mean(prof_res) + print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}") + else: + # warm up + for _ in range(args.warmup): + module.run() + + # get dataloader + dataloader = get_dataloader(args.datasets, batch_size, args.num_workers) + + top1_acc = 0 + top5_acc = 0 + total_num = 0 + + for image, label in 
tqdm(dataloader): + + # pad the last batch + pad_batch = len(image) != batch_size + + if pad_batch: + origin_size = len(image) + image = np.resize(image, (batch_size, *image.shape[1:])) + + module.set_input(args.input_name, tvm.nd.array(image, device)) + + # run inference + module.run() + + pred = module.get_output(0).asnumpy() + + if pad_batch: + pred = pred[:origin_size] + + # get batch accuracy + batch_top1_acc, batch_top5_acc = get_topk_accuracy(pred, label) + + top1_acc += batch_top1_acc + top5_acc += batch_top5_acc + total_num += batch_size + + result_stat = {} + result_stat["acc@1"] = round(top1_acc / total_num * 100.0, 3) + result_stat["acc@5"] = round(top5_acc / total_num * 100.0, 3) + + print(f"\n* Top1 acc: {result_stat['acc@1']} %, Top5 acc: {result_stat['acc@5']} %") + +if __name__ == "__main__": + main() diff --git a/models/cv/classification/mvitv2_base/igie/scripts/infer_mvitv2_base_fp16_accuracy.sh b/models/cv/classification/mvitv2_base/igie/scripts/infer_mvitv2_base_fp16_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..85f66d502c91b6d1b5d4a371576dd6ece1167067 --- /dev/null +++ b/models/cv/classification/mvitv2_base/igie/scripts/infer_mvitv2_base_fp16_accuracy.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=16 +model_path="mvitv2_base_opt.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path mvitv2_base_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine mvitv2_base_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} diff --git a/models/cv/classification/mvitv2_base/igie/scripts/infer_mvitv2_base_fp16_performance.sh b/models/cv/classification/mvitv2_base/igie/scripts/infer_mvitv2_base_fp16_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..d54dac50ec287023085ef286b9642d1b4512b42e --- /dev/null +++ b/models/cv/classification/mvitv2_base/igie/scripts/infer_mvitv2_base_fp16_performance.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
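+# Performance-only run for MViTv2-base: the engine is built from the
+# onnxsim-optimized model (mvitv2_base_opt.onnx) at batch size 16, and
+# inference.py --perf_only reports mean latency (ms) and FPS.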
+ +batchsize=16 +model_path="mvitv2_base_opt.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path mvitv2_base_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine mvitv2_base_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ + --perf_only True \ No newline at end of file diff --git a/models/cv/classification/regnet_y_1_6gf/igie/README.md b/models/cv/classification/regnet_y_1_6gf/igie/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3f96b09ba2ea35f66ce8ed868eaefb8ea3430669 --- /dev/null +++ b/models/cv/classification/regnet_y_1_6gf/igie/README.md @@ -0,0 +1,47 @@ +# RegNet_y_1_6gf + +## Description + +RegNet is a family of models designed for image classification tasks, as described in the paper "Designing Network Design Spaces". The RegNet design space provides simple and fast networks that work well across a wide range of computational budgets.The architecture of RegNet models is based on the principle of designing network design spaces, which allows for a more systematic exploration of possible network architectures. This makes it easier to understand and modify the architecture.RegNet_y_1_6gf is a specific model within the RegNet family, designed for image classification tasks. + +## Setup + +### Install + +```bash +pip3 install onnx +pip3 install tqdm +``` + +### Download + +Pretrained model: + +Dataset: to download the validation dataset. + +### Model Conversion + +```bash +python3 export.py --weight regnet_y_1_6gf-b11a554e.pth --output regnet_y_1_6gf.onnx +``` + +## Inference + +```bash +export DATASETS_DIR=/Path/to/imagenet_val/ +``` + +### FP16 + +```bash +# Accuracy +bash scripts/infer_regnet_y_1_6gf_fp16_accuracy.sh +# Performance +bash scripts/infer_regnet_y_1_6gf_fp16_performance.sh +``` + +## Results + +Model |BatchSize |Precision |FPS |Top-1(%) |Top-5(%) +------------------|-----------|----------|---------|---------|-------- +RegNet_y_1_6gf | 32 | FP16 | 1785.44 | 77.933 | 93.948 diff --git a/models/cv/classification/regnet_y_1_6gf/igie/build_engine.py b/models/cv/classification/regnet_y_1_6gf/igie/build_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..d3626ae76cc9781d9a01ec3d3e2afbdbca409ff5 --- /dev/null +++ b/models/cv/classification/regnet_y_1_6gf/igie/build_engine.py @@ -0,0 +1,73 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
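+# The --input argument packs the tensor name and static shape as
+# "name:d0,d1,...", e.g. "input:32,3,224,224"; it is split below into the
+# input_dict handed to import_model_to_igie before relay.build compiles the
+# engine at the requested precision (fp32, fp16 or int8).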
+ +import tvm +import argparse +from tvm import relay +from tvm.relay.import_model import import_model_to_igie + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--model_path", + type=str, + required=True, + help="original model path.") + + parser.add_argument("--engine_path", + type=str, + required=True, + help="igie export engine path.") + + parser.add_argument("--input", + type=str, + required=True, + help=""" + input info of the model, format should be: + input_name:input_shape + eg: --input input:1,3,224,224. + """) + + parser.add_argument("--precision", + type=str, + choices=["fp32", "fp16", "int8"], + required=True, + help="model inference precision.") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + # get input valueinfo + input_name, input_shape = args.input.split(":") + shape = tuple([int(s) for s in input_shape.split(",")]) + input_dict = {input_name: shape} + + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + + mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie") + + # build engine + lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision) + + # export engine + lib.export_library(args.engine_path) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/classification/regnet_y_1_6gf/igie/export.py b/models/cv/classification/regnet_y_1_6gf/igie/export.py new file mode 100644 index 0000000000000000000000000000000000000000..f2a9f0a2757b3a65852b9825b87ef756a08a7ec4 --- /dev/null +++ b/models/cv/classification/regnet_y_1_6gf/igie/export.py @@ -0,0 +1,61 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import torch +import torchvision +import argparse + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--weight", + type=str, + required=True, + help="pytorch model weight.") + + parser.add_argument("--output", + type=str, + required=True, + help="export onnx model path.") + + args = parser.parse_args() + return args + +def main(): + args = parse_args() + + model = torchvision.models.regnet_y_1_6gf() + model.load_state_dict(torch.load(args.weight)) + model.eval() + + input_names = ['input'] + output_names = ['output'] + dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}} + dummy_input = torch.randn(1, 3, 224, 224) + + torch.onnx.export( + model, + dummy_input, + args.output, + input_names = input_names, + dynamic_axes = dynamic_axes, + output_names = output_names, + opset_version=13 + ) + + print("Export onnx model successfully! 
") + +if __name__ == "__main__": + main() diff --git a/models/cv/classification/regnet_y_1_6gf/igie/inference.py b/models/cv/classification/regnet_y_1_6gf/igie/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..3aef3ec70fa7e88917c54aeb8242fc73a910c696 --- /dev/null +++ b/models/cv/classification/regnet_y_1_6gf/igie/inference.py @@ -0,0 +1,186 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import sys +import argparse +import tvm +import torch +import torchvision +import numpy as np +from tvm import relay +from tqdm import tqdm +from torchvision import transforms +from torchvision.transforms.functional import InterpolationMode + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--engine", + type=str, + required=True, + help="igie engine path.") + + parser.add_argument("--batchsize", + type=int, + required=True, + help="inference batch size.") + + parser.add_argument("--datasets", + type=str, + required=True, + help="datasets path.") + + parser.add_argument("--input_name", + type=str, + required=True, + help="input name of the model.") + + parser.add_argument("--warmup", + type=int, + default=3, + help="number of warmup before test.") + + parser.add_argument("--num_workers", + type=int, + default=16, + help="number of workers used in pytorch dataloader.") + + parser.add_argument("--acc_target", + type=float, + default=None, + help="Model inference Accuracy target.") + + parser.add_argument("--fps_target", + type=float, + default=None, + help="Model inference FPS target.") + + parser.add_argument("--perf_only", + type=bool, + default=False, + help="Run performance test only") + + args = parser.parse_args() + + return args + +def get_dataloader(data_path, batch_size, num_workers): + dataset = torchvision.datasets.ImageFolder( + data_path, + transforms.Compose( + [ + transforms.Resize(256, interpolation=InterpolationMode.BILINEAR), + transforms.CenterCrop(224), + transforms.PILToTensor(), + transforms.ConvertImageDtype(torch.float), + transforms.Normalize( + mean=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225) + ) + ] + ) + ) + + dataloader = torch.utils.data.DataLoader(dataset, batch_size, num_workers=num_workers) + + return dataloader + +def get_topk_accuracy(pred, label): + if isinstance(pred, np.ndarray): + pred = torch.from_numpy(pred) + + if isinstance(label, np.ndarray): + label = torch.from_numpy(label) + + top1_acc = 0 + top5_acc = 0 + for idx in range(len(label)): + label_value = label[idx] + if label_value == torch.topk(pred[idx].float(), 1).indices.data: + top1_acc += 1 + top5_acc += 1 + + elif label_value in torch.topk(pred[idx].float(), 5).indices.data: + top5_acc += 1 + + return top1_acc, top5_acc + +def main(): + args = parse_args() + + batch_size = args.batchsize + + # create iluvatar target & device + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + device = tvm.device(target.kind.name, 
0) + + # load engine + lib = tvm.runtime.load_module(args.engine) + + # create runtime from engine + module = tvm.contrib.graph_executor.GraphModule(lib["default"](device)) + + # just run perf test + if args.perf_only: + ftimer = module.module.time_evaluator("run", device, number=100, repeat=1) + prof_res = np.array(ftimer().results) * 1000 + fps = batch_size * 1000 / np.mean(prof_res) + print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}") + else: + # warm up + for _ in range(args.warmup): + module.run() + + # get dataloader + dataloader = get_dataloader(args.datasets, batch_size, args.num_workers) + + top1_acc = 0 + top5_acc = 0 + total_num = 0 + + for image, label in tqdm(dataloader): + + # pad the last batch + pad_batch = len(image) != batch_size + + if pad_batch: + origin_size = len(image) + image = np.resize(image, (batch_size, *image.shape[1:])) + + module.set_input(args.input_name, tvm.nd.array(image, device)) + + # run inference + module.run() + + pred = module.get_output(0).asnumpy() + + if pad_batch: + pred = pred[:origin_size] + + # get batch accuracy + batch_top1_acc, batch_top5_acc = get_topk_accuracy(pred, label) + + top1_acc += batch_top1_acc + top5_acc += batch_top5_acc + total_num += batch_size + + result_stat = {} + result_stat["acc@1"] = round(top1_acc / total_num * 100.0, 3) + result_stat["acc@5"] = round(top5_acc / total_num * 100.0, 3) + + print(f"\n* Top1 acc: {result_stat['acc@1']} %, Top5 acc: {result_stat['acc@5']} %") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/classification/regnet_y_1_6gf/igie/scripts/infer_regnet_y_1_6gf_fp16_accuracy.sh b/models/cv/classification/regnet_y_1_6gf/igie/scripts/infer_regnet_y_1_6gf_fp16_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..62d9cc115ed77d3bdc8260cdedc50cadd8bc5b9b --- /dev/null +++ b/models/cv/classification/regnet_y_1_6gf/igie/scripts/infer_regnet_y_1_6gf_fp16_accuracy.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
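+# Accuracy run for RegNet_y_1_6gf: the FP16 engine is written to
+# regnet_y_1_6gf_bs_32_fp16.so and the same path is passed to inference.py
+# via --engine; export DATASETS_DIR=/path/to/imagenet_val before running.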
+ +batchsize=32 +model_path="regnet_y_1_6gf.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path regnet_y_1_6gf_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine regnet_y_1_6gf_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ No newline at end of file diff --git a/models/cv/classification/regnet_y_1_6gf/igie/scripts/infer_regnet_y_1_6gf_fp16_performance.sh b/models/cv/classification/regnet_y_1_6gf/igie/scripts/infer_regnet_y_1_6gf_fp16_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..afe3a45083e873691b0a670864e7055a36c72587 --- /dev/null +++ b/models/cv/classification/regnet_y_1_6gf/igie/scripts/infer_regnet_y_1_6gf_fp16_performance.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="regnet_y_1_6gf.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path regnet_y_1_6gf_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine regnet_y_1_6gf_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ + --perf_only True \ No newline at end of file diff --git a/models/cv/classification/resnetv1d50/igie/README.md b/models/cv/classification/resnetv1d50/igie/README.md new file mode 100644 index 0000000000000000000000000000000000000000..0609b34fd422b671b249147ceec9b75613805a99 --- /dev/null +++ b/models/cv/classification/resnetv1d50/igie/README.md @@ -0,0 +1,64 @@ +# ResNetV1D-50 + +## Description + +ResNetV1D-50 is an enhanced version of ResNetV1-50 that incorporates changes like dilated convolutions and adjusted downsampling, leading to better performance in large-scale image classification tasks. Its ability to capture richer image features makes it a popular choice in deep learning models. + +## Setup + +### Install + +```bash +# Install libGL +## CentOS +yum install -y mesa-libGL +## Ubuntu +apt install -y libgl1-mesa-dev + +pip3 install onnx +pip3 install tqdm +pip3 install mmcv==1.5.3 +pip3 install mmcls +``` + +### Download + +Pretrained model: + +Dataset: to download the validation dataset. 
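+
+Note that `export.py` writes the ONNX model with a dynamic batch axis, while `build_engine.py` compiles a fixed-shape engine, so the shape given at build time has to match the `--batchsize` later passed to `inference.py`. A condensed sketch, using the values from the bundled scripts:
+
+```bash
+python3 build_engine.py --model_path resnetv1d50.onnx \
+    --input input:32,3,224,224 --precision fp16 \
+    --engine_path resnetv1d50_bs_32_fp16.so
+
+python3 inference.py --engine resnetv1d50_bs_32_fp16.so --batchsize 32 \
+    --input_name input --datasets ${DATASETS_DIR}
+```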
+ +### Model Conversion + +```bash +# git clone mmpretrain +git clone -b v0.24.0 https://github.com/open-mmlab/mmpretrain.git + +# export onnx model +python3 export.py --cfg mmpretrain/configs/resnet/resnetv1d50_b32x8_imagenet.py --weight resnetv1d50_b32x8_imagenet_20210531-db14775a.pth --output resnetv1d50.onnx + +``` + +## Inference + +```bash +export DATASETS_DIR=/Path/to/imagenet_val/ +``` + +### FP16 + +```bash +# Accuracy +bash scripts/infer_resnetv1d50_fp16_accuracy.sh +# Performance +bash scripts/infer_resnetv1d50_fp16_performance.sh +``` + +## Results + +| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) | +| ------------ | --------- | --------- | -------- | -------- | -------- | +| ResNetV1D-50 | 32 | FP16 | 4017.92 | 77.517 | 93.538 | + +## Reference + +ResNetV1D-50: diff --git a/models/cv/classification/resnetv1d50/igie/build_engine.py b/models/cv/classification/resnetv1d50/igie/build_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..d3626ae76cc9781d9a01ec3d3e2afbdbca409ff5 --- /dev/null +++ b/models/cv/classification/resnetv1d50/igie/build_engine.py @@ -0,0 +1,73 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import tvm +import argparse +from tvm import relay +from tvm.relay.import_model import import_model_to_igie + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--model_path", + type=str, + required=True, + help="original model path.") + + parser.add_argument("--engine_path", + type=str, + required=True, + help="igie export engine path.") + + parser.add_argument("--input", + type=str, + required=True, + help=""" + input info of the model, format should be: + input_name:input_shape + eg: --input input:1,3,224,224. + """) + + parser.add_argument("--precision", + type=str, + choices=["fp32", "fp16", "int8"], + required=True, + help="model inference precision.") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + # get input valueinfo + input_name, input_shape = args.input.split(":") + shape = tuple([int(s) for s in input_shape.split(",")]) + input_dict = {input_name: shape} + + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + + mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie") + + # build engine + lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision) + + # export engine + lib.export_library(args.engine_path) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/classification/resnetv1d50/igie/export.py b/models/cv/classification/resnetv1d50/igie/export.py new file mode 100644 index 0000000000000000000000000000000000000000..840450997c44ecfab2191cfbd1c545e00ff9de57 --- /dev/null +++ b/models/cv/classification/resnetv1d50/igie/export.py @@ -0,0 +1,76 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +import argparse + +import torch +from mmcls.apis import init_model + +class Model(torch.nn.Module): + def __init__(self, config_file, checkpoint_file): + super().__init__() + self.model = init_model(config_file, checkpoint_file, device="cpu") + + def forward(self, x): + head = self.model.simple_test(x) + return head + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--weight", + type=str, + required=True, + help="pytorch model weight.") + + parser.add_argument("--cfg", + type=str, + required=True, + help="model config file.") + + parser.add_argument("--output", + type=str, + required=True, + help="export onnx model path.") + + args = parser.parse_args() + return args + +def main(): + args = parse_args() + + config_file = args.cfg + checkpoint_file = args.weight + model = Model(config_file, checkpoint_file).eval() + + input_names = ['input'] + output_names = ['output'] + dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}} + dummy_input = torch.randn(1, 3, 224, 224) + + torch.onnx.export( + model, + dummy_input, + args.output, + input_names = input_names, + dynamic_axes = dynamic_axes, + output_names = output_names, + opset_version=13 + ) + + print("Export onnx model successfully! ") + +if __name__ == '__main__': + main() + diff --git a/models/cv/classification/resnetv1d50/igie/inference.py b/models/cv/classification/resnetv1d50/igie/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..c42cf871274e316f7175ea3ad77c5468d2c9b936 --- /dev/null +++ b/models/cv/classification/resnetv1d50/igie/inference.py @@ -0,0 +1,185 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
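+# Evaluate an IGIE engine on an ImageFolder-style validation set: load the
+# engine, warm up, then stream batches through the graph executor. The last
+# (short) batch is padded up to --batchsize and the padded predictions are
+# discarded again before computing Top-1/Top-5. With --perf_only the dataset
+# is skipped and only mean latency / FPS are reported.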
+ +import sys +import argparse +import tvm +import torch +import torchvision +import numpy as np +from tvm import relay +from tqdm import tqdm +from torchvision import transforms + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--engine", + type=str, + required=True, + help="igie engine path.") + + parser.add_argument("--batchsize", + type=int, + required=True, + help="inference batch size.") + + parser.add_argument("--datasets", + type=str, + required=True, + help="datasets path.") + + parser.add_argument("--input_name", + type=str, + required=True, + help="input name of the model.") + + parser.add_argument("--warmup", + type=int, + default=3, + help="number of warmup before test.") + + parser.add_argument("--num_workers", + type=int, + default=16, + help="number of workers used in pytorch dataloader.") + + parser.add_argument("--acc_target", + type=float, + default=None, + help="Model inference Accuracy target.") + + parser.add_argument("--fps_target", + type=float, + default=None, + help="Model inference FPS target.") + + parser.add_argument("--perf_only", + type=bool, + default=False, + help="Run performance test only") + + args = parser.parse_args() + + return args + +def get_dataloader(data_path, batch_size, num_workers): + dataset = torchvision.datasets.ImageFolder( + data_path, + transforms.Compose( + [ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.PILToTensor(), + transforms.ConvertImageDtype(torch.float), + transforms.Normalize( + mean=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225) + ) + ] + ) + ) + + dataloader = torch.utils.data.DataLoader(dataset, batch_size, num_workers=num_workers) + + return dataloader + +def get_topk_accuracy(pred, label): + if isinstance(pred, np.ndarray): + pred = torch.from_numpy(pred) + + if isinstance(label, np.ndarray): + label = torch.from_numpy(label) + + top1_acc = 0 + top5_acc = 0 + for idx in range(len(label)): + label_value = label[idx] + if label_value == torch.topk(pred[idx].float(), 1).indices.data: + top1_acc += 1 + top5_acc += 1 + + elif label_value in torch.topk(pred[idx].float(), 5).indices.data: + top5_acc += 1 + + return top1_acc, top5_acc + +def main(): + args = parse_args() + + batch_size = args.batchsize + + # create iluvatar target & device + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + device = tvm.device(target.kind.name, 0) + + # load engine + lib = tvm.runtime.load_module(args.engine) + + # create runtime from engine + module = tvm.contrib.graph_executor.GraphModule(lib["default"](device)) + + # just run perf test + if args.perf_only: + ftimer = module.module.time_evaluator("run", device, number=100, repeat=1) + prof_res = np.array(ftimer().results) * 1000 + fps = batch_size * 1000 / np.mean(prof_res) + print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}") + else: + # warm up + for _ in range(args.warmup): + module.run() + + # get dataloader + dataloader = get_dataloader(args.datasets, batch_size, args.num_workers) + + top1_acc = 0 + top5_acc = 0 + total_num = 0 + + for image, label in tqdm(dataloader): + + # pad the last batch + pad_batch = len(image) != batch_size + + if pad_batch: + origin_size = len(image) + image = np.resize(image, (batch_size, *image.shape[1:])) + + module.set_input(args.input_name, tvm.nd.array(image, device)) + + # run inference + module.run() + + pred = module.get_output(0).asnumpy() + + if pad_batch: + pred = pred[:origin_size] + + # get batch accuracy + batch_top1_acc, 
batch_top5_acc = get_topk_accuracy(pred, label) + + top1_acc += batch_top1_acc + top5_acc += batch_top5_acc + total_num += batch_size + + result_stat = {} + result_stat["acc@1"] = round(top1_acc / total_num * 100.0, 3) + result_stat["acc@5"] = round(top5_acc / total_num * 100.0, 3) + + print(f"\n* Top1 acc: {result_stat['acc@1']} %, Top5 acc: {result_stat['acc@5']} %") + +if __name__ == "__main__": + main() diff --git a/models/cv/classification/resnetv1d50/igie/scripts/infer_resnetv1d50_fp16_accuracy.sh b/models/cv/classification/resnetv1d50/igie/scripts/infer_resnetv1d50_fp16_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..c3a3d869ea6fee7b4c4d29b34aa8b345dc8cea3e --- /dev/null +++ b/models/cv/classification/resnetv1d50/igie/scripts/infer_resnetv1d50_fp16_accuracy.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="resnetv1d50.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path resnetv1d50_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine resnetv1d50_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} diff --git a/models/cv/classification/resnetv1d50/igie/scripts/infer_resnetv1d50_fp16_performance.sh b/models/cv/classification/resnetv1d50/igie/scripts/infer_resnetv1d50_fp16_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..194d290069c5c9da11d5483a81ed1d5b665d9a06 --- /dev/null +++ b/models/cv/classification/resnetv1d50/igie/scripts/infer_resnetv1d50_fp16_performance.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
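+# Performance-only run. inference.py declares --perf_only with type=bool, so
+# any non-empty value (even "False") enables it; pass "True" as below, or
+# omit the flag entirely to fall back to the accuracy path.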
+ +batchsize=32 +model_path="resnetv1d50.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path resnetv1d50_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine resnetv1d50_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ + --perf_only True \ No newline at end of file diff --git a/models/cv/classification/resnext101_64x4d/igie/README.md b/models/cv/classification/resnext101_64x4d/igie/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5b9846f2e0516a81d02e71f1bc78765e76ceedb9 --- /dev/null +++ b/models/cv/classification/resnext101_64x4d/igie/README.md @@ -0,0 +1,47 @@ +# ResNext101_64x4d + +## Description + +The ResNeXt101_64x4d is a deep learning model based on the deep residual network architecture, which enhances performance and efficiency through the use of grouped convolutions. With a depth of 101 layers and 64 filter groups, it is particularly suited for complex image recognition tasks. While maintaining excellent accuracy, it can adapt to various input sizes + +## Setup + +### Install + +```bash +pip3 install onnx +pip3 install tqdm +``` + +### Download + +Pretrained model: + +Dataset: to download the validation dataset. + +### Model Conversion + +```bash +python3 export.py --weight resnext101_64x4d-173b62eb.pth --output resnext101_64x4d.onnx +``` + +## Inference + +```bash +export DATASETS_DIR=/Path/to/imagenet_val/ +``` + +### FP16 + +```bash +# Accuracy +bash scripts/infer_resnext101_64x4d_fp16_accuracy.sh +# Performance +bash scripts/infer_resnext101_64x4d_fp16_performance.sh +``` + +## Results + +Model |BatchSize |Precision |FPS |Top-1(%) |Top-5(%) +----------------|-----------|----------|---------|----------|-------- +ResNext101_64x4d| 32 | FP16 | 663.13 | 82.953 | 96.221 diff --git a/models/cv/classification/resnext101_64x4d/igie/build_engine.py b/models/cv/classification/resnext101_64x4d/igie/build_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..d3626ae76cc9781d9a01ec3d3e2afbdbca409ff5 --- /dev/null +++ b/models/cv/classification/resnext101_64x4d/igie/build_engine.py @@ -0,0 +1,73 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import tvm +import argparse +from tvm import relay +from tvm.relay.import_model import import_model_to_igie + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--model_path", + type=str, + required=True, + help="original model path.") + + parser.add_argument("--engine_path", + type=str, + required=True, + help="igie export engine path.") + + parser.add_argument("--input", + type=str, + required=True, + help=""" + input info of the model, format should be: + input_name:input_shape + eg: --input input:1,3,224,224. 
+ """) + + parser.add_argument("--precision", + type=str, + choices=["fp32", "fp16", "int8"], + required=True, + help="model inference precision.") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + # get input valueinfo + input_name, input_shape = args.input.split(":") + shape = tuple([int(s) for s in input_shape.split(",")]) + input_dict = {input_name: shape} + + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + + mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie") + + # build engine + lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision) + + # export engine + lib.export_library(args.engine_path) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/classification/resnext101_64x4d/igie/export.py b/models/cv/classification/resnext101_64x4d/igie/export.py new file mode 100644 index 0000000000000000000000000000000000000000..43a20fca1eb39c834868a4cb3cb39d0de4c1a465 --- /dev/null +++ b/models/cv/classification/resnext101_64x4d/igie/export.py @@ -0,0 +1,61 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import torch +import torchvision +import argparse + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--weight", + type=str, + required=True, + help="pytorch model weight.") + + parser.add_argument("--output", + type=str, + required=True, + help="export onnx model path.") + + args = parser.parse_args() + return args + +def main(): + args = parse_args() + + model = torchvision.models.resnext101_64x4d() + model.load_state_dict(torch.load(args.weight)) + model.eval() + + input_names = ['input'] + output_names = ['output'] + dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}} + dummy_input = torch.randn(1, 3, 224, 224) + + torch.onnx.export( + model, + dummy_input, + args.output, + input_names = input_names, + dynamic_axes = dynamic_axes, + output_names = output_names, + opset_version=13 + ) + + print("Export onnx model successfully! ") + +if __name__ == "__main__": + main() diff --git a/models/cv/classification/resnext101_64x4d/igie/inference.py b/models/cv/classification/resnext101_64x4d/igie/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..3aef3ec70fa7e88917c54aeb8242fc73a910c696 --- /dev/null +++ b/models/cv/classification/resnext101_64x4d/igie/inference.py @@ -0,0 +1,186 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import sys +import argparse +import tvm +import torch +import torchvision +import numpy as np +from tvm import relay +from tqdm import tqdm +from torchvision import transforms +from torchvision.transforms.functional import InterpolationMode + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--engine", + type=str, + required=True, + help="igie engine path.") + + parser.add_argument("--batchsize", + type=int, + required=True, + help="inference batch size.") + + parser.add_argument("--datasets", + type=str, + required=True, + help="datasets path.") + + parser.add_argument("--input_name", + type=str, + required=True, + help="input name of the model.") + + parser.add_argument("--warmup", + type=int, + default=3, + help="number of warmup before test.") + + parser.add_argument("--num_workers", + type=int, + default=16, + help="number of workers used in pytorch dataloader.") + + parser.add_argument("--acc_target", + type=float, + default=None, + help="Model inference Accuracy target.") + + parser.add_argument("--fps_target", + type=float, + default=None, + help="Model inference FPS target.") + + parser.add_argument("--perf_only", + type=bool, + default=False, + help="Run performance test only") + + args = parser.parse_args() + + return args + +def get_dataloader(data_path, batch_size, num_workers): + dataset = torchvision.datasets.ImageFolder( + data_path, + transforms.Compose( + [ + transforms.Resize(256, interpolation=InterpolationMode.BILINEAR), + transforms.CenterCrop(224), + transforms.PILToTensor(), + transforms.ConvertImageDtype(torch.float), + transforms.Normalize( + mean=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225) + ) + ] + ) + ) + + dataloader = torch.utils.data.DataLoader(dataset, batch_size, num_workers=num_workers) + + return dataloader + +def get_topk_accuracy(pred, label): + if isinstance(pred, np.ndarray): + pred = torch.from_numpy(pred) + + if isinstance(label, np.ndarray): + label = torch.from_numpy(label) + + top1_acc = 0 + top5_acc = 0 + for idx in range(len(label)): + label_value = label[idx] + if label_value == torch.topk(pred[idx].float(), 1).indices.data: + top1_acc += 1 + top5_acc += 1 + + elif label_value in torch.topk(pred[idx].float(), 5).indices.data: + top5_acc += 1 + + return top1_acc, top5_acc + +def main(): + args = parse_args() + + batch_size = args.batchsize + + # create iluvatar target & device + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + device = tvm.device(target.kind.name, 0) + + # load engine + lib = tvm.runtime.load_module(args.engine) + + # create runtime from engine + module = tvm.contrib.graph_executor.GraphModule(lib["default"](device)) + + # just run perf test + if args.perf_only: + ftimer = module.module.time_evaluator("run", device, number=100, repeat=1) + prof_res = np.array(ftimer().results) * 1000 + fps = batch_size * 1000 / np.mean(prof_res) + print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}") + else: + # warm up + for _ in range(args.warmup): + module.run() + + # get dataloader + dataloader = get_dataloader(args.datasets, 
batch_size, args.num_workers) + + top1_acc = 0 + top5_acc = 0 + total_num = 0 + + for image, label in tqdm(dataloader): + + # pad the last batch + pad_batch = len(image) != batch_size + + if pad_batch: + origin_size = len(image) + image = np.resize(image, (batch_size, *image.shape[1:])) + + module.set_input(args.input_name, tvm.nd.array(image, device)) + + # run inference + module.run() + + pred = module.get_output(0).asnumpy() + + if pad_batch: + pred = pred[:origin_size] + + # get batch accuracy + batch_top1_acc, batch_top5_acc = get_topk_accuracy(pred, label) + + top1_acc += batch_top1_acc + top5_acc += batch_top5_acc + total_num += batch_size + + result_stat = {} + result_stat["acc@1"] = round(top1_acc / total_num * 100.0, 3) + result_stat["acc@5"] = round(top5_acc / total_num * 100.0, 3) + + print(f"\n* Top1 acc: {result_stat['acc@1']} %, Top5 acc: {result_stat['acc@5']} %") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/classification/resnext101_64x4d/igie/scripts/infer_resnext101_64x4d_fp16_accuracy.sh b/models/cv/classification/resnext101_64x4d/igie/scripts/infer_resnext101_64x4d_fp16_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..edcb1a0040c0c401fdc0c5f2601232112e25d307 --- /dev/null +++ b/models/cv/classification/resnext101_64x4d/igie/scripts/infer_resnext101_64x4d_fp16_accuracy.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="resnext101_64x4d.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path resnext101_64x4d_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine resnext101_64x4d_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ No newline at end of file diff --git a/models/cv/classification/resnext101_64x4d/igie/scripts/infer_resnext101_64x4d_fp16_performance.sh b/models/cv/classification/resnext101_64x4d/igie/scripts/infer_resnext101_64x4d_fp16_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..3ccf3bc5c58bf3e672ab4d22a3c4724b2f3df880 --- /dev/null +++ b/models/cv/classification/resnext101_64x4d/igie/scripts/infer_resnext101_64x4d_fp16_performance.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="resnext101_64x4d.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path resnext101_64x4d_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine resnext101_64x4d_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ + --perf_only True \ No newline at end of file diff --git a/models/cv/classification/shufflenetv2_x1_5/igie/README.md b/models/cv/classification/shufflenetv2_x1_5/igie/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5bd87eab586ab53026b7e6116efad74c90fe6b0b --- /dev/null +++ b/models/cv/classification/shufflenetv2_x1_5/igie/README.md @@ -0,0 +1,47 @@ +# ShuffleNetV2_x1_5 + +## Description + +ShuffleNetV2_x1_5 is a lightweight convolutional neural network specifically designed for efficient image recognition tasks on resource-constrained devices. It achieves high performance and low latency through the introduction of channel shuffling and pointwise group convolutions. Despite its small model size, it offers high accuracy and is suitable for a variety of vision tasks in mobile devices and embedded systems. + +## Setup + +### Install + +```bash +pip3 install onnx +pip3 install tqdm +``` + +### Download + +Pretrained model: + +Dataset: to download the validation dataset. + +### Model Conversion + +```bash +python3 export.py --weight shufflenetv2_x1_5-3c479a10.pth --output shufflenetv2_x1_5.onnx +``` + +## Inference + +```bash +export DATASETS_DIR=/Path/to/imagenet_val/ +``` + +### FP16 + +```bash +# Accuracy +bash scripts/infer_shufflenetv2_x1_5_fp16_accuracy.sh +# Performance +bash scripts/infer_shufflenetv2_x1_5_fp16_performance.sh +``` + +## Results + +| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) | +| ----------------- | --------- | --------- | -------- | -------- | -------- | +| ShuffleNetV2_x1_5 | 32 | FP16 | 7478.728 | 72.755 | 91.031 | diff --git a/models/cv/classification/shufflenetv2_x1_5/igie/build_engine.py b/models/cv/classification/shufflenetv2_x1_5/igie/build_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..d3626ae76cc9781d9a01ec3d3e2afbdbca409ff5 --- /dev/null +++ b/models/cv/classification/shufflenetv2_x1_5/igie/build_engine.py @@ -0,0 +1,73 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
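+
+# Example invocation (illustrative; it mirrors the arguments used by the accompanying
+# fp16 scripts, so the file names below are only placeholders):
+#   python3 build_engine.py \
+#       --model_path shufflenetv2_x1_5.onnx \
+#       --input input:32,3,224,224 \
+#       --precision fp16 \
+#       --engine_path shufflenetv2_x1_5_bs_32_fp16.so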
+ +import tvm +import argparse +from tvm import relay +from tvm.relay.import_model import import_model_to_igie + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--model_path", + type=str, + required=True, + help="original model path.") + + parser.add_argument("--engine_path", + type=str, + required=True, + help="igie export engine path.") + + parser.add_argument("--input", + type=str, + required=True, + help=""" + input info of the model, format should be: + input_name:input_shape + eg: --input input:1,3,224,224. + """) + + parser.add_argument("--precision", + type=str, + choices=["fp32", "fp16", "int8"], + required=True, + help="model inference precision.") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + # get input valueinfo + input_name, input_shape = args.input.split(":") + shape = tuple([int(s) for s in input_shape.split(",")]) + input_dict = {input_name: shape} + + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + + mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie") + + # build engine + lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision) + + # export engine + lib.export_library(args.engine_path) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/classification/shufflenetv2_x1_5/igie/export.py b/models/cv/classification/shufflenetv2_x1_5/igie/export.py new file mode 100644 index 0000000000000000000000000000000000000000..0f89ba7cb2d837243d465e78df5102abbbbbdf99 --- /dev/null +++ b/models/cv/classification/shufflenetv2_x1_5/igie/export.py @@ -0,0 +1,61 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import torch +import torchvision +import argparse + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--weight", + type=str, + required=True, + help="pytorch model weight.") + + parser.add_argument("--output", + type=str, + required=True, + help="export onnx model path.") + + args = parser.parse_args() + return args + +def main(): + args = parse_args() + + model = torchvision.models.shufflenet_v2_x1_5() + model.load_state_dict(torch.load(args.weight)) + model.eval() + + input_names = ['input'] + output_names = ['output'] + dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}} + dummy_input = torch.randn(1, 3, 224, 224) + + torch.onnx.export( + model, + dummy_input, + args.output, + input_names = input_names, + dynamic_axes = dynamic_axes, + output_names = output_names, + opset_version=13 + ) + + print("Export onnx model successfully! 
") + +if __name__ == "__main__": + main() diff --git a/models/cv/classification/shufflenetv2_x1_5/igie/inference.py b/models/cv/classification/shufflenetv2_x1_5/igie/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..3aef3ec70fa7e88917c54aeb8242fc73a910c696 --- /dev/null +++ b/models/cv/classification/shufflenetv2_x1_5/igie/inference.py @@ -0,0 +1,186 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import sys +import argparse +import tvm +import torch +import torchvision +import numpy as np +from tvm import relay +from tqdm import tqdm +from torchvision import transforms +from torchvision.transforms.functional import InterpolationMode + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--engine", + type=str, + required=True, + help="igie engine path.") + + parser.add_argument("--batchsize", + type=int, + required=True, + help="inference batch size.") + + parser.add_argument("--datasets", + type=str, + required=True, + help="datasets path.") + + parser.add_argument("--input_name", + type=str, + required=True, + help="input name of the model.") + + parser.add_argument("--warmup", + type=int, + default=3, + help="number of warmup before test.") + + parser.add_argument("--num_workers", + type=int, + default=16, + help="number of workers used in pytorch dataloader.") + + parser.add_argument("--acc_target", + type=float, + default=None, + help="Model inference Accuracy target.") + + parser.add_argument("--fps_target", + type=float, + default=None, + help="Model inference FPS target.") + + parser.add_argument("--perf_only", + type=bool, + default=False, + help="Run performance test only") + + args = parser.parse_args() + + return args + +def get_dataloader(data_path, batch_size, num_workers): + dataset = torchvision.datasets.ImageFolder( + data_path, + transforms.Compose( + [ + transforms.Resize(256, interpolation=InterpolationMode.BILINEAR), + transforms.CenterCrop(224), + transforms.PILToTensor(), + transforms.ConvertImageDtype(torch.float), + transforms.Normalize( + mean=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225) + ) + ] + ) + ) + + dataloader = torch.utils.data.DataLoader(dataset, batch_size, num_workers=num_workers) + + return dataloader + +def get_topk_accuracy(pred, label): + if isinstance(pred, np.ndarray): + pred = torch.from_numpy(pred) + + if isinstance(label, np.ndarray): + label = torch.from_numpy(label) + + top1_acc = 0 + top5_acc = 0 + for idx in range(len(label)): + label_value = label[idx] + if label_value == torch.topk(pred[idx].float(), 1).indices.data: + top1_acc += 1 + top5_acc += 1 + + elif label_value in torch.topk(pred[idx].float(), 5).indices.data: + top5_acc += 1 + + return top1_acc, top5_acc + +def main(): + args = parse_args() + + batch_size = args.batchsize + + # create iluvatar target & device + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + device = 
tvm.device(target.kind.name, 0) + + # load engine + lib = tvm.runtime.load_module(args.engine) + + # create runtime from engine + module = tvm.contrib.graph_executor.GraphModule(lib["default"](device)) + + # just run perf test + if args.perf_only: + ftimer = module.module.time_evaluator("run", device, number=100, repeat=1) + prof_res = np.array(ftimer().results) * 1000 + fps = batch_size * 1000 / np.mean(prof_res) + print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}") + else: + # warm up + for _ in range(args.warmup): + module.run() + + # get dataloader + dataloader = get_dataloader(args.datasets, batch_size, args.num_workers) + + top1_acc = 0 + top5_acc = 0 + total_num = 0 + + for image, label in tqdm(dataloader): + + # pad the last batch + pad_batch = len(image) != batch_size + + if pad_batch: + origin_size = len(image) + image = np.resize(image, (batch_size, *image.shape[1:])) + + module.set_input(args.input_name, tvm.nd.array(image, device)) + + # run inference + module.run() + + pred = module.get_output(0).asnumpy() + + if pad_batch: + pred = pred[:origin_size] + + # get batch accuracy + batch_top1_acc, batch_top5_acc = get_topk_accuracy(pred, label) + + top1_acc += batch_top1_acc + top5_acc += batch_top5_acc + total_num += batch_size + + result_stat = {} + result_stat["acc@1"] = round(top1_acc / total_num * 100.0, 3) + result_stat["acc@5"] = round(top5_acc / total_num * 100.0, 3) + + print(f"\n* Top1 acc: {result_stat['acc@1']} %, Top5 acc: {result_stat['acc@5']} %") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/classification/shufflenetv2_x1_5/igie/scripts/infer_shufflenetv2_x1_5_fp16_accuracy.sh b/models/cv/classification/shufflenetv2_x1_5/igie/scripts/infer_shufflenetv2_x1_5_fp16_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..9be9264cb25b816606f414611f611eaf2ffa5b27 --- /dev/null +++ b/models/cv/classification/shufflenetv2_x1_5/igie/scripts/infer_shufflenetv2_x1_5_fp16_accuracy.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
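+
+# Usage sketch (illustrative; assumes DATASETS_DIR points at the ImageNet validation
+# set referenced in the README):
+#   export DATASETS_DIR=/Path/to/imagenet_val/
+#   bash scripts/infer_shufflenetv2_x1_5_fp16_accuracy.sh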
+ +batchsize=32 +model_path="shufflenetv2_x1_5.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path shufflenetv2_x1_5_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine shufflenetv2_x1_5_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ No newline at end of file diff --git a/models/cv/classification/shufflenetv2_x1_5/igie/scripts/infer_shufflenetv2_x1_5_fp16_performance.sh b/models/cv/classification/shufflenetv2_x1_5/igie/scripts/infer_shufflenetv2_x1_5_fp16_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..cc5a424d73a3467a0d5be0e79e2f309fc7083a84 --- /dev/null +++ b/models/cv/classification/shufflenetv2_x1_5/igie/scripts/infer_shufflenetv2_x1_5_fp16_performance.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="shufflenetv2_x1_5.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path shufflenetv2_x1_5_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine shufflenetv2_x1_5_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ + --perf_only True \ No newline at end of file diff --git a/models/cv/detection/atss/igie/README.md b/models/cv/detection/atss/igie/README.md index c64581d413960de39d85032115e563b180e83662..c28719ce7d3b83274381cee02753cb5ff30a133a 100644 --- a/models/cv/detection/atss/igie/README.md +++ b/models/cv/detection/atss/igie/README.md @@ -55,6 +55,11 @@ bash scripts/infer_atss_fp16_performance.sh ## Results -| Model | BatchSize | Input Shape | Precision | FPS | mAP@0.5(%) | -| :-------: | :-------: | :---------: | :-------: | :-------: | :--------: | -| ATSS | 32 | 800x800 | FP16 | 81.671 | 0.541 | \ No newline at end of file +Model |BatchSize |Precision |FPS |IOU@0.5 |IOU@0.5:0.95 | +-------|-----------|----------|----------|----------|---------------| +ATSS | 32 | FP16 | 81.671 | 0.541 | 0.367 | + + +## Reference + +mmdetection: \ No newline at end of file diff --git a/models/cv/detection/atss/igie/atss_r50_fpn_1x_coco.py b/models/cv/detection/atss/igie/atss_r50_fpn_1x_coco.py index 0378cf0b6f9307ccd1e931eab2c705ab3b121475..3fc51df938624d7d58ff35093a2527ff55a52243 100755 --- a/models/cv/detection/atss/igie/atss_r50_fpn_1x_coco.py +++ b/models/cv/detection/atss/igie/atss_r50_fpn_1x_coco.py @@ -130,7 +130,7 @@ model = dict( pos_weight=-1), type='ATSS') optim_wrapper = dict( - optimizer=dict(lr=0.02, momentum=0.9, type='SGD', weight_decay=0.0001), + optimizer=dict(lr=0.01, momentum=0.9, type='SGD', weight_decay=0.0001), type='OptimWrapper') param_scheduler = [ dict( 
@@ -154,7 +154,7 @@ test_dataloader = dict( ann_file='annotations/instances_val2017.json', backend_args=None, data_prefix=dict(img='images/val2017/'), - data_root='/root/.igie_cache/modelzoo_data/datasets/coco/', + data_root='data/coco/', pipeline=[ dict(backend_args=None, type='LoadImageFromFile'), dict(keep_ratio=False, scale=( @@ -179,8 +179,7 @@ test_dataloader = dict( persistent_workers=True, sampler=dict(shuffle=False, type='DefaultSampler')) test_evaluator = dict( - ann_file= - '/root/.igie_cache/modelzoo_data/datasets/coco/annotations/instances_val2017.json', + ann_file='data/coco/annotations/instances_val2017.json', backend_args=None, format_only=False, metric='bbox', @@ -281,5 +280,4 @@ visualizer = dict( type='DetLocalVisualizer', vis_backends=[ dict(type='LocalVisBackend'), - ]) -work_dir = './' + ]) \ No newline at end of file diff --git a/models/cv/detection/atss/igie/inference.py b/models/cv/detection/atss/igie/inference.py index cc1a2b8604533313fb7cb8f41d5d899e3ce25553..8bf78903e5cb9babe60dc0a8f512d894497431e2 100644 --- a/models/cv/detection/atss/igie/inference.py +++ b/models/cv/detection/atss/igie/inference.py @@ -20,7 +20,7 @@ import torch import numpy as np from tvm import relay from tqdm import tqdm -from mmpose.registry import RUNNERS +from mmdet.registry import RUNNERS from mmengine.config import Config def parse_args(): @@ -99,7 +99,7 @@ def main(): # runner config cfg = Config.fromfile("atss_r50_fpn_1x_coco.py") - cfg.work_dir = "./" + cfg.work_dir = "./workspace" cfg['test_dataloader']['batch_size'] = batch_size cfg['test_dataloader']['dataset']['data_root'] = args.datasets cfg['test_dataloader']['dataset']['data_prefix']['img'] = 'images/val2017/' diff --git a/models/cv/detection/centernet/igie/base/centernet_r18-dcnv2_8xb16-crop512-140e_coco.py b/models/cv/detection/centernet/igie/base/centernet_r18-dcnv2_8xb16-crop512-140e_coco.py deleted file mode 100644 index 894e4b4fcb8d7b24a4ae853af48b64534f203068..0000000000000000000000000000000000000000 --- a/models/cv/detection/centernet/igie/base/centernet_r18-dcnv2_8xb16-crop512-140e_coco.py +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- - -_base_ = [ - 'coco_detection.py', - 'schedule_1x.py', 'default_runtime.py', -] - -dataset_type = 'CocoDataset' -data_root = 'data/coco/' - -# model settings -model = dict( - type='CenterNet', - data_preprocessor=dict( - type='DetDataPreprocessor', - mean=[123.675, 116.28, 103.53], - std=[58.395, 57.12, 57.375], - bgr_to_rgb=True), - backbone=dict( - type='ResNet', - depth=18, - norm_eval=False, - norm_cfg=dict(type='BN'), - init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18')), - neck=dict( - type='CTResNetNeck', - in_channels=512, - num_deconv_filters=(256, 128, 64), - num_deconv_kernels=(4, 4, 4), - use_dcn=True), - bbox_head=dict( - type='CenterNetHead', - num_classes=80, - in_channels=64, - feat_channels=64, - loss_center_heatmap=dict(type='GaussianFocalLoss', loss_weight=1.0), - loss_wh=dict(type='L1Loss', loss_weight=0.1), - loss_offset=dict(type='L1Loss', loss_weight=1.0)), - train_cfg=None, - test_cfg=dict(topk=100, local_maximum_kernel=3, max_per_img=100)) - -train_pipeline = [ - dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), - dict(type='LoadAnnotations', with_bbox=True), - dict( - type='PhotoMetricDistortion', - brightness_delta=32, - contrast_range=(0.5, 1.5), - saturation_range=(0.5, 1.5), - hue_delta=18), - dict( - type='RandomCenterCropPad', - # The cropped images are padded into squares during training, - # but may be less than crop_size. - crop_size=(512, 512), - ratios=(0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3), - mean=[0, 0, 0], - std=[1, 1, 1], - to_rgb=True, - test_pad_mode=None), - # Make sure the output is always crop_size. - dict(type='Resize', scale=(512, 512), keep_ratio=True), - dict(type='RandomFlip', prob=0.5), - dict(type='PackDetInputs') -] -test_pipeline = [ - dict( - type='LoadImageFromFile', - backend_args={{_base_.backend_args}}, - to_float32=True), - # don't need Resize - dict( - type='RandomCenterCropPad', - ratios=None, - border=None, - mean=[0, 0, 0], - std=[1, 1, 1], - to_rgb=True, - test_mode=True, - test_pad_mode=['logical_or', 31], - test_pad_add_pix=1), - dict(type='LoadAnnotations', with_bbox=True), - dict( - type='PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'border')) -] - -# Use RepeatDataset to speed up training -train_dataloader = dict( - batch_size=16, - num_workers=4, - persistent_workers=True, - sampler=dict(type='DefaultSampler', shuffle=True), - dataset=dict( - _delete_=True, - type='RepeatDataset', - times=5, - dataset=dict( - type=dataset_type, - data_root=data_root, - ann_file='annotations/instances_train2017.json', - data_prefix=dict(img='train2017/'), - filter_cfg=dict(filter_empty_gt=True, min_size=32), - pipeline=train_pipeline, - backend_args={{_base_.backend_args}}, - ))) - -val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) -test_dataloader = val_dataloader - -# optimizer -# Based on the default settings of modern detectors, the SGD effect is better -# than the Adam in the source code, so we use SGD default settings and -# if you use adam+lr5e-4, the map is 29.1. -optim_wrapper = dict(clip_grad=dict(max_norm=35, norm_type=2)) - -max_epochs = 28 -# learning policy -# Based on the default settings of modern detectors, we added warmup settings. 
-param_scheduler = [ - dict( - type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, - end=1000), - dict( - type='MultiStepLR', - begin=0, - end=max_epochs, - by_epoch=True, - milestones=[18, 24], # the real step is [18*5, 24*5] - gamma=0.1) -] -train_cfg = dict(max_epochs=max_epochs) # the real epoch is 28*5=140 - -# NOTE: `auto_scale_lr` is for automatically scaling LR, -# USER SHOULD NOT CHANGE ITS VALUES. -# base_batch_size = (8 GPUs) x (16 samples per GPU) -auto_scale_lr = dict(base_batch_size=128) diff --git a/models/cv/detection/centernet/igie/base/coco_detection.py b/models/cv/detection/centernet/igie/base/coco_detection.py deleted file mode 100644 index f58fe67becb40462fc1f6b9ab9a49386e90f11e2..0000000000000000000000000000000000000000 --- a/models/cv/detection/centernet/igie/base/coco_detection.py +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' - -backend_args = None - -train_pipeline = [ - dict(type='LoadImageFromFile', backend_args=backend_args), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', prob=0.5), - dict(type='PackDetInputs') -] -test_pipeline = [ - dict(type='LoadImageFromFile', backend_args=backend_args), - dict(type='Resize', scale=(1333, 800), keep_ratio=True), - # If you don't have a gt annotation, delete the pipeline - dict(type='LoadAnnotations', with_bbox=True), - dict( - type='PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', - 'scale_factor')) -] -train_dataloader = dict( - batch_size=2, - num_workers=2, - persistent_workers=True, - sampler=dict(type='DefaultSampler', shuffle=True), - batch_sampler=dict(type='AspectRatioBatchSampler'), - dataset=dict( - type=dataset_type, - data_root=data_root, - ann_file='annotations/instances_train2017.json', - data_prefix=dict(img='train2017/'), - filter_cfg=dict(filter_empty_gt=True, min_size=32), - pipeline=train_pipeline, - backend_args=backend_args)) -val_dataloader = dict( - batch_size=1, - num_workers=2, - persistent_workers=True, - drop_last=False, - sampler=dict(type='DefaultSampler', shuffle=False), - dataset=dict( - type=dataset_type, - data_root=data_root, - ann_file='annotations/instances_val2017.json', - data_prefix=dict(img='val2017/'), - test_mode=True, - pipeline=test_pipeline, - backend_args=backend_args)) -test_dataloader = val_dataloader - -val_evaluator = dict( - type='CocoMetric', - ann_file=data_root + 'annotations/instances_val2017.json', - metric='bbox', - format_only=False, - backend_args=backend_args) -test_evaluator = val_evaluator \ No newline at end of file diff --git a/models/cv/detection/centernet/igie/centernet_r18_8xb16-crop512-140e_coco.py b/models/cv/detection/centernet/igie/centernet_r18_8xb16-crop512-140e_coco.py index 
81a2142134d66d1a128375033126c044c3e4c43f..d2156d8fb12b78fdd2553c5275bbf7d54d25d8be 100644 --- a/models/cv/detection/centernet/igie/centernet_r18_8xb16-crop512-140e_coco.py +++ b/models/cv/detection/centernet/igie/centernet_r18_8xb16-crop512-140e_coco.py @@ -105,7 +105,7 @@ test_dataloader = dict( ann_file='annotations/instances_val2017.json', backend_args=None, data_prefix=dict(img='images/val2017/'), - data_root='/home/peng.yang/Datasets/coco', + data_root='data/coco/', pipeline=[ dict(backend_args=None, to_float32=True, type='LoadImageFromFile'), dict( @@ -147,7 +147,7 @@ test_dataloader = dict( persistent_workers=True, sampler=dict(shuffle=False, type='DefaultSampler')) test_evaluator = dict( - ann_file='/home/peng.yang/Datasets/coco/annotations/instances_val2017.json', + ann_file='data/coco/annotations/instances_val2017.json', backend_args=None, format_only=False, metric='bbox', @@ -366,4 +366,3 @@ visualizer = dict( vis_backends=[ dict(type='LocalVisBackend'), ]) -work_dir = './' diff --git a/models/cv/detection/centernet/igie/inference.py b/models/cv/detection/centernet/igie/inference.py index d3417486087f691bed6cc60cf0c0b1d3062cedfc..85138c1642cf0370eac738349d3f31bab3805b19 100644 --- a/models/cv/detection/centernet/igie/inference.py +++ b/models/cv/detection/centernet/igie/inference.py @@ -97,15 +97,17 @@ def main(): for _ in range(args.warmup): module.run() - # Runner config + # runner config cfg = Config.fromfile("centernet_r18_8xb16-crop512-140e_coco.py") - cfg.work_dir = "./" + cfg.work_dir = "./workspace" cfg['test_dataloader']['batch_size'] = batch_size cfg['test_dataloader']['dataset']['data_root'] = args.datasets cfg['test_dataloader']['dataset']['data_prefix']['img'] = 'images/val2017/' cfg['test_evaluator']['ann_file'] = os.path.join(args.datasets, 'annotations/instances_val2017.json') + cfg['log_level'] = 'ERROR' + # build runner runner = RUNNERS.build(cfg) for input_data in tqdm(runner.test_dataloader): diff --git a/models/cv/detection/fcos/igie/README.md b/models/cv/detection/fcos/igie/README.md index f10cc742410c3aaf0bd32ea27b7b177ed90f1936..693e6e267a44b24d11a62526591648c2a23e6274 100644 --- a/models/cv/detection/fcos/igie/README.md +++ b/models/cv/detection/fcos/igie/README.md @@ -55,6 +55,10 @@ bash scripts/infer_fcos_fp16_performance.sh ## Results -| Model | BatchSize | Input Shape | Precision | FPS | mAP@0.5(%) | -| :-------: | :-------: | :---------: | :-------: | :-------: | :--------: | -| FCOS | 32 | 800x800 | FP16 | 83.09 | 0.522 | \ No newline at end of file +Model |BatchSize |Precision |FPS |IOU@0.5 |IOU@0.5:0.95 | +-------|-----------|----------|----------|----------|---------------| +FCOS | 32 | FP16 | 83.09 | 0.522 | 0.339 | + +## Reference + +mmdetection: \ No newline at end of file diff --git a/models/cv/detection/fcos/igie/fcos_r50_caffe_fpn_gn-head_1x_coco.py b/models/cv/detection/fcos/igie/fcos_r50_caffe_fpn_gn-head_1x_coco.py index 04941d978d5a49cf71df16acf4e0b7486c0ea56d..d90cc0e41be314045ac1cff6ba3d1225225190b5 100755 --- a/models/cv/detection/fcos/igie/fcos_r50_caffe_fpn_gn-head_1x_coco.py +++ b/models/cv/detection/fcos/igie/fcos_r50_caffe_fpn_gn-head_1x_coco.py @@ -29,7 +29,6 @@ env_cfg = dict( cudnn_benchmark=False, dist_cfg=dict(backend='nccl'), mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0)) -evaluation = dict(interval=1, metric='bbox') load_from = None log_level = 'ERROR' log_processor = dict(by_epoch=True, type='LogProcessor', window_size=50) @@ -108,11 +107,17 @@ model = dict( score_thr=0.05), type='FCOS') optim_wrapper = 
dict( - optimizer=dict(lr=0.02, momentum=0.9, type='SGD', weight_decay=0.0001), + clip_grad=dict(max_norm=35, norm_type=2), + optimizer=dict(lr=0.01, momentum=0.9, type='SGD', weight_decay=0.0001), + paramwise_cfg=dict(bias_decay_mult=0.0, bias_lr_mult=2.0), type='OptimWrapper') param_scheduler = [ dict( - begin=0, by_epoch=False, end=500, start_factor=0.001, type='LinearLR'), + begin=0, + by_epoch=False, + end=500, + factor=0.3333333333333333, + type='ConstantLR'), dict( begin=0, by_epoch=True, @@ -132,7 +137,7 @@ test_dataloader = dict( ann_file='annotations/instances_val2017.json', backend_args=None, data_prefix=dict(img='images/val2017/'), - data_root='/root/.igie_cache/modelzoo_data/datasets/coco/', + data_root='data/coco', pipeline=[ dict(backend_args=None, type='LoadImageFromFile'), dict(keep_ratio=True, scale=( @@ -157,15 +162,14 @@ test_dataloader = dict( persistent_workers=True, sampler=dict(shuffle=False, type='DefaultSampler')) test_evaluator = dict( - ann_file= - '/root/.igie_cache/modelzoo_data/datasets/coco/annotations/instances_val2017.json', + ann_file='data/coco/annotations/instances_val2017.json', backend_args=None, format_only=False, metric='bbox', type='CocoMetric') test_pipeline = [ dict(backend_args=None, type='LoadImageFromFile'), - dict(keep_ratio=True, scale=( + dict(keep_ratio=False, scale=( 800, 800, ), type='Resize'), @@ -224,7 +228,7 @@ val_dataloader = dict( data_root='data/coco/', pipeline=[ dict(backend_args=None, type='LoadImageFromFile'), - dict(keep_ratio=True, scale=( + dict(keep_ratio=False, scale=( 800, 800, ), type='Resize'), @@ -260,4 +264,3 @@ visualizer = dict( vis_backends=[ dict(type='LocalVisBackend'), ]) -work_dir = './' diff --git a/models/cv/detection/fcos/igie/inference.py b/models/cv/detection/fcos/igie/inference.py index e0517124fcb3e4cf136adfff62f6dda9c2765064..95b18899707312cc1dec508b4c4fccf76b8cb65b 100644 --- a/models/cv/detection/fcos/igie/inference.py +++ b/models/cv/detection/fcos/igie/inference.py @@ -20,7 +20,7 @@ import torch import numpy as np from tvm import relay from tqdm import tqdm -from mmpose.registry import RUNNERS +from mmdet.registry import RUNNERS from mmengine.config import Config def parse_args(): @@ -99,7 +99,7 @@ def main(): # runner config cfg = Config.fromfile("fcos_r50_caffe_fpn_gn-head_1x_coco.py") - cfg.work_dir = "./" + cfg.work_dir = "./workspace" cfg['test_dataloader']['batch_size'] = batch_size cfg['test_dataloader']['dataset']['data_root'] = args.datasets cfg['test_dataloader']['dataset']['data_prefix']['img'] = 'images/val2017/' diff --git a/models/cv/detection/foveabox/igie/base/coco_detection.py b/models/cv/detection/foveabox/igie/base/coco_detection.py deleted file mode 100644 index f58fe67becb40462fc1f6b9ab9a49386e90f11e2..0000000000000000000000000000000000000000 --- a/models/cv/detection/foveabox/igie/base/coco_detection.py +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' - -backend_args = None - -train_pipeline = [ - dict(type='LoadImageFromFile', backend_args=backend_args), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', prob=0.5), - dict(type='PackDetInputs') -] -test_pipeline = [ - dict(type='LoadImageFromFile', backend_args=backend_args), - dict(type='Resize', scale=(1333, 800), keep_ratio=True), - # If you don't have a gt annotation, delete the pipeline - dict(type='LoadAnnotations', with_bbox=True), - dict( - type='PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', - 'scale_factor')) -] -train_dataloader = dict( - batch_size=2, - num_workers=2, - persistent_workers=True, - sampler=dict(type='DefaultSampler', shuffle=True), - batch_sampler=dict(type='AspectRatioBatchSampler'), - dataset=dict( - type=dataset_type, - data_root=data_root, - ann_file='annotations/instances_train2017.json', - data_prefix=dict(img='train2017/'), - filter_cfg=dict(filter_empty_gt=True, min_size=32), - pipeline=train_pipeline, - backend_args=backend_args)) -val_dataloader = dict( - batch_size=1, - num_workers=2, - persistent_workers=True, - drop_last=False, - sampler=dict(type='DefaultSampler', shuffle=False), - dataset=dict( - type=dataset_type, - data_root=data_root, - ann_file='annotations/instances_val2017.json', - data_prefix=dict(img='val2017/'), - test_mode=True, - pipeline=test_pipeline, - backend_args=backend_args)) -test_dataloader = val_dataloader - -val_evaluator = dict( - type='CocoMetric', - ann_file=data_root + 'annotations/instances_val2017.json', - metric='bbox', - format_only=False, - backend_args=backend_args) -test_evaluator = val_evaluator \ No newline at end of file diff --git a/models/cv/detection/foveabox/igie/base/schedule_1x.py b/models/cv/detection/foveabox/igie/base/schedule_1x.py deleted file mode 100644 index 9b16d80c7704f8b55b6d3f3e622399515668f26b..0000000000000000000000000000000000000000 --- a/models/cv/detection/foveabox/igie/base/schedule_1x.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -# training schedule for 1x -train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=12, val_interval=1) -val_cfg = dict(type='ValLoop') -test_cfg = dict(type='TestLoop') - -# learning rate -param_scheduler = [ - dict( - type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500), - dict( - type='MultiStepLR', - begin=0, - end=12, - by_epoch=True, - milestones=[8, 11], - gamma=0.1) -] - -# optimizer -optim_wrapper = dict( - type='OptimWrapper', - optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)) - -# Default setting for scaling LR automatically -# - `enable` means enable scaling LR automatically -# or not by default. -# - `base_batch_size` = (8 GPUs) x (2 samples per GPU). 
-auto_scale_lr = dict(enable=False, base_batch_size=16) diff --git a/models/cv/detection/foveabox/igie/fovea_r50_fpn_4xb4-1x_coco.py b/models/cv/detection/foveabox/igie/fovea_r50_fpn_4xb4-1x_coco.py index 79329f8d952dc911cb7d9f93f0f8456c1b78b57e..5be9e50af49009813315b0f7b9fc3221134d8ee1 100644 --- a/models/cv/detection/foveabox/igie/fovea_r50_fpn_4xb4-1x_coco.py +++ b/models/cv/detection/foveabox/igie/fovea_r50_fpn_4xb4-1x_coco.py @@ -157,7 +157,7 @@ test_dataloader = dict( ann_file='annotations/instances_val2017.json', backend_args=None, data_prefix=dict(img='images/val2017/'), - data_root='/home/peng.yang/Datasets/coco', + data_root='data/coco/', pipeline=[ dict(backend_args=None, type='LoadImageFromFile'), dict(keep_ratio=True, scale=( @@ -182,7 +182,7 @@ test_dataloader = dict( persistent_workers=True, sampler=dict(shuffle=False, type='DefaultSampler')) test_evaluator = dict( - ann_file='/home/peng.yang/Datasets/coco/annotations/instances_val2017.json', + ann_file='data/coco/annotations/instances_val2017.json', backend_args=None, format_only=False, metric='bbox', diff --git a/models/cv/detection/foveabox/igie/inference.py b/models/cv/detection/foveabox/igie/inference.py index 6b31d8987c73f0ad28863a88c669a6d7b49eb9ed..ba4c846f361a1ced86457aa8ea95fc45574bcdeb 100644 --- a/models/cv/detection/foveabox/igie/inference.py +++ b/models/cv/detection/foveabox/igie/inference.py @@ -97,15 +97,17 @@ def main(): for _ in range(args.warmup): module.run() - # Runner config + # runner config cfg = Config.fromfile("fovea_r50_fpn_4xb4-1x_coco.py") - cfg.work_dir = "./" + cfg.work_dir = "./workspace" cfg['test_dataloader']['batch_size'] = batch_size cfg['test_dataloader']['dataset']['data_root'] = args.datasets cfg['test_dataloader']['dataset']['data_prefix']['img'] = 'images/val2017/' cfg['test_evaluator']['ann_file'] = os.path.join(args.datasets, 'annotations/instances_val2017.json') - + cfg['log_level'] = 'ERROR' + + # build runner runner = RUNNERS.build(cfg) for input_data in tqdm(runner.test_dataloader): diff --git a/models/cv/detection/fsaf/igie/README.md b/models/cv/detection/fsaf/igie/README.md index 14d56932be1695135a8f864f00eecba5c0932a83..5f8a4d4f4565b038f5e0dde70436c70f13abe920 100644 --- a/models/cv/detection/fsaf/igie/README.md +++ b/models/cv/detection/fsaf/igie/README.md @@ -55,6 +55,10 @@ bash scripts/infer_fsaf_fp16_performance.sh ## Results -| Model | BatchSize | Input Shape | Precision | FPS | mAP@0.5(%) | -| :-------: | :-------: | :---------: | :-------: | :-------: | :--------: | -| FSAF | 32 | 800x800 | FP16 | 122.35 | 0.530 | \ No newline at end of file +Model |BatchSize |Precision |FPS |IOU@0.5 |IOU@0.5:0.95 | +-------|-----------|----------|----------|----------|---------------| +FSAF | 32 | FP16 | 122.35 | 0.530 | 0.345 | + +## Reference + +mmdetection: diff --git a/models/cv/detection/fsaf/igie/base/retinanet_r50_fpn_1x_coco.py b/models/cv/detection/fsaf/igie/base/retinanet_r50_fpn_1x_coco.py deleted file mode 100644 index 425d3d32efbaede948e1ac55e27d65be4ab26283..0000000000000000000000000000000000000000 --- a/models/cv/detection/fsaf/igie/base/retinanet_r50_fpn_1x_coco.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -# model settings -model = dict( - type='RetinaNet', - data_preprocessor=dict( - type='DetDataPreprocessor', - mean=[123.675, 116.28, 103.53], - std=[58.395, 57.12, 57.375], - bgr_to_rgb=True, - pad_size_divisor=32), - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - norm_eval=True, - style='pytorch', - init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - start_level=1, - add_extra_convs='on_input', - num_outs=5), - bbox_head=dict( - type='RetinaHead', - num_classes=80, - in_channels=256, - stacked_convs=4, - feat_channels=256, - anchor_generator=dict( - type='AnchorGenerator', - octave_base_scale=4, - scales_per_octave=3, - ratios=[0.5, 1.0, 2.0], - strides=[8, 16, 32, 64, 128]), - bbox_coder=dict( - type='DeltaXYWHBBoxCoder', - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0]), - loss_cls=dict( - type='FocalLoss', - use_sigmoid=True, - gamma=2.0, - alpha=0.25, - loss_weight=1.0), - loss_bbox=dict(type='L1Loss', loss_weight=1.0)), - # model training and testing settings - train_cfg=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.4, - min_pos_iou=0, - ignore_iof_thr=-1), - sampler=dict( - type='PseudoSampler'), # Focal loss should use PseudoSampler - allowed_border=-1, - pos_weight=-1, - debug=False), - test_cfg=dict( - nms_pre=1000, - min_bbox_size=0, - score_thr=0.05, - nms=dict(type='nms', iou_threshold=0.5), - max_per_img=100)) \ No newline at end of file diff --git a/models/cv/detection/fsaf/igie/fsaf_r50_fpn_1x_coco.py b/models/cv/detection/fsaf/igie/fsaf_r50_fpn_1x_coco.py index 33c2df60e5472e22e57616614cad5ff4fb21e984..d511321f40a1a3ca786e807d6f3a88f60c8bb994 100755 --- a/models/cv/detection/fsaf/igie/fsaf_r50_fpn_1x_coco.py +++ b/models/cv/detection/fsaf/igie/fsaf_r50_fpn_1x_coco.py @@ -147,7 +147,7 @@ test_dataloader = dict( ann_file='annotations/instances_val2017.json', backend_args=None, data_prefix=dict(img='images/val2017/'), - data_root='/root/.igie_cache/modelzoo_data/datasets/coco/', + data_root='data/coco/', pipeline=[ dict(backend_args=None, type='LoadImageFromFile'), dict(keep_ratio=False, scale=( @@ -172,8 +172,7 @@ test_dataloader = dict( persistent_workers=True, sampler=dict(shuffle=False, type='DefaultSampler')) test_evaluator = dict( - ann_file= - '/root/.igie_cache/modelzoo_data/datasets/coco/annotations/instances_val2017.json', + ann_file='data/coco/annotations/instances_val2017.json', backend_args=None, format_only=False, metric='bbox', @@ -274,5 +273,4 @@ visualizer = dict( type='DetLocalVisualizer', vis_backends=[ dict(type='LocalVisBackend'), - ]) -work_dir = './' + ]) \ No newline at end of file diff --git a/models/cv/detection/fsaf/igie/inference.py b/models/cv/detection/fsaf/igie/inference.py index 058a53433e2ac0e02f3650846980c2d5460d5e90..7d128c3d891db2d3ca48634a3a64bb8ea251c527 100644 --- a/models/cv/detection/fsaf/igie/inference.py +++ b/models/cv/detection/fsaf/igie/inference.py @@ 
-20,7 +20,7 @@ import torch import numpy as np from tvm import relay from tqdm import tqdm -from mmpose.registry import RUNNERS +from mmdet.registry import RUNNERS from mmengine.config import Config def parse_args(): @@ -99,7 +99,7 @@ def main(): # runner config cfg = Config.fromfile("fsaf_r50_fpn_1x_coco.py") - cfg.work_dir = "./" + cfg.work_dir = "./workspace" cfg['test_dataloader']['batch_size'] = batch_size cfg['test_dataloader']['dataset']['data_root'] = args.datasets cfg['test_dataloader']['dataset']['data_prefix']['img'] = 'images/val2017/' diff --git a/models/cv/detection/hrnet/igie/base/coco_detection.py b/models/cv/detection/hrnet/igie/base/coco_detection.py deleted file mode 100644 index f58fe67becb40462fc1f6b9ab9a49386e90f11e2..0000000000000000000000000000000000000000 --- a/models/cv/detection/hrnet/igie/base/coco_detection.py +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' - -backend_args = None - -train_pipeline = [ - dict(type='LoadImageFromFile', backend_args=backend_args), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', prob=0.5), - dict(type='PackDetInputs') -] -test_pipeline = [ - dict(type='LoadImageFromFile', backend_args=backend_args), - dict(type='Resize', scale=(1333, 800), keep_ratio=True), - # If you don't have a gt annotation, delete the pipeline - dict(type='LoadAnnotations', with_bbox=True), - dict( - type='PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', - 'scale_factor')) -] -train_dataloader = dict( - batch_size=2, - num_workers=2, - persistent_workers=True, - sampler=dict(type='DefaultSampler', shuffle=True), - batch_sampler=dict(type='AspectRatioBatchSampler'), - dataset=dict( - type=dataset_type, - data_root=data_root, - ann_file='annotations/instances_train2017.json', - data_prefix=dict(img='train2017/'), - filter_cfg=dict(filter_empty_gt=True, min_size=32), - pipeline=train_pipeline, - backend_args=backend_args)) -val_dataloader = dict( - batch_size=1, - num_workers=2, - persistent_workers=True, - drop_last=False, - sampler=dict(type='DefaultSampler', shuffle=False), - dataset=dict( - type=dataset_type, - data_root=data_root, - ann_file='annotations/instances_val2017.json', - data_prefix=dict(img='val2017/'), - test_mode=True, - pipeline=test_pipeline, - backend_args=backend_args)) -test_dataloader = val_dataloader - -val_evaluator = dict( - type='CocoMetric', - ann_file=data_root + 'annotations/instances_val2017.json', - metric='bbox', - format_only=False, - backend_args=backend_args) -test_evaluator = val_evaluator \ No newline at end of file diff --git a/models/cv/detection/hrnet/igie/base/fcos_hrnetv2p-w32-gn-head_4xb4-1x_coco.py b/models/cv/detection/hrnet/igie/base/fcos_hrnetv2p-w32-gn-head_4xb4-1x_coco.py deleted file mode 100644 index 
17b1a876c1e1a0a41c9928e3835f65768838c642..0000000000000000000000000000000000000000 --- a/models/cv/detection/hrnet/igie/base/fcos_hrnetv2p-w32-gn-head_4xb4-1x_coco.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -_base_ = 'fcos_r50-caffe_fpn_gn-head_4xb4-1x_coco.py' -model = dict( - data_preprocessor=dict( - mean=[103.53, 116.28, 123.675], - std=[57.375, 57.12, 58.395], - bgr_to_rgb=False), - backbone=dict( - _delete_=True, - type='HRNet', - extra=dict( - stage1=dict( - num_modules=1, - num_branches=1, - block='BOTTLENECK', - num_blocks=(4, ), - num_channels=(64, )), - stage2=dict( - num_modules=1, - num_branches=2, - block='BASIC', - num_blocks=(4, 4), - num_channels=(32, 64)), - stage3=dict( - num_modules=4, - num_branches=3, - block='BASIC', - num_blocks=(4, 4, 4), - num_channels=(32, 64, 128)), - stage4=dict( - num_modules=3, - num_branches=4, - block='BASIC', - num_blocks=(4, 4, 4, 4), - num_channels=(32, 64, 128, 256))), - init_cfg=dict( - type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w32')), - neck=dict( - _delete_=True, - type='HRFPN', - in_channels=[32, 64, 128, 256], - out_channels=256, - stride=2, - num_outs=5)) diff --git a/models/cv/detection/hrnet/igie/base/fcos_r50-caffe_fpn_gn-head_1x_coco.py b/models/cv/detection/hrnet/igie/base/fcos_r50-caffe_fpn_gn-head_1x_coco.py deleted file mode 100644 index 9e04ad5f5ce3001e2a6b36590eef7ba8d3a0511a..0000000000000000000000000000000000000000 --- a/models/cv/detection/hrnet/igie/base/fcos_r50-caffe_fpn_gn-head_1x_coco.py +++ /dev/null @@ -1,90 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -_base_ = [ - 'coco_detection.py', - 'schedule_1x.py', 'default_runtime.py' -] - -# model settings -model = dict( - type='FCOS', - data_preprocessor=dict( - type='DetDataPreprocessor', - mean=[102.9801, 115.9465, 122.7717], - std=[1.0, 1.0, 1.0], - bgr_to_rgb=False, - pad_size_divisor=32), - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=False), - norm_eval=True, - style='caffe', - init_cfg=dict( - type='Pretrained', - checkpoint='open-mmlab://detectron/resnet50_caffe')), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - start_level=1, - add_extra_convs='on_output', # use P5 - num_outs=5, - relu_before_extra_convs=True), - bbox_head=dict( - type='FCOSHead', - num_classes=80, - in_channels=256, - stacked_convs=4, - feat_channels=256, - strides=[8, 16, 32, 64, 128], - loss_cls=dict( - type='FocalLoss', - use_sigmoid=True, - gamma=2.0, - alpha=0.25, - loss_weight=1.0), - loss_bbox=dict(type='IoULoss', loss_weight=1.0), - loss_centerness=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)), - # testing settings - test_cfg=dict( - nms_pre=1000, - min_bbox_size=0, - score_thr=0.05, - nms=dict(type='nms', iou_threshold=0.5), - max_per_img=100)) - -# learning rate -param_scheduler = [ - dict(type='ConstantLR', factor=1.0 / 3, by_epoch=False, begin=0, end=500), - dict( - type='MultiStepLR', - begin=0, - end=12, - by_epoch=True, - milestones=[8, 11], - gamma=0.1) -] - -# optimizer -optim_wrapper = dict( - optimizer=dict(lr=0.01), - paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.), - clip_grad=dict(max_norm=35, norm_type=2)) diff --git a/models/cv/detection/hrnet/igie/base/schedule_1x.py b/models/cv/detection/hrnet/igie/base/schedule_1x.py deleted file mode 100644 index 9b16d80c7704f8b55b6d3f3e622399515668f26b..0000000000000000000000000000000000000000 --- a/models/cv/detection/hrnet/igie/base/schedule_1x.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -# training schedule for 1x -train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=12, val_interval=1) -val_cfg = dict(type='ValLoop') -test_cfg = dict(type='TestLoop') - -# learning rate -param_scheduler = [ - dict( - type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500), - dict( - type='MultiStepLR', - begin=0, - end=12, - by_epoch=True, - milestones=[8, 11], - gamma=0.1) -] - -# optimizer -optim_wrapper = dict( - type='OptimWrapper', - optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)) - -# Default setting for scaling LR automatically -# - `enable` means enable scaling LR automatically -# or not by default. -# - `base_batch_size` = (8 GPUs) x (2 samples per GPU). 
-auto_scale_lr = dict(enable=False, base_batch_size=16) diff --git a/models/cv/detection/hrnet/igie/fcos_hrnetv2p-w18-gn-head_4xb4-1x_coco.py b/models/cv/detection/hrnet/igie/fcos_hrnetv2p-w18-gn-head_4xb4-1x_coco.py index 010dee7d9feaeb0260bccca28adcc66a899ac0dc..cf81f4fc937bd2dd3301bea1d1ec21849983e699 100644 --- a/models/cv/detection/hrnet/igie/fcos_hrnetv2p-w18-gn-head_4xb4-1x_coco.py +++ b/models/cv/detection/hrnet/igie/fcos_hrnetv2p-w18-gn-head_4xb4-1x_coco.py @@ -30,7 +30,7 @@ env_cfg = dict( dist_cfg=dict(backend='nccl'), mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0)) load_from = None -log_level = 'INFO' +log_level = 'ERROR' log_processor = dict(by_epoch=True, type='LogProcessor', window_size=50) model = dict( backbone=dict( @@ -171,7 +171,7 @@ test_dataloader = dict( ann_file='annotations/instances_val2017.json', backend_args=None, data_prefix=dict(img='images/val2017/'), - data_root='/home/peng.yang/Datasets/coco', + data_root='data/coco/', pipeline=[ dict(backend_args=None, type='LoadImageFromFile'), dict(keep_ratio=True, scale=( @@ -196,7 +196,7 @@ test_dataloader = dict( persistent_workers=True, sampler=dict(shuffle=False, type='DefaultSampler')) test_evaluator = dict( - ann_file='/home/peng.yang/Datasets/coco/annotations/instances_val2017.json', + ann_file='data/coco/annotations/instances_val2017.json', backend_args=None, format_only=False, metric='bbox', @@ -232,7 +232,7 @@ train_dataloader = dict( dict(backend_args=None, type='LoadImageFromFile'), dict(type='LoadAnnotations', with_bbox=True), dict(keep_ratio=True, scale=( - 800, + 1333, 800, ), type='Resize'), dict(prob=0.5, type='RandomFlip'), @@ -246,7 +246,7 @@ train_pipeline = [ dict(backend_args=None, type='LoadImageFromFile'), dict(type='LoadAnnotations', with_bbox=True), dict(keep_ratio=True, scale=( - 800, + 1333, 800, ), type='Resize'), dict(prob=0.5, type='RandomFlip'), @@ -297,5 +297,4 @@ visualizer = dict( type='DetLocalVisualizer', vis_backends=[ dict(type='LocalVisBackend'), - ]) -work_dir = './' + ]) \ No newline at end of file diff --git a/models/cv/detection/hrnet/igie/inference.py b/models/cv/detection/hrnet/igie/inference.py index 310eaaefd9e429f28fb7ac301b530f7f7918faa4..601c7a180ae84513fa9f2073892b3e58e37d219f 100644 --- a/models/cv/detection/hrnet/igie/inference.py +++ b/models/cv/detection/hrnet/igie/inference.py @@ -85,7 +85,7 @@ def main(): # create runtime from engine module = tvm.contrib.graph_executor.GraphModule(lib["default"](device)) - + # just run perf test if args.perf_only: ftimer = module.module.time_evaluator("run", device, number=100, repeat=1) @@ -99,12 +99,13 @@ def main(): # Runner config cfg = Config.fromfile("fcos_hrnetv2p-w18-gn-head_4xb4-1x_coco.py") - cfg.work_dir = "./" - + + cfg.work_dir = "./workspace" cfg['test_dataloader']['batch_size'] = batch_size cfg['test_dataloader']['dataset']['data_root'] = args.datasets cfg['test_dataloader']['dataset']['data_prefix']['img'] = 'images/val2017/' cfg['test_evaluator']['ann_file'] = os.path.join(args.datasets, 'annotations/instances_val2017.json') + cfg['log_level'] = 'ERROR' runner = RUNNERS.build(cfg) diff --git a/models/cv/detection/paa/igie/README.md b/models/cv/detection/paa/igie/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fb701eebf38bfd12e647f964ee63f35c33c633e4 --- /dev/null +++ b/models/cv/detection/paa/igie/README.md @@ -0,0 +1,65 @@ +# PAA + +## Description + +PAA (Probabilistic Anchor Assignment) is an algorithm for object detection that adaptively assigns positive and negative 
anchor samples using a probabilistic model. It employs a Gaussian mixture model to dynamically select positive and negative samples based on score distribution, avoiding the misassignment issues of traditional IoU threshold-based methods. PAA enhances detection accuracy, particularly in complex scenarios, and is compatible with existing detection frameworks. + +## Setup + +### Install + +```bash +# Install libGL +## CentOS +yum install -y mesa-libGL +## Ubuntu +apt install -y libgl1-mesa-dev + +pip3 install onnx +pip3 install tqdm +pip3 install onnxsim +pip3 install mmdet +pip3 install mmdeploy +pip3 install mmengine +``` + +### Download + +Pretrained model: + +Dataset: to download the validation dataset. + +### Model Conversion + +```bash +# export onnx model +python3 export.py --weight paa_r50_fpn_1x_coco_20200821-936edec3.pth --cfg paa_r50_fpn_1x_coco.py --output paa.onnx + +# Use onnxsim optimize onnx model +onnxsim paa.onnx paa_opt.onnx +``` + +## Inference + +```bash +export DATASETS_DIR=/Path/to/coco/ +``` + +### FP16 + +```bash +# Accuracy +bash scripts/infer_paa_fp16_accuracy.sh +# Performance +bash scripts/infer_paa_fp16_performance.sh +``` + +## Results + +Model |BatchSize |Precision |FPS |IOU@0.5 |IOU@0.5:0.95 | +-------|-----------|----------|----------|----------|---------------| +PAA | 32 | FP16 | 138.414 | 0.555 | 0.381 | + +## Reference + +mmdetection: diff --git a/models/cv/detection/paa/igie/build_engine.py b/models/cv/detection/paa/igie/build_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..d3626ae76cc9781d9a01ec3d3e2afbdbca409ff5 --- /dev/null +++ b/models/cv/detection/paa/igie/build_engine.py @@ -0,0 +1,73 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import tvm +import argparse +from tvm import relay +from tvm.relay.import_model import import_model_to_igie + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--model_path", + type=str, + required=True, + help="original model path.") + + parser.add_argument("--engine_path", + type=str, + required=True, + help="igie export engine path.") + + parser.add_argument("--input", + type=str, + required=True, + help=""" + input info of the model, format should be: + input_name:input_shape + eg: --input input:1,3,224,224. 
+ """) + + parser.add_argument("--precision", + type=str, + choices=["fp32", "fp16", "int8"], + required=True, + help="model inference precision.") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + # get input valueinfo + input_name, input_shape = args.input.split(":") + shape = tuple([int(s) for s in input_shape.split(",")]) + input_dict = {input_name: shape} + + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + + mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie") + + # build engine + lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision) + + # export engine + lib.export_library(args.engine_path) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/detection/paa/igie/deploy_default.py b/models/cv/detection/paa/igie/deploy_default.py new file mode 100644 index 0000000000000000000000000000000000000000..b8d8e43dc829456f0c2e46a7acfc3128757f945d --- /dev/null +++ b/models/cv/detection/paa/igie/deploy_default.py @@ -0,0 +1,41 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +onnx_config = dict( + type='onnx', + export_params=True, + keep_initializers_as_inputs=False, + opset_version=11, + save_file='end2end.onnx', + input_names=['input'], + output_names=['output'], + input_shape=None, + optimize=True) + +codebase_config = dict( + type='mmdet', + task='ObjectDetection', + model_type='end2end', + post_processing=dict( + score_threshold=0.05, + confidence_threshold=0.005, + iou_threshold=0.5, + max_output_boxes_per_class=200, + pre_top_k=5000, + keep_top_k=100, + background_label_id=-1, + )) + +backend_config = dict(type='onnxruntime') \ No newline at end of file diff --git a/models/cv/detection/paa/igie/export.py b/models/cv/detection/paa/igie/export.py new file mode 100644 index 0000000000000000000000000000000000000000..bceaba7801843feb5ef095f62a71a2f6e0074db4 --- /dev/null +++ b/models/cv/detection/paa/igie/export.py @@ -0,0 +1,74 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+import argparse + +import torch +from mmdeploy.utils import load_config +from mmdeploy.apis import build_task_processor + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--weight", + type=str, + required=True, + help="pytorch model weight.") + + parser.add_argument("--cfg", + type=str, + required=True, + help="model config file.") + + parser.add_argument("--output", + type=str, + required=True, + help="export onnx model path.") + + args = parser.parse_args() + return args + +def main(): + args = parse_args() + + deploy_cfg = 'deploy_default.py' + model_cfg = args.cfg + model_checkpoint = args.weight + + deploy_cfg, model_cfg = load_config(deploy_cfg, model_cfg) + + task_processor = build_task_processor(model_cfg, deploy_cfg, device='cpu') + + model = task_processor.build_pytorch_model(model_checkpoint) + + input_names = ['input'] + output_names = ['output'] + dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}} + dummy_input = torch.randn(1, 3, 800, 800) + + torch.onnx.export( + model, + dummy_input, + args.output, + input_names = input_names, + dynamic_axes = dynamic_axes, + output_names = output_names, + opset_version=13 + ) + + print("Export onnx model successfully! ") + +if __name__ == '__main__': + main() + diff --git a/models/cv/detection/paa/igie/inference.py b/models/cv/detection/paa/igie/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..5b93aefceb06cd0325688890c6032f967c619bc8 --- /dev/null +++ b/models/cv/detection/paa/igie/inference.py @@ -0,0 +1,158 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
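+# The compiled PAA engine returns a flat list of tensors (one per FPN level for
+# each head branch). The loop in main() groups them by channel count before
+# mmdet post-processing: outputs with 80 channels are classification scores,
+# outputs with 4 channels are box regression deltas, and the remaining
+# single-channel outputs are score factors; the grouped lists are then passed
+# to `bbox_head.predict_by_feat`.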
+ +import os +import argparse +import tvm +import torch +import torchvision +import numpy as np +from tvm import relay +from tqdm import tqdm +from mmdet.registry import RUNNERS +from mmengine.config import Config + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--engine", + type=str, + required=True, + help="igie engine path.") + + parser.add_argument("--batchsize", + type=int, + required=True, + help="inference batch size.") + + parser.add_argument("--datasets", + type=str, + required=True, + help="datasets path.") + + parser.add_argument("--input_name", + type=str, + required=True, + help="input name of the model.") + + parser.add_argument("--warmup", + type=int, + default=3, + help="number of warmup before test.") + + parser.add_argument("--acc_target", + type=float, + default=None, + help="Model inference Accuracy target.") + + parser.add_argument("--fps_target", + type=float, + default=None, + help="Model inference FPS target.") + + parser.add_argument("--perf_only", + type=bool, + default=False, + help="Run performance test only") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + batch_size = args.batchsize + + # create iluvatar target & device + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + device = tvm.device(target.kind.name, 0) + + # load engine + lib = tvm.runtime.load_module(args.engine) + + # create runtime from engine + module = tvm.contrib.graph_executor.GraphModule(lib["default"](device)) + + # just run perf test + if args.perf_only: + ftimer = module.module.time_evaluator("run", device, number=100, repeat=1) + prof_res = np.array(ftimer().results) * 1000 + fps = batch_size * 1000 / np.mean(prof_res) + print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}") + else: + # warm up + for _ in range(args.warmup): + module.run() + + # Runner config + cfg = Config.fromfile("paa_r50_fpn_1x_coco.py") + + cfg.work_dir = "./workspace" + cfg['test_dataloader']['batch_size'] = batch_size + cfg['test_dataloader']['dataset']['data_root'] = args.datasets + cfg['test_dataloader']['dataset']['data_prefix']['img'] = 'images/val2017/' + cfg['test_evaluator']['ann_file'] = os.path.join(args.datasets, 'annotations/instances_val2017.json') + cfg['log_level'] = "ERROR" + + runner = RUNNERS.build(cfg) + + for input_data in tqdm(runner.test_dataloader): + + input_data = runner.model.data_preprocessor(input_data, False) + image = input_data['inputs'].cpu() + + pad_batch = len(image) != batch_size + if pad_batch: + origin_size = len(image) + image = np.resize(image, (batch_size, *image.shape[1:])) + + module.set_input(args.input_name, tvm.nd.array(image, device)) + + module.run() + + cls_score = [] + box_reg = [] + score_factors = [] + + for i in range(module.get_num_outputs()): + output = module.get_output(i).asnumpy() + + if pad_batch: + output = output[:origin_size] + + output = torch.from_numpy(output) + + if output.shape[1] == 80: + cls_score.append(output) + elif output.shape[1] == 4: + box_reg.append(output) + else: + score_factors.append(output) + + batch_img_metas = [ + data_samples.metainfo for data_samples in input_data['data_samples'] + ] + + results_list = runner.model.bbox_head.predict_by_feat(cls_score, box_reg, score_factors, batch_img_metas=batch_img_metas, rescale=True) + + batch_data_samples = runner.model.add_pred_to_datasample(input_data['data_samples'], results_list) + + runner.test_evaluator.process(data_samples=batch_data_samples, 
data_batch=input_data) + + metrics = runner.test_evaluator.evaluate(len(runner.test_dataloader.dataset)) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/detection/paa/igie/paa_r50_fpn_1x_coco.py b/models/cv/detection/paa/igie/paa_r50_fpn_1x_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..2797e34790348047904dbb6112bec230fac1a17c --- /dev/null +++ b/models/cv/detection/paa/igie/paa_r50_fpn_1x_coco.py @@ -0,0 +1,291 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +auto_scale_lr = dict(base_batch_size=16, enable=False) +backend_args = None +data_root = 'data/coco/' +dataset_type = 'CocoDataset' +default_hooks = dict( + checkpoint=dict(interval=1, type='CheckpointHook'), + logger=dict(interval=50, type='LoggerHook'), + param_scheduler=dict(type='ParamSchedulerHook'), + sampler_seed=dict(type='DistSamplerSeedHook'), + timer=dict(type='IterTimerHook'), + visualization=dict(type='DetVisualizationHook')) +default_scope = 'mmdet' +env_cfg = dict( + cudnn_benchmark=False, + dist_cfg=dict(backend='nccl'), + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0)) +load_from = None +log_level = 'ERROR' +log_processor = dict(by_epoch=True, type='LogProcessor', window_size=50) +model = dict( + backbone=dict( + depth=50, + frozen_stages=1, + init_cfg=dict(checkpoint='torchvision://resnet50', type='Pretrained'), + norm_cfg=dict(requires_grad=True, type='BN'), + norm_eval=True, + num_stages=4, + out_indices=( + 0, + 1, + 2, + 3, + ), + style='pytorch', + type='ResNet'), + bbox_head=dict( + anchor_generator=dict( + octave_base_scale=8, + ratios=[ + 1.0, + ], + scales_per_octave=1, + strides=[ + 8, + 16, + 32, + 64, + 128, + ], + type='AnchorGenerator'), + bbox_coder=dict( + target_means=[ + 0.0, + 0.0, + 0.0, + 0.0, + ], + target_stds=[ + 0.1, + 0.1, + 0.2, + 0.2, + ], + type='DeltaXYWHBBoxCoder'), + feat_channels=256, + in_channels=256, + loss_bbox=dict(loss_weight=1.3, type='GIoULoss'), + loss_centerness=dict( + loss_weight=0.5, type='CrossEntropyLoss', use_sigmoid=True), + loss_cls=dict( + alpha=0.25, + gamma=2.0, + loss_weight=1.0, + type='FocalLoss', + use_sigmoid=True), + num_classes=80, + reg_decoded_bbox=True, + score_voting=True, + stacked_convs=4, + topk=9, + type='PAAHead'), + data_preprocessor=dict( + bgr_to_rgb=True, + mean=[ + 123.675, + 116.28, + 103.53, + ], + pad_size_divisor=32, + std=[ + 58.395, + 57.12, + 57.375, + ], + type='DetDataPreprocessor'), + neck=dict( + add_extra_convs='on_output', + in_channels=[ + 256, + 512, + 1024, + 2048, + ], + num_outs=5, + out_channels=256, + start_level=1, + type='FPN'), + test_cfg=dict( + max_per_img=100, + min_bbox_size=0, + nms=dict(iou_threshold=0.6, type='nms'), + nms_pre=1000, + score_thr=0.05), + train_cfg=dict( + allowed_border=-1, + assigner=dict( + ignore_iof_thr=-1, + min_pos_iou=0, + neg_iou_thr=0.1, + pos_iou_thr=0.1, + type='MaxIoUAssigner'), + debug=False, + 
pos_weight=-1), + type='PAA') +optim_wrapper = dict( + optimizer=dict(lr=0.01, momentum=0.9, type='SGD', weight_decay=0.0001), + type='OptimWrapper') +param_scheduler = [ + dict( + begin=0, by_epoch=False, end=500, start_factor=0.001, type='LinearLR'), + dict( + begin=0, + by_epoch=True, + end=12, + gamma=0.1, + milestones=[ + 8, + 11, + ], + type='MultiStepLR'), +] +resume = False +test_cfg = dict(type='TestLoop') +test_dataloader = dict( + batch_size=32, + dataset=dict( + ann_file='annotations/instances_val2017.json', + backend_args=None, + data_prefix=dict(img='images/val2017/'), + data_root='data/coco/', + pipeline=[ + dict(backend_args=None, type='LoadImageFromFile'), + dict(keep_ratio=True, scale=( + 800, + 800, + ), type='Resize'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + meta_keys=( + 'img_id', + 'img_path', + 'ori_shape', + 'img_shape', + 'scale_factor', + ), + type='PackDetInputs'), + ], + test_mode=True, + type='CocoDataset'), + drop_last=False, + num_workers=2, + persistent_workers=True, + sampler=dict(shuffle=False, type='DefaultSampler')) +test_evaluator = dict( + ann_file='data/coco/annotations/instances_val2017.json', + backend_args=None, + format_only=False, + metric='bbox', + type='CocoMetric') +test_pipeline = [ + dict(backend_args=None, type='LoadImageFromFile'), + dict(keep_ratio=True, scale=( + 800, + 800, + ), type='Resize'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + meta_keys=( + 'img_id', + 'img_path', + 'ori_shape', + 'img_shape', + 'scale_factor', + ), + type='PackDetInputs'), +] +train_cfg = dict(max_epochs=12, type='EpochBasedTrainLoop', val_interval=1) +train_dataloader = dict( + batch_sampler=dict(type='AspectRatioBatchSampler'), + batch_size=2, + dataset=dict( + ann_file='annotations/instances_train2017.json', + backend_args=None, + data_prefix=dict(img='train2017/'), + data_root='data/coco/', + filter_cfg=dict(filter_empty_gt=True, min_size=32), + pipeline=[ + dict(backend_args=None, type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(keep_ratio=True, scale=( + 1333, + 800, + ), type='Resize'), + dict(prob=0.5, type='RandomFlip'), + dict(type='PackDetInputs'), + ], + type='CocoDataset'), + num_workers=2, + persistent_workers=True, + sampler=dict(shuffle=True, type='DefaultSampler')) +train_pipeline = [ + dict(backend_args=None, type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(keep_ratio=True, scale=( + 1333, + 800, + ), type='Resize'), + dict(prob=0.5, type='RandomFlip'), + dict(type='PackDetInputs'), +] +val_cfg = dict(type='ValLoop') +val_dataloader = dict( + batch_size=1, + dataset=dict( + ann_file='annotations/instances_val2017.json', + backend_args=None, + data_prefix=dict(img='val2017/'), + data_root='data/coco/', + pipeline=[ + dict(backend_args=None, type='LoadImageFromFile'), + dict(keep_ratio=True, scale=( + 800, + 800, + ), type='Resize'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + meta_keys=( + 'img_id', + 'img_path', + 'ori_shape', + 'img_shape', + 'scale_factor', + ), + type='PackDetInputs'), + ], + test_mode=True, + type='CocoDataset'), + drop_last=False, + num_workers=2, + persistent_workers=True, + sampler=dict(shuffle=False, type='DefaultSampler')) +val_evaluator = dict( + ann_file='data/coco/annotations/instances_val2017.json', + backend_args=None, + format_only=False, + metric='bbox', + type='CocoMetric') +vis_backends = [ + dict(type='LocalVisBackend'), +] +visualizer = dict( + name='visualizer', + type='DetLocalVisualizer', + 
vis_backends=[ + dict(type='LocalVisBackend'), + ]) diff --git a/models/cv/detection/hrnet/igie/base/fcos_r50-caffe_fpn_gn-head_4xb4-1x_coco.py b/models/cv/detection/paa/igie/scripts/infer_paa_fp16_accuracy.sh similarity index 54% rename from models/cv/detection/hrnet/igie/base/fcos_r50-caffe_fpn_gn-head_4xb4-1x_coco.py rename to models/cv/detection/paa/igie/scripts/infer_paa_fp16_accuracy.sh index 3f9af61a0be5f771f2a4679522df46d8eda4c5a6..9909aa3015608b168579acb2d813ab98226590ca 100644 --- a/models/cv/detection/hrnet/igie/base/fcos_r50-caffe_fpn_gn-head_4xb4-1x_coco.py +++ b/models/cv/detection/paa/igie/scripts/infer_paa_fp16_accuracy.sh @@ -1,3 +1,5 @@ +#!/bin/bash + # Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. # All Rights Reserved. # @@ -13,7 +15,21 @@ # License for the specific language governing permissions and limitations # under the License. -_base_ = 'fcos_r50-caffe_fpn_gn-head_1x_coco.py' +batchsize=32 +model_path="paa_opt.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,800,800 \ + --precision fp16 \ + --engine_path paa_bs_${batchsize}_fp16.so + -# dataset settings -train_dataloader = dict(batch_size=4, num_workers=4) +# inference +python3 inference.py \ + --engine paa_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ No newline at end of file diff --git a/models/cv/detection/paa/igie/scripts/infer_paa_fp16_performance.sh b/models/cv/detection/paa/igie/scripts/infer_paa_fp16_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..49d54558067971af3088a4185887b9a5e65ab859 --- /dev/null +++ b/models/cv/detection/paa/igie/scripts/infer_paa_fp16_performance.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
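+# Note: inference.py parses --perf_only with argparse `type=bool`, so any
+# non-empty value (even "False") is treated as True. Only this performance
+# script passes the flag; the accuracy script above omits it, so the default
+# (False) applies and the full COCO evaluation runs.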
+ +batchsize=32 +model_path="paa_opt.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,800,800 \ + --precision fp16 \ + --engine_path paa_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine paa_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ + --perf_only True \ No newline at end of file diff --git a/models/cv/detection/retinanet/igie/inference.py b/models/cv/detection/retinanet/igie/inference.py index 7e47529ed225fadc5cb3cec4d1d527a9878388e1..75922190011810a1b985a88f5c766bba14e5ff2a 100644 --- a/models/cv/detection/retinanet/igie/inference.py +++ b/models/cv/detection/retinanet/igie/inference.py @@ -97,15 +97,17 @@ def main(): for _ in range(args.warmup): module.run() - # Runner config + # runner config cfg = Config.fromfile("retinanet_r50_fpn_1x_coco.py") - cfg.work_dir = "./" + cfg.work_dir = "./workspace" cfg['test_dataloader']['batch_size'] = batch_size cfg['test_dataloader']['dataset']['data_root'] = args.datasets cfg['test_dataloader']['dataset']['data_prefix']['img'] = 'images/val2017/' cfg['test_evaluator']['ann_file'] = os.path.join(args.datasets, 'annotations/instances_val2017.json') + cfg['log_level'] = 'ERROR' + # build runner runner = RUNNERS.build(cfg) for input_data in tqdm(runner.test_dataloader): diff --git a/models/cv/detection/retinanet/igie/retinanet_r50_fpn_1x_coco.py b/models/cv/detection/retinanet/igie/retinanet_r50_fpn_1x_coco.py index 07570f1b86037d33302e6eb977b12ed0ba4c3c60..d176b02d53cb1667178eea6e6aefcd5f4913e57e 100644 --- a/models/cv/detection/retinanet/igie/retinanet_r50_fpn_1x_coco.py +++ b/models/cv/detection/retinanet/igie/retinanet_r50_fpn_1x_coco.py @@ -165,7 +165,7 @@ test_dataloader = dict( ann_file='annotations/instances_val2017.json', backend_args=None, data_prefix=dict(img='images/val2017/'), - data_root='/root/.igie_cache/data/datasets/coco/', + data_root='data/coco/', pipeline=[ dict(backend_args=None, type='LoadImageFromFile'), dict(keep_ratio=False, scale=( @@ -190,8 +190,7 @@ test_dataloader = dict( persistent_workers=True, sampler=dict(shuffle=False, type='DefaultSampler')) test_evaluator = dict( - ann_file= - '/root/.igie_cache/data/datasets/coco/annotations/instances_val2017.json', + ann_file='data/coco/annotations/instances_val2017.json', backend_args=None, format_only=False, metric='bbox', @@ -220,7 +219,7 @@ val_dataloader = dict( ann_file='annotations/instances_val2017.json', backend_args=None, data_prefix=dict(img='images/val2017/'), - data_root='/root/.igie_cache/data/datasets/coco/', + data_root='data/coco/', pipeline=[ dict(backend_args=None, type='LoadImageFromFile'), dict(keep_ratio=False, scale=( @@ -245,10 +244,8 @@ val_dataloader = dict( persistent_workers=True, sampler=dict(shuffle=False, type='DefaultSampler')) val_evaluator = dict( - ann_file= - '/root/.igie_cache/data/datasets/coco/annotations/instances_val2017.json', + ann_file='data/coco/annotations/instances_val2017.json', backend_args=None, format_only=False, metric='bbox', type='CocoMetric') -work_dir = './' diff --git a/models/cv/detection/rtmdet/igie/README.md b/models/cv/detection/rtmdet/igie/README.md index 737d203c279282ccbff080934e52926d7e3d301f..3d25cec459e785f434c864481955c6fd6682e4ae 100644 --- a/models/cv/detection/rtmdet/igie/README.md +++ b/models/cv/detection/rtmdet/igie/README.md @@ -19,13 +19,12 @@ pip3 install onnx pip3 install tqdm pip3 install onnxsim pip3 install 
mmdet==3.3.0 -pip3 install mmpose==1.3.1 pip3 install mmdeploy==1.3.1 pip3 install mmengine==0.10.4 ``` ### Download -Pretrained model: https://download.openmmlab.com/mmpose/v1/projects/rtmpose/rtmdet_nano_8xb32-100e_coco-obj365-person-05d8511e.pth +Pretrained model: Dataset: to download the validation dataset. @@ -55,7 +54,11 @@ bash scripts/infer_rtmdet_fp16_performance.sh ``` ## Results +Model |BatchSize |Precision |FPS |IOU@0.5 |IOU@0.5:0.95 | +----------|-----------|----------|----------|----------|---------------| +RTMDet | 32 | FP16 | 2627.15 | 0.619 | 0.403 | -| Model | BatchSize | Input Shape | Precision | FPS | mAP@0.5(%) | -| :-------: | :-------: | :---------: | :-------: | :-------: | :--------: | -| RTMDet | 32 | 320x320 | FP16 | 2627.15 | 0.619 | \ No newline at end of file + +## Reference + +mmdetection: diff --git a/models/cv/detection/rtmdet/igie/inference.py b/models/cv/detection/rtmdet/igie/inference.py index 4c89904b29c95553f2b88a19cb1447a523b9819f..43099007b62be367fa2bee2fc5c6146f4537acb6 100644 --- a/models/cv/detection/rtmdet/igie/inference.py +++ b/models/cv/detection/rtmdet/igie/inference.py @@ -20,7 +20,7 @@ import torch import numpy as np from tvm import relay from tqdm import tqdm -from mmpose.registry import RUNNERS +from mmdet.registry import RUNNERS from mmengine.config import Config def parse_args(): @@ -99,7 +99,7 @@ def main(): # runner config cfg = Config.fromfile("rtmdet_nano_320-8xb32_coco-person.py") - cfg.work_dir = "./" + cfg.work_dir = "./workspace" cfg['test_dataloader']['batch_size'] = batch_size cfg['test_dataloader']['dataset']['data_root'] = args.datasets cfg['test_dataloader']['dataset']['data_prefix']['img'] = 'images/val2017/' diff --git a/models/cv/detection/rtmdet/igie/rtmdet_nano_320-8xb32_coco-person.py b/models/cv/detection/rtmdet/igie/rtmdet_nano_320-8xb32_coco-person.py index 6e71fb66fda1f1cb566eb8bb1759795317991633..1b790e55f6415206b0938efdb7318d0fc0f799b4 100644 --- a/models/cv/detection/rtmdet/igie/rtmdet_nano_320-8xb32_coco-person.py +++ b/models/cv/detection/rtmdet/igie/rtmdet_nano_320-8xb32_coco-person.py @@ -1,3 +1,18 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ auto_scale_lr = dict(base_batch_size=16, enable=False) backend_args = None base_lr = 0.004 @@ -192,7 +207,7 @@ test_dataloader = dict( ann_file='annotations/instances_val2017.json', backend_args=None, data_prefix=dict(img='images/val2017/'), - data_root='/root/.igie_cache/data/datasets/coco', + data_root='/data/coco', metainfo=dict(classes=('person', )), pipeline=[ dict(type='LoadImageFromFile'), @@ -229,8 +244,7 @@ test_dataloader = dict( sampler=dict(_scope_='mmdet', shuffle=False, type='DefaultSampler')) test_evaluator = dict( _scope_='mmdet', - ann_file= - '/root/.igie_cache/data/datasets/coco/annotations/person_keypoints_val2017.json', + ann_file='data/coco/annotations/person_keypoints_val2017.json', backend_args=None, format_only=False, metric='bbox', diff --git a/models/cv/detection/yolov10/igie/README.md b/models/cv/detection/yolov10/igie/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6fa83cac6d5596720e1e68bb8ea1f3e1c51ae394 --- /dev/null +++ b/models/cv/detection/yolov10/igie/README.md @@ -0,0 +1,53 @@ +# YOLOv10 + +## Description +YOLOv10, built on the Ultralytics Python package by researchers at Tsinghua University, introduces a new approach to real-time object detection, addressing both the post-processing and model architecture deficiencies found in previous YOLO versions. By eliminating non-maximum suppression (NMS) and optimizing various model components, YOLOv10 achieves state-of-the-art performance with significantly reduced computational overhead. Extensive experiments demonstrate its superior accuracy-latency trade-offs across multiple model scales. + +## Setup + +### Install + +```bash +pip3 install tqdm +pip3 install huggingface_hub==0.25.2 +``` + +### Download + +Pretrained model: + +## Model Conversion +```bash +git clone https://github.com/THU-MIG/yolov10.git +cd yolov10 +pip3 install -e . --no-deps +cd .. + +python3 export.py --weight yolov10s.pt --batch 32 + +``` + +## Inference + +```bash +export DATASETS_DIR=/Path/to/coco/ +``` + +### FP16 + +```bash +# Accuracy +bash scripts/infer_yolov10_fp16_accuracy.sh +# Performance +bash scripts/infer_yolov10_fp16_performance.sh +``` + +## Results + +Model |BatchSize |Precision |FPS |IOU@0.5 |IOU@0.5:0.95 | +--------|-----------|----------|----------|----------|---------------| +YOLOv10 | 32 | FP16 | 810.97 | 0.629 | 0.461 | + +## Reference + +YOLOv10: diff --git a/models/cv/detection/yolov10/igie/build_engine.py b/models/cv/detection/yolov10/igie/build_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..d3626ae76cc9781d9a01ec3d3e2afbdbca409ff5 --- /dev/null +++ b/models/cv/detection/yolov10/igie/build_engine.py @@ -0,0 +1,73 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
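+# Example invocation (values taken from the YOLOv10 scripts in this repo):
+#   python3 build_engine.py \
+#       --model_path yolov10s.onnx \
+#       --input images:32,3,640,640 \
+#       --precision fp16 \
+#       --engine_path yolov10s_bs_32_fp16.so
+# The --input value follows the "name:shape" format described in the argparse
+# help below; the input name must match the exported ONNX graph ("images" for
+# the Ultralytics YOLOv10 export).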
+ +import tvm +import argparse +from tvm import relay +from tvm.relay.import_model import import_model_to_igie + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--model_path", + type=str, + required=True, + help="original model path.") + + parser.add_argument("--engine_path", + type=str, + required=True, + help="igie export engine path.") + + parser.add_argument("--input", + type=str, + required=True, + help=""" + input info of the model, format should be: + input_name:input_shape + eg: --input input:1,3,224,224. + """) + + parser.add_argument("--precision", + type=str, + choices=["fp32", "fp16", "int8"], + required=True, + help="model inference precision.") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + # get input valueinfo + input_name, input_shape = args.input.split(":") + shape = tuple([int(s) for s in input_shape.split(",")]) + input_dict = {input_name: shape} + + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + + mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie") + + # build engine + lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision) + + # export engine + lib.export_library(args.engine_path) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/detection/yolov10/igie/export.py b/models/cv/detection/yolov10/igie/export.py new file mode 100644 index 0000000000000000000000000000000000000000..eb56c772a60dd77b006169b31b79d6ca05a8fc74 --- /dev/null +++ b/models/cv/detection/yolov10/igie/export.py @@ -0,0 +1,43 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import argparse +from ultralytics import YOLOv10 + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--weight", + type=str, + required=True, + help="pytorch model weight.") + + parser.add_argument("--batch", + type=int, + required=True, + help="batchsize of the model.") + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + model = YOLOv10(args.weight).cpu() + + model.export(format='onnx', batch=args.batch, imgsz=(640, 640), optimize=True, simplify=True, opset=13) + +if __name__ == "__main__": + main() diff --git a/models/cv/detection/yolov10/igie/inference.py b/models/cv/detection/yolov10/igie/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..d7c2430eb3e682cde3433de28fa4b9704f55623a --- /dev/null +++ b/models/cv/detection/yolov10/igie/inference.py @@ -0,0 +1,138 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +import os +import argparse +import tvm +from tvm import relay + +import numpy as np +from pathlib import Path +from ultralytics import YOLO +from ultralytics.cfg import get_cfg +from ultralytics.utils import DEFAULT_CFG +from validator import IGIE_Validator + + + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--engine", + type=str, + required=True, + help="igie engine path.") + + parser.add_argument("--batchsize", + type=int, + required=True, + help="inference batch size.") + + parser.add_argument("--datasets", + type=str, + required=True, + help="datasets path.") + + parser.add_argument("--input_name", + type=str, + required=True, + help="input name of the model.") + + parser.add_argument("--warmup", + type=int, + default=3, + help="number of warmup before test.") + + parser.add_argument("--acc_target", + type=float, + default=None, + help="Model inference Accuracy target.") + + parser.add_argument("--fps_target", + type=float, + default=None, + help="Model inference FPS target.") + + parser.add_argument("--perf_only", + type=bool, + default=False, + help="Run performance test only") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + batch_size = args.batchsize + + # create iluvatar target & device + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + device = tvm.device(target.kind.name, 0) + + # load engine + lib = tvm.runtime.load_module(args.engine) + + # create runtime from engine + module = tvm.contrib.graph_executor.GraphModule(lib["default"](device)) + + # just run perf test + if args.perf_only: + ftimer = module.module.time_evaluator("run", device, number=100, repeat=1) + prof_res = np.array(ftimer().results) * 1000 + fps = batch_size * 1000 / np.mean(prof_res) + print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}") + else: + root_path = args.datasets + val_path = os.path.join(root_path, 'val2017.txt') + + overrides = {} + overrides['mode'] = 'val' + + cfg_args = get_cfg(cfg=DEFAULT_CFG, overrides=overrides) + + cfg_args.batch = args.batchsize + + cfg_args.data = { + 'path': Path(root_path), + 'val': val_path, + 'names': + { + 0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', + 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', + 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', + 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', + 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', + 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', + 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', + 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', + 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', + 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', + 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', + 57: 'couch', 58: 'potted plant', 59: 'bed', 60: 'dining table', 61: 'toilet', + 62: 
'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone', + 68: 'microwave', 69: 'oven', 70: 'toaster', 71: 'sink', 72: 'refrigerator', 73: 'book', + 74: 'clock', 75: 'vase', 76: 'scissors', 77: 'teddy bear', 78: 'hair drier', 79: 'toothbrush' + }, + 'nc': 80} + cfg_args.save_json = True + + validator = IGIE_Validator(args=cfg_args, save_dir=Path('.')) + validator.stride = 32 + + stats = validator(module, device) + +if __name__ == "__main__": + main() diff --git a/models/cv/detection/yolov10/igie/scripts/infer_yolov10_fp16_accuracy.sh b/models/cv/detection/yolov10/igie/scripts/infer_yolov10_fp16_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..2b069afad39105dab12c17c572f1b831fe0503d0 --- /dev/null +++ b/models/cv/detection/yolov10/igie/scripts/infer_yolov10_fp16_accuracy.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="yolov10s.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input images:${batchsize},3,640,640 \ + --precision fp16 \ + --engine_path yolov10s_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine yolov10s_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name images \ + --datasets ${datasets_path} \ No newline at end of file diff --git a/models/cv/detection/yolov10/igie/scripts/infer_yolov10_fp16_performance.sh b/models/cv/detection/yolov10/igie/scripts/infer_yolov10_fp16_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..4f4b997c7af043baa8ff0445d19d75c8e456a07c --- /dev/null +++ b/models/cv/detection/yolov10/igie/scripts/infer_yolov10_fp16_performance.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
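+# In perf-only mode inference.py times 100 engine runs and reports
+#   fps = batch_size * 1000 / mean_latency_ms
+# so the FPS figure in the README is batch-32 throughput, not per-image latency.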
+ +batchsize=32 +model_path="yolov10s.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input images:${batchsize},3,640,640 \ + --precision fp16 \ + --engine_path yolov10s_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine yolov10s_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name images \ + --datasets ${datasets_path} \ + --perf_only True \ No newline at end of file diff --git a/models/cv/detection/yolov10/igie/validator.py b/models/cv/detection/yolov10/igie/validator.py new file mode 100644 index 0000000000000000000000000000000000000000..9036ab646498f8a2a2f94b90dead43d8c94dcd2a --- /dev/null +++ b/models/cv/detection/yolov10/igie/validator.py @@ -0,0 +1,87 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import tvm +import json +import torch +import numpy as np +from tqdm import tqdm + +from ultralytics.models.yolov10 import YOLOv10DetectionValidator +from ultralytics.utils.metrics import ConfusionMatrix +from ultralytics.data.converter import coco80_to_coco91_class + +class IGIE_Validator(YOLOv10DetectionValidator): + def __call__(self, engine, device): + self.data = self.args.data + self.dataloader = self.get_dataloader(self.data.get(self.args.split), self.args.batch) + self.init_metrics() + + self.stats = {'tp': [], 'conf': [], 'pred_cls': [], 'target_cls': []} + + # wram up + for _ in range(3): + engine.run() + + for batch in tqdm(self.dataloader): + batch = self.preprocess(batch) + + imgs = batch['img'] + pad_batch = len(imgs) != self.args.batch + if pad_batch: + origin_size = len(imgs) + imgs = np.resize(imgs, (self.args.batch, *imgs.shape[1:])) + + engine.set_input(0, tvm.nd.array(imgs, device)) + + engine.run() + + outputs = engine.get_output(0).asnumpy() + + if pad_batch: + outputs = outputs[:origin_size] + + outputs = torch.from_numpy(outputs) + + preds = self.postprocess([outputs]) + + self.update_metrics(preds, batch) + + stats = self.get_stats() + + if self.args.save_json and self.jdict: + with open(str(self.save_dir / 'predictions.json'), 'w') as f: + print(f'Saving {f.name} ...') + json.dump(self.jdict, f) # flatten and save + + stats = self.eval_json(stats) + + return stats + + def init_metrics(self): + """Initialize evaluation metrics for YOLO.""" + val = self.data.get(self.args.split, '') # validation path + self.is_coco = isinstance(val, str) and 'coco' in val and val.endswith(f'{os.sep}val2017.txt') # is COCO + self.class_map = coco80_to_coco91_class() if self.is_coco else list(range(1000)) + self.args.save_json |= self.is_coco and not self.training # run on final val if training COCO + self.names = self.data['names'] + self.nc = len(self.names) + self.metrics.names = self.names + self.confusion_matrix = ConfusionMatrix(nc=80) + self.seen = 0 + self.jdict = [] + self.stats = [] + diff --git a/models/cv/detection/yolov9/igie/README.md 
b/models/cv/detection/yolov9/igie/README.md new file mode 100644 index 0000000000000000000000000000000000000000..cdeb8119d416d518a57159757af85007c672733b --- /dev/null +++ b/models/cv/detection/yolov9/igie/README.md @@ -0,0 +1,50 @@ +# YOLOv9 + +## Description +YOLOv9 represents a major leap in real-time object detection by introducing innovations like Programmable Gradient Information (PGI) and the Generalized Efficient Layer Aggregation Network (GELAN), significantly improving efficiency, accuracy, and adaptability. Developed by an open-source team and building on the YOLOv5 codebase, it sets new benchmarks on the MS COCO dataset. YOLOv9's architecture effectively addresses information loss in deep neural networks, enhancing learning capacity and ensuring higher detection accuracy. + +## Setup + +### Install + +```bash +pip3 install onnx +pip3 install tqdm +pip3 install onnxsim + +pip3 install ultralytics==8.2.51 +``` + +### Download + +Pretrained model: + +## Model Conversion +```bash +python3 export.py --weight yolov9s.pt --batch 32 +``` + +## Inference + +```bash +export DATASETS_DIR=/Path/to/coco/ +``` + +### FP16 + +```bash +# Accuracy +bash scripts/infer_yolov9_fp16_accuracy.sh +# Performance +bash scripts/infer_yolov9_fp16_performance.sh +``` + +## Results + +Model |BatchSize |Precision |FPS |IOU@0.5 |IOU@0.5:0.95 | +-------|-----------|----------|----------|----------|---------------| +YOLOv9 | 32 | FP16 | 814.42 | 0.625 | 0.464 | + +## Reference + +YOLOv9: \ No newline at end of file diff --git a/models/cv/detection/yolov9/igie/build_engine.py b/models/cv/detection/yolov9/igie/build_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..d3626ae76cc9781d9a01ec3d3e2afbdbca409ff5 --- /dev/null +++ b/models/cv/detection/yolov9/igie/build_engine.py @@ -0,0 +1,73 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import tvm +import argparse +from tvm import relay +from tvm.relay.import_model import import_model_to_igie + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--model_path", + type=str, + required=True, + help="original model path.") + + parser.add_argument("--engine_path", + type=str, + required=True, + help="igie export engine path.") + + parser.add_argument("--input", + type=str, + required=True, + help=""" + input info of the model, format should be: + input_name:input_shape + eg: --input input:1,3,224,224. 
+ """) + + parser.add_argument("--precision", + type=str, + choices=["fp32", "fp16", "int8"], + required=True, + help="model inference precision.") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + # get input valueinfo + input_name, input_shape = args.input.split(":") + shape = tuple([int(s) for s in input_shape.split(",")]) + input_dict = {input_name: shape} + + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + + mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie") + + # build engine + lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision) + + # export engine + lib.export_library(args.engine_path) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/detection/yolov9/igie/export.py b/models/cv/detection/yolov9/igie/export.py new file mode 100644 index 0000000000000000000000000000000000000000..aec62f72a4bb4d1100a5dd977c99ce9bba065fb2 --- /dev/null +++ b/models/cv/detection/yolov9/igie/export.py @@ -0,0 +1,43 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import argparse +from ultralytics import YOLO + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--weight", + type=str, + required=True, + help="pytorch model weight.") + + parser.add_argument("--batch", + type=int, + required=True, + help="batchsize of the model.") + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + model = YOLO(args.weight).cpu() + + model.export(format='onnx', batch=args.batch, imgsz=(640, 640), optimize=True, simplify=True, opset=13) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/detection/yolov9/igie/inference.py b/models/cv/detection/yolov9/igie/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..cbed1c03c2631c6807922d699d6c1994331b4e7d --- /dev/null +++ b/models/cv/detection/yolov9/igie/inference.py @@ -0,0 +1,140 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +import argparse +import os + +import tvm +from tvm import relay + +import numpy as np +from pathlib import Path +from ultralytics import YOLO +from ultralytics.cfg import get_cfg +from ultralytics.utils import DEFAULT_CFG +from validator import IGIE_Validator + + + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--engine", + type=str, + required=True, + help="igie engine path.") + + parser.add_argument("--batchsize", + type=int, + required=True, + help="inference batch size.") + + parser.add_argument("--datasets", + type=str, + required=True, + help="datasets path.") + + parser.add_argument("--input_name", + type=str, + required=True, + help="input name of the model.") + + parser.add_argument("--warmup", + type=int, + default=3, + help="number of warmup before test.") + + parser.add_argument("--acc_target", + type=float, + default=None, + help="Model inference Accuracy target.") + + parser.add_argument("--fps_target", + type=float, + default=None, + help="Model inference FPS target.") + + parser.add_argument("--perf_only", + type=bool, + default=False, + help="Run performance test only") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + batch_size = args.batchsize + + # create iluvatar target & device + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + device = tvm.device(target.kind.name, 0) + + # load engine + lib = tvm.runtime.load_module(args.engine) + + # create runtime from engine + module = tvm.contrib.graph_executor.GraphModule(lib["default"](device)) + + # just run perf test + if args.perf_only: + ftimer = module.module.time_evaluator("run", device, number=100, repeat=1) + prof_res = np.array(ftimer().results) * 1000 + fps = batch_size * 1000 / np.mean(prof_res) + print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}") + else: + root_path = args.datasets + val_path = os.path.join(root_path, 'val2017.txt') + + overrides = {} + overrides['mode'] = 'val' + + cfg_args = get_cfg(cfg=DEFAULT_CFG, overrides=overrides) + + cfg_args.batch = args.batchsize + + cfg_args.data = { + 'path': Path(root_path), + 'val': val_path, + 'names': + { + 0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', + 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', + 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', + 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', + 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', + 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', + 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', + 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', + 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', + 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', + 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', + 57: 'couch', 58: 'potted plant', 59: 'bed', 60: 'dining table', 61: 'toilet', + 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone', + 68: 'microwave', 69: 'oven', 70: 'toaster', 71: 'sink', 72: 'refrigerator', 73: 'book', + 74: 'clock', 75: 'vase', 76: 'scissors', 77: 'teddy bear', 78: 'hair drier', 79: 'toothbrush' + }, + 'nc': 80} + cfg_args.save_json = True + + validator = IGIE_Validator(args=cfg_args, save_dir=Path('.')) + validator.stride = 32 + + stats = 
validator(module, device) + +if __name__ == "__main__": + main() diff --git a/models/cv/detection/yolov9/igie/scripts/infer_yolov9_fp16_accuracy.sh b/models/cv/detection/yolov9/igie/scripts/infer_yolov9_fp16_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..2158b3bc3e45d821559f9902b875928e76113711 --- /dev/null +++ b/models/cv/detection/yolov9/igie/scripts/infer_yolov9_fp16_accuracy.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="yolov9s.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input images:${batchsize},3,640,640 \ + --precision fp16 \ + --engine_path yolov9s_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine yolov9s_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name images \ + --datasets ${datasets_path} \ No newline at end of file diff --git a/models/cv/detection/yolov9/igie/scripts/infer_yolov9_fp16_performance.sh b/models/cv/detection/yolov9/igie/scripts/infer_yolov9_fp16_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..596403169a88e507b1c9283eb1f9536d6ceb17f7 --- /dev/null +++ b/models/cv/detection/yolov9/igie/scripts/infer_yolov9_fp16_performance.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="yolov9s.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input images:${batchsize},3,640,640 \ + --precision fp16 \ + --engine_path yolov9s_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine yolov9s_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name images \ + --datasets ${datasets_path} \ + --perf_only True \ No newline at end of file diff --git a/models/cv/detection/yolov9/igie/validator.py b/models/cv/detection/yolov9/igie/validator.py new file mode 100644 index 0000000000000000000000000000000000000000..b717b5c474446566b7774a1dcfe9906b10c506b5 --- /dev/null +++ b/models/cv/detection/yolov9/igie/validator.py @@ -0,0 +1,89 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import tvm +import json +import torch +import numpy as np + +from tqdm import tqdm + +from ultralytics.models.yolo.detect import DetectionValidator +from ultralytics.data.utils import check_det_dataset +from ultralytics.utils.metrics import ConfusionMatrix +from ultralytics.data.converter import coco80_to_coco91_class + +class IGIE_Validator(DetectionValidator): + def __call__(self, engine, device): + self.data = self.args.data + self.dataloader = self.get_dataloader(self.data.get(self.args.split), self.args.batch) + self.init_metrics() + + self.stats = {'tp': [], 'conf': [], 'pred_cls': [], 'target_cls': [], 'target_img': []} + + # wram up + for _ in range(3): + engine.run() + + for batch in tqdm(self.dataloader): + batch = self.preprocess(batch) + + imgs = batch['img'] + pad_batch = len(imgs) != self.args.batch + if pad_batch: + origin_size = len(imgs) + imgs = np.resize(imgs, (self.args.batch, *imgs.shape[1:])) + + engine.set_input(0, tvm.nd.array(imgs, device)) + + engine.run() + + outputs = engine.get_output(0).asnumpy() + + if pad_batch: + outputs = outputs[:origin_size] + + outputs = torch.from_numpy(outputs) + + preds = self.postprocess([outputs]) + + self.update_metrics(preds, batch) + + stats = self.get_stats() + + if self.args.save_json and self.jdict: + with open(str(self.save_dir / 'predictions.json'), 'w') as f: + print(f'Saving {f.name} ...') + json.dump(self.jdict, f) # flatten and save + + stats = self.eval_json(stats) + + return stats + + def init_metrics(self): + """Initialize evaluation metrics for YOLO.""" + val = self.data.get(self.args.split, '') # validation path + self.is_coco = isinstance(val, str) and 'coco' in val and val.endswith(f'{os.sep}val2017.txt') # is COCO + self.class_map = coco80_to_coco91_class() if self.is_coco else list(range(1000)) + self.args.save_json |= self.is_coco and not self.training # run on final val if training COCO + self.names = self.data['names'] + self.nc = len(self.names) + self.metrics.names = self.names + self.confusion_matrix = ConfusionMatrix(nc=80) + self.seen = 0 + self.jdict = [] + self.stats = [] + diff --git a/models/cv/ocr/kie_layoutxlm/README.md b/models/cv/ocr/kie_layoutxlm/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8a4eed92a9d07128841cab87a08df1746e197938 --- /dev/null +++ b/models/cv/ocr/kie_layoutxlm/README.md @@ -0,0 +1,66 @@ +# LayoutXLM +## Description +LayoutXLM is a groundbreaking multimodal pre-trained model for multilingual document understanding, achieving exceptional performance by integrating text, layout, and image data. + +## Setup +```shell +pip3 install tqdm +pip3 install onnx +pip3 install onnxsim +pip3 install Polygon3 +pip3 install paddlenlp==2.8.1 +pip3 install lanms-neo==1.0.2 +pip3 install paddleocr==2.6.0 +pip3 install paddle2onnx +pip3 install python-bidi +``` + +## Download +Pretrained model: + +Dataset: to download the XFUND_zh dataset. 
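+
+The inference script resolves the class list, validation images and annotations relative to `DATASETS_DIR`, so it is worth checking the extracted archive against the paths below. This is only a sanity-check sketch, with paths taken from `inference.py` and assuming `XFUND.tar` unpacks into a directory named `XFUND`:
+
+```shell
+# these files must exist for the accuracy run to work
+ls XFUND/class_list_xfun.txt
+ls XFUND/zh_val/val.json
+ls XFUND/zh_val/image | head
+```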
+ +## Model Conversion +```shell + +tar -xf ser_vi_layoutxlm_xfund_pretrained.tar +tar -xf XFUND.tar + +git clone -b release/2.6 https://github.com/PaddlePaddle/PaddleOCR.git + +cd PaddleOCR +mkdir -p train_data/XFUND +cp ../XFUND/class_list_xfun.txt train_data/XFUND + +# Export the trained model into inference model +python3 tools/export_model.py -c configs/kie/vi_layoutxlm/ser_vi_layoutxlm_xfund_zh.yml -o Architecture.Backbone.checkpoints=../ser_vi_layoutxlm_xfund_pretrained/best_accuracy Global.save_inference_dir=./inference/ser_vi_layoutxlm + +# Export the inference model to onnx model +paddle2onnx --model_dir ./inference/ser_vi_layoutxlm --model_filename inference.pdmodel --params_filename inference.pdiparams --save_file ../kie_ser.onnx --opset_version 11 --enable_onnx_checker True + +cd .. + +# Use onnxsim optimize onnx model +onnxsim kie_ser.onnx kie_ser_opt.onnx +``` + +## Inference +```shell +export DATASETS_DIR=/Path/to/XFUND/ +``` +### FP16 +```shell +# Accuracy +bash scripts/infer_kie_ser_fp16_accuracy.sh +# Performance +bash scripts/infer_kie_ser_fp16_performance.sh +``` + +## Results + +Model |BatchSize |Precision |FPS |Hmean | +--------|-----------|----------|----------|----------| +Kie_ser | 8 | FP16 | 107.65 | 93.61% | + +## Reference +PaddleOCR: diff --git a/models/cv/ocr/kie_layoutxlm/build_engine.py b/models/cv/ocr/kie_layoutxlm/build_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..cf27aa83a8132e4b917664fb4ba7c820f5326b1e --- /dev/null +++ b/models/cv/ocr/kie_layoutxlm/build_engine.py @@ -0,0 +1,77 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import tvm +import argparse +from tvm import relay +from tvm.relay.import_model import import_model_to_igie + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--model_path", + type=str, + required=True, + help="original model path.") + + parser.add_argument("--engine_path", + type=str, + required=True, + help="igie export engine path.") + + parser.add_argument("--input", + type=str, + required=True, + nargs='+', + help=""" + input info of the model, format should be: + input_name:input_shape + eg: --input input:1,3,224,224. 
+ """) + + parser.add_argument("--precision", + type=str, + choices=["fp32", "fp16", "int8"], + required=True, + help="model inference precision.") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + # get input valueinfo + input_dict = {} + + for input in args.input: + input_name, input_shape = input.split(":") + shape = tuple([int(s) for s in input_shape.split(",")]) + input_dict[input_name] = shape + + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + + mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie") + + # build engine + lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision) + + # export engine + lib.export_library(args.engine_path) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/ocr/kie_layoutxlm/inference.py b/models/cv/ocr/kie_layoutxlm/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..2154d5c8d9051136d7023747f8dd7d27c958245c --- /dev/null +++ b/models/cv/ocr/kie_layoutxlm/inference.py @@ -0,0 +1,157 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import argparse +import tvm +import yaml +import logging +import numpy as np +from tvm import relay +from tqdm import tqdm + +import paddle +from paddleocr import ppocr +from ppocr.data import build_dataloader +from ppocr.utils.logging import get_logger +from ppocr.postprocess import build_post_process + +class VQASerTokenMetric(object): + def __init__(self, main_indicator='hmean', **kwargs): + self.main_indicator = main_indicator + self.reset() + + def __call__(self, preds, batch, **kwargs): + preds, labels = preds + self.pred_list.extend(preds) + self.gt_list.extend(labels) + + def get_metric(self): + from seqeval.metrics import f1_score, precision_score, recall_score + metrics = { + "precision": precision_score(self.gt_list, self.pred_list), + "recall": recall_score(self.gt_list, self.pred_list), + "hmean": f1_score(self.gt_list, self.pred_list), + } + self.reset() + return metrics + + def reset(self): + self.pred_list = [] + self.gt_list = [] + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--engine", + type=str, + required=True, + help="igie engine path.") + + parser.add_argument("--batchsize", + type=int, + required=True, + help="inference batch size.") + + parser.add_argument("--datasets", + type=str, + required=True, + help="datasets path.") + + parser.add_argument("--input_name", + type=str, + required=True, + help="input name of the model.") + + parser.add_argument("--warmup", + type=int, + default=3, + help="number of warmup before test.") + + parser.add_argument("--perf_only", + type=bool, + default=False, + help="Run performance test only") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + logger = get_logger(log_level=logging.INFO) + + input_names = 
args.input_name.split(",") + config = yaml.load(open("ser_vi_layoutxlm_xfund_zh.yml", 'rb'), Loader=yaml.Loader) + config['Eval']['loader']['batch_size_per_card'] = args.batchsize + config['Eval']['dataset']['data_dir'] = os.path.join(args.datasets, "zh_val/image") + config['Eval']['dataset']['label_file_list'] = os.path.join(args.datasets, "zh_val/val.json") + config['Eval']['dataset']['transforms'][1]['VQATokenLabelEncode']['class_path'] = os.path.join(args.datasets, "class_list_xfun.txt") + config['PostProcess']['class_path'] = os.path.join(args.datasets, "class_list_xfun.txt") + + # build dataloader + config['Eval']['loader']['drop_last'] = True + valid_dataloader = build_dataloader(config, 'Eval', paddle.set_device("cpu"), logger) + + # build post process + post_process_class = build_post_process(config['PostProcess']) + + # build metric + eval_class = eval('VQASerTokenMetric')() + + # create target and device + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + device = tvm.device(target.kind.name, 0) + + # load engine + lib = tvm.runtime.load_module(args.engine) + + # create runtime from engine + module = tvm.contrib.graph_executor.GraphModule(lib["default"](device)) + + # just run perf test + if args.perf_only: + ftimer = module.module.time_evaluator("run", device, number=100, repeat=1) + prof_res = np.array(ftimer().results) * 1000 + fps = args.batchsize * 1000 / np.mean(prof_res) + print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}") + else: + # warm up + for _ in range(args.warmup): + module.run() + + for batch in tqdm(valid_dataloader): + + for idx, input_name in enumerate(input_names): + module.set_input(input_name, tvm.nd.array(batch[idx], device)) + + module.run() + + outputs = module.get_output(0).asnumpy() + outputs = paddle.to_tensor(outputs) + + batch_numpy = [] + for item in batch: + batch_numpy.append(item.numpy()) + + post_result = post_process_class((outputs), batch_numpy) + eval_class(post_result, batch_numpy) + + metric = eval_class.get_metric() + print(metric) + +if __name__ == "__main__": + main() diff --git a/models/cv/detection/centernet/igie/base/schedule_1x.py b/models/cv/ocr/kie_layoutxlm/scripts/infer_kie_ser_fp16_accuracy.sh similarity index 43% rename from models/cv/detection/centernet/igie/base/schedule_1x.py rename to models/cv/ocr/kie_layoutxlm/scripts/infer_kie_ser_fp16_accuracy.sh index 9b16d80c7704f8b55b6d3f3e622399515668f26b..32282d8fef47df63f215189ef89d8fd05997a586 100644 --- a/models/cv/detection/centernet/igie/base/schedule_1x.py +++ b/models/cv/ocr/kie_layoutxlm/scripts/infer_kie_ser_fp16_accuracy.sh @@ -1,3 +1,5 @@ +#!/bin/bash + # Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. # All Rights Reserved. # @@ -11,33 +13,23 @@ # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations -# under the License.
+# under the License.i -# training schedule for 1x -train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=12, val_interval=1) -val_cfg = dict(type='ValLoop') -test_cfg = dict(type='TestLoop') +batchsize=8 +model_path="kie_ser_opt.onnx" +datasets_path=${DATASETS_DIR} -# learning rate -param_scheduler = [ - dict( - type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500), - dict( - type='MultiStepLR', - begin=0, - end=12, - by_epoch=True, - milestones=[8, 11], - gamma=0.1) -] +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input x_0:${batchsize},512 x_1:${batchsize},512,4 x_2:${batchsize},512 x_3:${batchsize},512 \ + --precision fp16 \ + --engine_path kie_ser_bs_${batchsize}_fp16.so -# optimizer -optim_wrapper = dict( - type='OptimWrapper', - optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)) -# Default setting for scaling LR automatically -# - `enable` means enable scaling LR automatically -# or not by default. -# - `base_batch_size` = (8 GPUs) x (2 samples per GPU). -auto_scale_lr = dict(enable=False, base_batch_size=16) +# inference +python3 inference.py \ + --engine kie_ser_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name x_0,x_1,x_2,x_3 \ + --datasets ${datasets_path} \ No newline at end of file diff --git a/models/cv/ocr/kie_layoutxlm/scripts/infer_kie_ser_fp16_performance.sh b/models/cv/ocr/kie_layoutxlm/scripts/infer_kie_ser_fp16_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..2205c987763aac15697ee3fd6879e84a660d0167 --- /dev/null +++ b/models/cv/ocr/kie_layoutxlm/scripts/infer_kie_ser_fp16_performance.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License.i + +batchsize=8 +model_path="kie_ser_opt.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input x_0:${batchsize},512 x_1:${batchsize},512,4 x_2:${batchsize},512 x_3:${batchsize},512 \ + --precision fp16 \ + --engine_path kie_ser_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine kie_ser_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name x_0,x_1,x_2,x_3 \ + --datasets ${datasets_path} \ + --perf_only True \ No newline at end of file diff --git a/models/cv/ocr/kie_layoutxlm/ser_vi_layoutxlm_xfund_zh.yml b/models/cv/ocr/kie_layoutxlm/ser_vi_layoutxlm_xfund_zh.yml new file mode 100644 index 0000000000000000000000000000000000000000..5f2f6345668508a531d03b171c8dc6a8ad9b32bf --- /dev/null +++ b/models/cv/ocr/kie_layoutxlm/ser_vi_layoutxlm_xfund_zh.yml @@ -0,0 +1,151 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +Global: + use_gpu: True + epoch_num: &epoch_num 200 + log_smooth_window: 10 + print_batch_step: 10 + save_model_dir: ./output/ser_vi_layoutxlm_xfund_zh + save_epoch_step: 2000 + # evaluation is run every 10 iterations after the 0th iteration + eval_batch_step: [ 0, 19 ] + cal_metric_during_train: False + save_inference_dir: + use_visualdl: False + seed: 2022 + infer_img: ppstructure/docs/kie/input/zh_val_42.jpg + # if you want to predict using the groundtruth ocr info, + # you can use the following config + # infer_img: train_data/XFUND/zh_val/val.json + # infer_mode: False + + save_res_path: ./output/ser/xfund_zh/res + kie_rec_model_dir: + kie_det_model_dir: + +Architecture: + model_type: kie + algorithm: &algorithm "LayoutXLM" + Transform: + Backbone: + name: LayoutXLMForSer + pretrained: True + checkpoints: + # one of base or vi + mode: vi + num_classes: &num_classes 7 + +Loss: + name: VQASerTokenLayoutLMLoss + num_classes: *num_classes + key: "backbone_out" + +Optimizer: + name: AdamW + beta1: 0.9 + beta2: 0.999 + lr: + name: Linear + learning_rate: 0.00005 + epochs: *epoch_num + warmup_epoch: 2 + regularizer: + name: L2 + factor: 0.00000 + +PostProcess: + name: VQASerTokenLayoutLMPostProcess + class_path: &class_path train_data/XFUND/class_list_xfun.txt + +Metric: + name: VQASerTokenMetric + main_indicator: hmean + +Train: + dataset: + name: SimpleDataSet + data_dir: train_data/XFUND/zh_train/image + label_file_list: + - train_data/XFUND/zh_train/train.json + ratio_list: [ 1.0 ] + transforms: + - DecodeImage: # load image + img_mode: RGB + channel_first: False + - VQATokenLabelEncode: # Class handling label + contains_re: False + algorithm: *algorithm + class_path: *class_path + use_textline_bbox_info: &use_textline_bbox_info True + # one of [None, "tb-yx"] + order_method: &order_method "tb-yx" + - VQATokenPad: + max_seq_len: &max_seq_len 512 + return_attention_mask: True + - VQASerTokenChunk: + max_seq_len: *max_seq_len + - Resize: + size: [224,224] + - NormalizeImage: + scale: 1 + mean: [ 123.675, 116.28, 103.53 ] + std: [ 58.395, 57.12, 57.375 ] + order: 'hwc' + - ToCHWImage: + - KeepKeys: + keep_keys: [ 'input_ids', 'bbox', 'attention_mask', 'token_type_ids', 'image', 'labels'] # dataloader will return list in this order + loader: + shuffle: True + drop_last: False + batch_size_per_card: 8 + num_workers: 4 + +Eval: + dataset: + name: SimpleDataSet + data_dir: train_data/XFUND/zh_val/image + label_file_list: + - train_data/XFUND/zh_val/val.json + transforms: + - DecodeImage: # load image + img_mode: RGB + channel_first: False + - VQATokenLabelEncode: # Class handling label + contains_re: False + algorithm: *algorithm + class_path: *class_path + use_textline_bbox_info: *use_textline_bbox_info + order_method: *order_method + - VQATokenPad: + max_seq_len: *max_seq_len + return_attention_mask: True + - VQASerTokenChunk: + max_seq_len: *max_seq_len + - Resize: + size: [224,224] + - NormalizeImage: + scale: 1 + mean: [ 123.675, 116.28, 103.53 ] + std: [ 58.395, 57.12, 57.375 ] + order: 'hwc' + - ToCHWImage: + - KeepKeys: + keep_keys: [ 'input_ids', 'bbox', 'attention_mask', 
'token_type_ids', 'image', 'labels'] # dataloader will return list in this order + loader: + shuffle: False + drop_last: False + batch_size_per_card: 8 + num_workers: 4 diff --git a/models/cv/ocr/rec_svtr/README.md b/models/cv/ocr/rec_svtr/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b986a545eabb91de616a4e192408dc0d0b850a7d --- /dev/null +++ b/models/cv/ocr/rec_svtr/README.md @@ -0,0 +1,62 @@ +# SVTR +## Description +SVTR proposes a single vision model for scene text recognition. This model completely abandons sequence modeling within the patch-wise image tokenization framework. Under the premise of competitive accuracy, the model has fewer parameters and faster speed. + +## Setup +```shell +pip3 install tqdm +pip3 install onnx +pip3 install onnxsim +pip3 install numpy==1.24.0 +pip3 install Polygon3 +pip3 install paddlenlp==2.8.1 +pip3 install lanms-neo==1.0.2 +pip3 install paddleocr==2.8.1 +pip3 install paddle2onnx +pip3 install python-bidi +``` + +## Download +Pretrained model: + +Dataset: to download the lmdb evaluation datasets. + +## Model Conversion +```shell +tar -xf rec_svtr_tiny_none_ctc_en_train.tar + +git clone -b release/2.6 https://github.com/PaddlePaddle/PaddleOCR.git + +cd PaddleOCR + +# Export the trained model into inference model +python3 tools/export_model.py -c ../rec_svtr_tiny_6local_6global_stn_en.yml -o Global.pretrained_model=../rec_svtr_tiny_none_ctc_en_train/best_accuracy Global.save_inference_dir=./inference/rec_svtr_tiny + +# Export the inference model to onnx model +paddle2onnx --model_dir ./inference/rec_svtr_tiny --model_filename inference.pdmodel --params_filename inference.pdiparams --save_file ../SVTR.onnx --opset_version 13 --enable_onnx_checker True + +cd .. + +# Use onnxsim optimize onnx model +onnxsim SVTR.onnx SVTR_opt.onnx +``` + +## Inference +```shell +export DATASETS_DIR=/Path/to/lmdb_evaluation/ +``` +### FP16 +```shell +# Accuracy +bash scripts/infer_svtr_fp16_accuracy.sh +# Performance +bash scripts/infer_svtr_fp16_performance.sh +``` + +## Results +Model |BatchSize |Precision |FPS |Acc | +--------|-----------|----------|----------|----------| +SVTR | 32 | FP16 | 4936.47 | 88.29% | + +## Reference +PaddleOCR: https://github.com/PaddlePaddle/PaddleOCR/blob/main/docs/algorithm/text_recognition/algorithm_rec_svtr.md diff --git a/models/cv/ocr/rec_svtr/build_engine.py b/models/cv/ocr/rec_svtr/build_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..cf27aa83a8132e4b917664fb4ba7c820f5326b1e --- /dev/null +++ b/models/cv/ocr/rec_svtr/build_engine.py @@ -0,0 +1,77 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
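+
+# Usage note (editor's addition, not in the original file): build_engine.py compiles an ONNX
+# model into an IGIE engine (.so) with the batch size baked into the input shape. For SVTR the
+# accompanying scripts call it as:
+#   python3 build_engine.py --model_path SVTR_opt.onnx --input x:32,3,64,256 \
+#       --precision fp16 --engine_path svtr_bs_32_fp16.so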
+ +import tvm +import argparse +from tvm import relay +from tvm.relay.import_model import import_model_to_igie + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--model_path", + type=str, + required=True, + help="original model path.") + + parser.add_argument("--engine_path", + type=str, + required=True, + help="igie export engine path.") + + parser.add_argument("--input", + type=str, + required=True, + nargs='+', + help=""" + input info of the model, format should be: + input_name:input_shape + eg: --input input:1,3,224,224. + """) + + parser.add_argument("--precision", + type=str, + choices=["fp32", "fp16", "int8"], + required=True, + help="model inference precision.") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + # get input valueinfo + input_dict = {} + + for input in args.input: + input_name, input_shape = input.split(":") + shape = tuple([int(s) for s in input_shape.split(",")]) + input_dict[input_name] = shape + + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + + mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie") + + # build engine + lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision) + + # export engine + lib.export_library(args.engine_path) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/ocr/rec_svtr/inference.py b/models/cv/ocr/rec_svtr/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..b91c4699e6623177541c87b9c136dafeb98c2c70 --- /dev/null +++ b/models/cv/ocr/rec_svtr/inference.py @@ -0,0 +1,192 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
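+
+# Usage note (editor's addition, not in the original file): this script evaluates an IGIE engine
+# on the lmdb_evaluation set using PaddleOCR's dataloader and post-processing together with the
+# RecMetric defined below. The accompanying scripts invoke it as:
+#   python3 inference.py --engine svtr_bs_32_fp16.so --batchsize 32 \
+#       --input_name x --datasets ${DATASETS_DIR}
+# Adding --perf_only True reports mean latency and FPS without running accuracy evaluation.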
+ +import os +import argparse +import tvm +import yaml +import logging +import numpy as np +from tvm import relay +from tqdm import tqdm + +import paddle +from paddleocr import ppocr +from ppocr.data import build_dataloader +from ppocr.utils.logging import get_logger +from ppocr.postprocess import build_post_process + +import string +from rapidfuzz.distance import Levenshtein + + +class RecMetric(object): + def __init__(self, + main_indicator='acc', + is_filter=False, + ignore_space=True, + **kwargs): + self.main_indicator = main_indicator + self.is_filter = is_filter + self.ignore_space = ignore_space + self.eps = 1e-5 + self.reset() + + def _normalize_text(self, text): + text = ''.join( + filter(lambda x: x in (string.digits + string.ascii_letters), text)) + return text.lower() + + def __call__(self, pred_label, *args, **kwargs): + preds, labels = pred_label + correct_num = 0 + all_num = 0 + norm_edit_dis = 0.0 + for (pred, pred_conf), (target, _) in zip(preds, labels): + if self.ignore_space: + pred = pred.replace(" ", "") + target = target.replace(" ", "") + if self.is_filter: + pred = self._normalize_text(pred) + target = self._normalize_text(target) + norm_edit_dis += Levenshtein.normalized_distance(pred, target) + if pred == target: + correct_num += 1 + all_num += 1 + self.correct_num += correct_num + self.all_num += all_num + self.norm_edit_dis += norm_edit_dis + return { + 'acc': correct_num / (all_num + self.eps), + 'norm_edit_dis': 1 - norm_edit_dis / (all_num + self.eps) + } + + def get_metric(self): + """ + return metrics { + 'acc': 0, + 'norm_edit_dis': 0, + } + """ + acc = 1.0 * self.correct_num / (self.all_num + self.eps) + norm_edit_dis = 1 - self.norm_edit_dis / (self.all_num + self.eps) + self.reset() + return {'acc': acc, 'norm_edit_dis': norm_edit_dis} + + def reset(self): + self.correct_num = 0 + self.all_num = 0 + self.norm_edit_dis = 0 + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--engine", + type=str, + required=True, + help="igie engine path.") + + parser.add_argument("--batchsize", + type=int, + required=True, + help="inference batch size.") + + parser.add_argument("--datasets", + type=str, + required=True, + help="datasets path.") + + parser.add_argument("--input_name", + type=str, + required=True, + help="input name of the model.") + + parser.add_argument("--warmup", + type=int, + default=3, + help="number of warmup before test.") + + parser.add_argument("--perf_only", + type=bool, + default=False, + help="Run performance test only") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + logger = get_logger(log_level=logging.INFO) + + config = yaml.load(open("rec_svtr_tiny_6local_6global_stn_en.yml", 'rb'), Loader=yaml.Loader) + config['Eval']['loader']['batch_size_per_card'] = args.batchsize + config['Eval']['dataset']['data_dir'] = os.path.join(args.datasets) + + # build dataloader + config['Eval']['loader']['drop_last'] = True + valid_dataloder = build_dataloader(config, 'Eval', paddle.set_device("cpu"), logger) + + # build post process + global_config = config['Global'] + post_process_class = build_post_process(config['PostProcess'], global_config) + + # build metric + eval_class = eval('RecMetric')() + + # creat target and device + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + device = tvm.device(target.kind.name, 0) + + # load engine + lib = tvm.runtime.load_module(args.engine) + + # create runtime from engine + module = 
tvm.contrib.graph_executor.GraphModule(lib["default"](device)) + + # just run perf test + if args.perf_only: + ftimer = module.module.time_evaluator("run", device, number=100, repeat=1) + prof_res = np.array(ftimer().results) * 1000 + fps = args.batchsize * 1000 / np.mean(prof_res) + print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}") + else: + # warm up + for _ in range(args.warmup): + module.run() + + for batch in tqdm(valid_dataloder): + images = batch[0] + + module.set_input(args.input_name, tvm.nd.array(images, device)) + + module.run() + + outputs = module.get_output(0).asnumpy() + outputs = paddle.to_tensor(outputs) + + batch_numpy = [] + for item in batch: + batch_numpy.append(item.numpy()) + + post_result = post_process_class((outputs), batch_numpy[1]) + eval_class(post_result, batch_numpy) + + metric = eval_class.get_metric() + print(metric) + +if __name__ == "__main__": + main() diff --git a/models/cv/ocr/rec_svtr/rec_svtr_tiny_6local_6global_stn_en.yml b/models/cv/ocr/rec_svtr/rec_svtr_tiny_6local_6global_stn_en.yml new file mode 100644 index 0000000000000000000000000000000000000000..3c87f9af99669aab5b6d7d133dd2206f283254d7 --- /dev/null +++ b/models/cv/ocr/rec_svtr/rec_svtr_tiny_6local_6global_stn_en.yml @@ -0,0 +1,132 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
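+
+# Note (editor's addition, not in the original config): inference.py reads Global, PostProcess
+# and the Eval section of this file, and overrides Eval.loader.batch_size_per_card,
+# Eval.loader.drop_last and Eval.dataset.data_dir from its command-line arguments. The Train
+# and Optimizer sections are retained from the upstream PaddleOCR config and are not used
+# during IGIE inference.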
+ +Global: + use_gpu: True + epoch_num: 20 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: ./output/rec/rec_svtr_tiny_en/ + save_epoch_step: 1 + # evaluation is run every 2000 iterations after the 0th iteration + eval_batch_step: [0, 2000] + cal_metric_during_train: True + pretrained_model: + checkpoints: + save_inference_dir: + use_visualdl: False + infer_img: doc/imgs_words_en/word_10.png + # for data or label process + character_dict_path: + character_type: en + max_text_length: 25 + infer_mode: False + use_space_char: False + save_res_path: ./output/rec/predicts_svtr_tiny.txt + + +Optimizer: + name: AdamW + beta1: 0.9 + beta2: 0.99 + epsilon: 0.00000008 + weight_decay: 0.05 + no_weight_decay_name: norm pos_embed + one_dim_param_no_weight_decay: true + lr: + name: Cosine + learning_rate: 0.0005 + warmup_epoch: 2 + +Architecture: + model_type: rec + algorithm: SVTR + Transform: + name: STN_ON + tps_inputsize: [32, 64] + tps_outputsize: [32, 100] + num_control_points: 20 + tps_margins: [0.05,0.05] + stn_activation: none + Backbone: + name: SVTRNet + img_size: [32, 100] + out_char_num: 25 + out_channels: 192 + patch_merging: 'Conv' + embed_dim: [64, 128, 256] + depth: [3, 6, 3] + num_heads: [2, 4, 8] + mixer: ['Local','Local','Local','Local','Local','Local','Global','Global','Global','Global','Global','Global'] + local_mixer: [[7, 11], [7, 11], [7, 11]] + last_stage: True + prenorm: false + Neck: + name: SequenceEncoder + encoder_type: reshape + Head: + name: CTCHead + +Loss: + name: CTCLoss + +PostProcess: + name: CTCLabelDecode + +Metric: + name: RecMetric + main_indicator: acc + +Train: + dataset: + name: LMDBDataSet + data_dir: ./train_data/data_lmdb_release/training/ + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - CTCLabelEncode: # Class handling label + - SVTRRecResizeImg: + character_dict_path: + image_shape: [3, 64, 256] + padding: False + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: True + batch_size_per_card: 512 + drop_last: True + num_workers: 4 + +Eval: + dataset: + name: LMDBDataSet + data_dir: ./train_data/data_lmdb_release/evaluation/ + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - CTCLabelEncode: # Class handling label + - SVTRRecResizeImg: + character_dict_path: + image_shape: [3, 64, 256] + padding: False + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: False + drop_last: False + batch_size_per_card: 256 + num_workers: 2 diff --git a/models/cv/ocr/rec_svtr/scripts/infer_svtr_fp16_accuracy.sh b/models/cv/ocr/rec_svtr/scripts/infer_svtr_fp16_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..e195990ed85fc6288295239d2df88ef565fdd087 --- /dev/null +++ b/models/cv/ocr/rec_svtr/scripts/infer_svtr_fp16_accuracy.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="SVTR_opt.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input x:${batchsize},3,64,256 \ + --precision fp16 \ + --engine_path svtr_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine svtr_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name x \ + --datasets ${datasets_path} \ No newline at end of file diff --git a/models/cv/ocr/rec_svtr/scripts/infer_svtr_fp16_performance.sh b/models/cv/ocr/rec_svtr/scripts/infer_svtr_fp16_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..34484ee8e3f6b95ec0a2bc14aa0944e83edc73bd --- /dev/null +++ b/models/cv/ocr/rec_svtr/scripts/infer_svtr_fp16_performance.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="SVTR_opt.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input x:${batchsize},3,64,256 \ + --precision fp16 \ + --engine_path svtr_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine svtr_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name x \ + --datasets ${datasets_path} \ + --perf_only True \ No newline at end of file diff --git a/models/cv/pose_estimation/rtmpose/igie/README.md b/models/cv/pose_estimation/rtmpose/igie/README.md index 2b7fb929b6fdb9c0b83f883858c6efba4cf408df..d85e1cf166ad188bf77bff87e466c889b9caf087 100644 --- a/models/cv/pose_estimation/rtmpose/igie/README.md +++ b/models/cv/pose_estimation/rtmpose/igie/README.md @@ -25,7 +25,7 @@ pip3 install mmengine==0.10.4 ``` ### Download -Pretrained model: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-aic-coco_pt-aic-coco_420e-256x192-63eb25f7_20230126.pth +Pretrained model: Dataset: to download the validation dataset.
@@ -56,6 +56,11 @@ bash scripts/infer_rtmpose_fp16_performance.sh ## Results -| Model | BatchSize | Input Shape | Precision | FPS | mAP@0.5(%) | -| :-------: | :-------: | :---------: | :-------: | :-------: | :--------: | -| RTMPose | 32 | 252x196 | FP16 | 2313.33 | 0.936 | +Model |BatchSize |Precision |FPS |IOU@0.5 |IOU@0.5:0.95 | +----------|-----------|----------|----------|----------|---------------| +RTMPose | 32 | FP16 | 2313.33 | 0.936 | 0.773 | + + +## Reference + +mmpose: \ No newline at end of file diff --git a/models/cv/pose_estimation/rtmpose/igie/inference.py b/models/cv/pose_estimation/rtmpose/igie/inference.py index d01502e26ccdcf4555b35fba8865befe9a1b3453..6138fab64dd5dee4ac93244e6a4b83ef67067693 100644 --- a/models/cv/pose_estimation/rtmpose/igie/inference.py +++ b/models/cv/pose_estimation/rtmpose/igie/inference.py @@ -99,7 +99,7 @@ def main(): # runner config cfg = Config.fromfile("rtmpose-m_8xb256-420e_coco-256x192.py") - cfg.work_dir = "./" + cfg.work_dir = "./workspace" cfg['test_dataloader']['batch_size'] = batch_size cfg['test_dataloader']['dataset']['data_root'] = args.datasets cfg['test_dataloader']['dataset']['data_prefix']['img'] = 'images/val2017/' diff --git a/models/cv/pose_estimation/rtmpose/igie/rtmpose-m_8xb256-420e_coco-256x192.py b/models/cv/pose_estimation/rtmpose/igie/rtmpose-m_8xb256-420e_coco-256x192.py index 9428b7fee0979ba91bc996a5f2c095d9662e946e..cd116625e6492bb5fa3ef38c3c47f21c03e54670 100644 --- a/models/cv/pose_estimation/rtmpose/igie/rtmpose-m_8xb256-420e_coco-256x192.py +++ b/models/cv/pose_estimation/rtmpose/igie/rtmpose-m_8xb256-420e_coco-256x192.py @@ -235,7 +235,7 @@ test_dataloader = dict( ann_file='annotations/person_keypoints_val2017.json', data_mode='topdown', data_prefix=dict(img='images/val2017/'), - data_root='/root/.igie_cache/data/datasets/coco', + data_root='data/coco/', pipeline=[ dict(backend_args=dict(backend='local'), type='LoadImage'), dict(type='GetBBoxCenterScale'), @@ -252,8 +252,7 @@ test_dataloader = dict( persistent_workers=True, sampler=dict(round_up=False, shuffle=False, type='DefaultSampler')) test_evaluator = dict( - ann_file= - '/root/.igie_cache/data/datasets/coco/annotations/person_keypoints_val2017.json', + ann_file='data/coco/annotations/person_keypoints_val2017.json', type='CocoMetric') train_batch_size = 256 train_cfg = dict(by_epoch=True, max_epochs=420, val_interval=10) @@ -462,4 +461,3 @@ visualizer = dict( vis_backends=[ dict(type='LocalVisBackend'), ]) -work_dir = './'