diff --git a/models/cv/classification/efficientnet_b7/igie/README.md b/models/cv/classification/efficientnet_b7/igie/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fa6c2b08c9620cfaf33bd856944a57a258f405e7 --- /dev/null +++ b/models/cv/classification/efficientnet_b7/igie/README.md @@ -0,0 +1,53 @@ +# EfficientNet B7 (IGIE) + +## Model Description + +EfficientNet B7 is an advanced convolutional neural network model created by Google, which extends the Compound Scaling method to optimize the balance between network depth, width, and input resolution. It builds upon components such as Inverted Residual Blocks (MBConv), Squeeze-and-Excitation (SE) modules, and the Swish activation function. EfficientNet-B7 achieves state-of-the-art performance in areas like image classification and object detection. Although it demands substantial computational resources, its superior accuracy and efficiency render it well-suited for highly complex and demanding vision applications. + +## Supported Environments + +| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release | +|--------|-----------|---------| +| MR-V100| 4.3.0 | 25.09 | + +## Model Preparation + +### Prepare Resources + +Pretrained model: + +Dataset: to download the validation dataset. + +### Install Dependencies + +```bash +pip3 install -r ../../igie_common/requirements.txt +``` + +### Model Conversion + +```bash +python3 ../../igie_common/export.py --model-name efficientnet_b7 --weight efficientnet_b7_lukemelas-c5b4e57e.pth --output efficientnet_b7.onnx +``` + +## Model Inference + +```bash +export DATASETS_DIR=/Path/to/imagenet_val/ +export RUN_DIR=../../igie_common/ +``` + +### FP16 + +```bash +# Accuracy +bash scripts/infer_efficientnet_b7_fp16_accuracy.sh +# Performance +bash scripts/infer_efficientnet_b7_fp16_performance.sh +``` + +## Model Results + +| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) | +| --------------- | --------- | --------- | -------- | -------- | -------- | +| Efficientnet_b7 | 32 | FP16 | 388.502 | 73.902 | 91.531 | diff --git a/models/cv/classification/efficientnet_b7/igie/ci/prepare.sh b/models/cv/classification/efficientnet_b7/igie/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..3f98519646829e24cb93abe1854079bac796b9b0 --- /dev/null +++ b/models/cv/classification/efficientnet_b7/igie/ci/prepare.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
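Editor's note: the conversion step above only writes `efficientnet_b7.onnx`; a quick sanity check of the exported graph before building the IGIE engine can catch a wrong input name or shape early. A minimal sketch, assuming the `onnx` package from `requirements.txt` and the file name produced by `export.py` above:

```python
import onnx

# Load and structurally validate the ONNX graph produced by export.py
# (file name taken from the Model Conversion step above).
model = onnx.load("efficientnet_b7.onnx")
onnx.checker.check_model(model)

# The build/inference scripts below feed a tensor named "input" with shape N,3,224,224,
# so the exported graph should expose a matching input.
for inp in model.graph.input:
    dims = [d.dim_value if d.dim_value > 0 else "dynamic"
            for d in inp.type.tensor_type.shape.dim]
    print(inp.name, dims)
```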
+
+set -x
+
+pip3 install -r ../../igie_common/requirements.txt
+python3 ../../igie_common/export.py --model-name efficientnet_b7 --weight efficientnet_b7_lukemelas-c5b4e57e.pth --output efficientnet_b7.onnx
diff --git a/models/cv/classification/efficientnet_b7/igie/requirements.txt b/models/cv/classification/efficientnet_b7/igie/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9e8111264d4bb2c985cdd10c1de3b894d4e50bef
--- /dev/null
+++ b/models/cv/classification/efficientnet_b7/igie/requirements.txt
@@ -0,0 +1,2 @@
+onnx
+tqdm
diff --git a/models/cv/classification/efficientnet_b7/igie/scripts/infer_efficientnet_b7_fp16_accuracy.sh b/models/cv/classification/efficientnet_b7/igie/scripts/infer_efficientnet_b7_fp16_accuracy.sh
new file mode 100644
index 0000000000000000000000000000000000000000..fb8d24f2e8547684122aeebfcc31c33f8cf988a7
--- /dev/null
+++ b/models/cv/classification/efficientnet_b7/igie/scripts/infer_efficientnet_b7_fp16_accuracy.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+batchsize=32
+model_path="efficientnet_b7.onnx"
+datasets_path=${DATASETS_DIR}
+
+# build engine
+python3 ${RUN_DIR}build_engine.py \
+    --model_path ${model_path} \
+    --input input:${batchsize},3,224,224 \
+    --precision fp16 \
+    --engine_path efficientnet_b7_bs_${batchsize}_fp16.so
+
+
+# inference
+python3 ${RUN_DIR}inference.py \
+    --engine efficientnet_b7_bs_${batchsize}_fp16.so \
+    --batchsize ${batchsize} \
+    --input_name input \
+    --datasets ${datasets_path}
\ No newline at end of file
diff --git a/models/cv/classification/efficientnet_b7/igie/scripts/infer_efficientnet_b7_fp16_performance.sh b/models/cv/classification/efficientnet_b7/igie/scripts/infer_efficientnet_b7_fp16_performance.sh
new file mode 100644
index 0000000000000000000000000000000000000000..0df31132ef0acc5c6645d8e53dbce968ed2d9dfd
--- /dev/null
+++ b/models/cv/classification/efficientnet_b7/igie/scripts/infer_efficientnet_b7_fp16_performance.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+ +batchsize=32 +model_path="efficientnet_b7.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 ${RUN_DIR}build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path efficientnet_b7_bs_${batchsize}_fp16.so + + +# inference +python3 ${RUN_DIR}inference.py \ + --engine efficientnet_b7_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ + --perf_only True \ No newline at end of file diff --git a/models/cv/classification/regnet_x_800mf/igie/README.md b/models/cv/classification/regnet_x_800mf/igie/README.md new file mode 100644 index 0000000000000000000000000000000000000000..432764d9da330d82693d94deed1a4b19f1136dd5 --- /dev/null +++ b/models/cv/classification/regnet_x_800mf/igie/README.md @@ -0,0 +1,53 @@ +# RegNet_x_800mf (IGIE) + +## Model Description + +RegNet_x_800mf is a lightweight deep learning model designed with a regularized architecture, utilizing Bottleneck Blocks for efficient feature extraction. With moderate computational complexity, it is well-suited for mid-to-large-scale image classification tasks in environments with limited computational resources. + +## Supported Environments + +| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release | +| :----: | :----: | :----: | +| MR-V100 | 4.3.0 | 25.09 | + +## Model Preparation + +### Prepare Resources + +Pretrained model: + +Dataset: to download the validation dataset. + +### Install Dependencies + +```bash +pip3 install -r ../../igie_common/requirements.txt +``` + +### Model Conversion + +```bash +python3 ../../igie_common/export.py --model-name regnet_x_800mf --weight regnet_x_800mf-94a99ebd.pth --output regnet_x_800mf.onnx +``` + +## Model Inference + +```bash +export DATASETS_DIR=/Path/to/imagenet_val/ +export RUN_DIR=../../igie_common/ +``` + +### FP16 + +```bash +# Accuracy +bash scripts/infer_regnet_x_800mf_fp16_accuracy.sh +# Performance +bash scripts/infer_regnet_x_800mf_fp16_performance.sh +``` + +## Model Results + +| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) | +| :----: | :----: | :----: | :----: | :----: | :----: | +| RegNet_x_800mf | 32 | FP16 | 4630.916| 77.091 | 93.602 | diff --git a/models/cv/classification/regnet_x_800mf/igie/ci/prepare.sh b/models/cv/classification/regnet_x_800mf/igie/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..a9f7525024d7872a5ab09c796f8a1c65eb157d98 --- /dev/null +++ b/models/cv/classification/regnet_x_800mf/igie/ci/prepare.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
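Editor's note: the FPS column in the results tables above, and the `--perf_only True` path used by the performance scripts, come from timing the compiled engine and converting mean latency into throughput; the same formula appears in the yolov8n `inference.py` added later in this patch. A small sketch with placeholder latencies, not measured numbers:

```python
import numpy as np

# Placeholder per-run latencies in milliseconds (e.g. from TVM's time_evaluator);
# illustrative values only, not results from any of the scripts in this patch.
prof_res = np.array([6.9, 7.0, 6.9, 7.1])
batchsize = 32

mean_ms = np.mean(prof_res)
fps = batchsize * 1000 / mean_ms
print(f"* Mean inference time: {mean_ms:.3f} ms, Mean fps: {fps:.3f}")
```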
+ +set -x + +pip3 install -r ../../igie_common/requirements.txt +python3 ../../igie_common/export.py --model-name regnet_x_800mf --weight regnet_x_800mf-94a99ebd.pth --output regnet_x_800mf.onnx \ No newline at end of file diff --git a/models/cv/classification/regnet_x_800mf/igie/scripts/infer_regnet_x_800mf_fp16_accuracy.sh b/models/cv/classification/regnet_x_800mf/igie/scripts/infer_regnet_x_800mf_fp16_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..38ad905adaafa16ba728acf75548a1fe310c2bc0 --- /dev/null +++ b/models/cv/classification/regnet_x_800mf/igie/scripts/infer_regnet_x_800mf_fp16_accuracy.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="regnet_x_800mf.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 ${RUN_DIR}build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path regnet_x_800mf_bs_${batchsize}_fp16.so + + +# inference +python3 ${RUN_DIR}inference.py \ + --engine regnet_x_800mf_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ No newline at end of file diff --git a/models/cv/classification/regnet_x_800mf/igie/scripts/infer_regnet_x_800mf_fp16_performance.sh b/models/cv/classification/regnet_x_800mf/igie/scripts/infer_regnet_x_800mf_fp16_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..2f24950c8ad56f2e7e776a0b26ffb593911af8be --- /dev/null +++ b/models/cv/classification/regnet_x_800mf/igie/scripts/infer_regnet_x_800mf_fp16_performance.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +batchsize=32 +model_path="regnet_x_800mf.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 ${RUN_DIR}build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path regnet_x_800mf_bs_${batchsize}_fp16.so + + +# inference +python3 ${RUN_DIR}inference.py \ + --engine regnet_x_800mf_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ + --perf_only True \ No newline at end of file diff --git a/models/cv/classification/regnet_x_8gf/igie/README.md b/models/cv/classification/regnet_x_8gf/igie/README.md new file mode 100755 index 0000000000000000000000000000000000000000..312ee63fedc87abde8a953ecb5ecc882576bfe6e --- /dev/null +++ b/models/cv/classification/regnet_x_8gf/igie/README.md @@ -0,0 +1,54 @@ +# RegNet_x_8gf (IGIE) + +## Model Description + +RegNet_x_8gf is a deep convolutional neural network from the RegNet family, introduced in the paper "Designing Network Design Spaces" by Facebook AI. RegNet models emphasize simplicity, efficiency, and scalability, and they systematically explore design spaces to achieve optimal performance. The "x" in RegNet_x_8gf indicates it belongs to the RegNetX series, which focuses on optimizing network width and depth, while "8gf" refers to its computational complexity of approximately 8 GFLOPs. The model features linear width scaling, group convolutions, and bottleneck blocks, offering a strong balance between high accuracy and computational efficiency. + +## Supported Environments + +| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release | +| :----: | :----: | :----: | +| MR-V100 | 4.3.0 | 25.09 | +| MR-V100 | 4.2.0 | 25.03 | + +## Model Preparation + +### Prepare Resources + +Pretrained model: + +Dataset: to download the validation dataset. + +### Install Dependencies + +```bash +pip3 install -r ../../igie_common/requirements.txt +``` + +### Model Conversion + +```bash +python3 ../../igie_common/export.py --model-name regnet_x_8gf --weight regnet_x_8gf-03ceed89.pth --output regnet_x_8gf.onnx +``` + +## Model Inference + +```bash +export DATASETS_DIR=/Path/to/imagenet_val/ +export RUN_DIR=../../igie_common/ +``` + +### FP16 + +```bash +# Accuracy +bash scripts/infer_regnet_x_8gf_fp16_accuracy.sh +# Performance +bash scripts/infer_regnet_x_8gf_fp16_performance.sh +``` + +## Model Results + +| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) | +| :----: | :----: | :----: | :----: | :----: | :----: | +| RegNet_x_8gf | 32 | FP16 | 1134.327 | 79.293 | 94.662 | diff --git a/models/cv/classification/regnet_x_8gf/igie/ci/prepare.sh b/models/cv/classification/regnet_x_8gf/igie/ci/prepare.sh new file mode 100755 index 0000000000000000000000000000000000000000..88cf4b19d895734b8b2ab77233f6d44c14a0d431 --- /dev/null +++ b/models/cv/classification/regnet_x_8gf/igie/ci/prepare.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +pip3 install -r ../../igie_common/requirements.txt +python3 ../../igie_common/export.py --model-name regnet_x_8gf --weight regnet_x_8gf-03ceed89.pth --output regnet_x_8gf.onnx \ No newline at end of file diff --git a/models/cv/classification/regnet_x_8gf/igie/scripts/infer_regnet_x_8gf_fp16_accuracy.sh b/models/cv/classification/regnet_x_8gf/igie/scripts/infer_regnet_x_8gf_fp16_accuracy.sh new file mode 100755 index 0000000000000000000000000000000000000000..ea187d40123b6aa2d20ab03ea0ef683a47b1e0f4 --- /dev/null +++ b/models/cv/classification/regnet_x_8gf/igie/scripts/infer_regnet_x_8gf_fp16_accuracy.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="regnet_x_8gf.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 ${RUN_DIR}build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path regnet_x_8gf_bs_${batchsize}_fp16.so + + +# inference +python3 ${RUN_DIR}inference.py \ + --engine regnet_x_8gf_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ No newline at end of file diff --git a/models/cv/classification/regnet_x_8gf/igie/scripts/infer_regnet_x_8gf_fp16_performance.sh b/models/cv/classification/regnet_x_8gf/igie/scripts/infer_regnet_x_8gf_fp16_performance.sh new file mode 100755 index 0000000000000000000000000000000000000000..886eadf58531c1958442db960e338ba3b6e6b075 --- /dev/null +++ b/models/cv/classification/regnet_x_8gf/igie/scripts/infer_regnet_x_8gf_fp16_performance.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +batchsize=32 +model_path="regnet_x_8gf.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 ${RUN_DIR}build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path regnet_x_8gf_bs_${batchsize}_fp16.so + + +# inference +python3 ${RUN_DIR}inference.py \ + --engine regnet_x_8gf_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ + --perf_only True \ No newline at end of file diff --git a/models/cv/object_detection/yolov11/igie/README.md b/models/cv/object_detection/yolov11/igie/README.md index 5ab28d73e97d0d3f10b32ca71b96f9fda78893e9..ee365f4bc9ed7e7630d4454fd9d7edec5f9f1d95 100644 --- a/models/cv/object_detection/yolov11/igie/README.md +++ b/models/cv/object_detection/yolov11/igie/README.md @@ -75,11 +75,21 @@ bash scripts/infer_yolov11_fp16_accuracy.sh bash scripts/infer_yolov11_fp16_performance.sh ``` +### INT8 + +```bash +# Accuracy +bash scripts/infer_yolov11_int8_accuracy.sh +# Performance +bash scripts/infer_yolov11_int8_performance.sh +``` + ## Model Results | Model | BatchSize | Precision | FPS | IOU@0.5 | IOU@0.5:0.95 | | ------- | --------- | --------- | ------- | ------- | ------------ | -| YOLOv11 | 32 | FP16 | 1519.25 | 0.551 | 0.393 | +| YOLOv11 | 32 | FP16 | 1328.49 | 0.551 | 0.393 | +| YOLOv11 | 32 | INT8 | 1538.63 | 0.506 | 0.349 | ## References diff --git a/models/cv/object_detection/yolov11/igie/quantize.py b/models/cv/object_detection/yolov11/igie/quantize.py new file mode 100644 index 0000000000000000000000000000000000000000..7f019ab2b59915aa70173bba6f75114506ae7333 --- /dev/null +++ b/models/cv/object_detection/yolov11/igie/quantize.py @@ -0,0 +1,163 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
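Editor's note: the INT8 rows in the table above are produced from a statically quantized ONNX model; the `quantize.py` added below uses `QuantFormat.QOperator` and quantizes only `Conv` layers. One optional way to confirm the quantization took effect before building the INT8 engine is to count operator types in the quantized graph (file name taken from the INT8 scripts in this patch):

```python
from collections import Counter

import onnx

# With QOperator format and Conv-only quantization, most Conv nodes in the
# quantized graph should show up as QLinearConv.
model = onnx.load("yolo11n_int8.onnx")
ops = Counter(node.op_type for node in model.graph.node)
for op in ("Conv", "QLinearConv", "QuantizeLinear", "DequantizeLinear"):
    print(op, ops.get(op, 0))
```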
+ +import os +import onnx +import psutil +import argparse +import numpy as np +from pathlib import Path + +import torch + +from onnxruntime.quantization import (CalibrationDataReader, QuantFormat, + quantize_static, QuantType, + CalibrationMethod) + +from ultralytics.cfg import get_cfg +from ultralytics.utils import DEFAULT_CFG +from ultralytics.data.utils import check_det_dataset +from ultralytics.models.yolo.detect import DetectionValidator + +class CalibrationDataLoader(CalibrationDataReader): + def __init__(self, input_name, dataloader, cnt_limit=100): + self.cnt = 0 + self.input_name = input_name + self.cnt_limit = cnt_limit + self.dataloader = dataloader + self.iter = iter(dataloader) + + # avoid oom + @staticmethod + def _exceed_memory_upper_bound(upper_bound=80): + info = psutil.virtual_memory() + total_percent = info.percent + if total_percent >= upper_bound: + return True + return False + + def get_next(self): + if self._exceed_memory_upper_bound() or self.cnt >= self.cnt_limit: + return None + self.cnt += 1 + print(f"onnx calibration data count: {self.cnt}") + input_info = next(self.iter) + + ort_input = {self.input_name[0]: input_info.numpy()} + + return ort_input + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--model_path", + type=str, + required=True, + help="original model path.") + + parser.add_argument("--out_path", + type=str, + required=True, + help="igie export engine path.") + + parser.add_argument("--datasets", + type=str, + required=True, + help="calibration datasets path.") + + parser.add_argument("--batch", + type=int, + default=32, + help="batchsize of the model.") + + args = parser.parse_args() + + return args + +class PreProcessDatasets(DetectionValidator): + def __call__(self, data): + self.data = data + self.stride = 32 + self.dataloader = self.get_dataloader(self.data.get(self.args.split), self.args.batch) + + datasets = [] + length = 0 + + for batch in self.dataloader: + data = self.preprocess(batch)['img'] + datasets.append(data[0]) + length += data.shape[0] + + if length >= 200: + break + + return datasets + +class CalibrationDataset(torch.utils.data.Dataset): + def __init__(self, datasets): + self.datasets = datasets + + def __len__(self): + return len(self.datasets) + + def __getitem__(self, index): + return self.datasets[index] + + +def main(): + args = parse_args() + + model = onnx.load(args.model_path) + input_names = [input.name for input in model.graph.input] + + overrides = {'mode': 'val'} + cfg_args = get_cfg(cfg=DEFAULT_CFG, overrides=overrides) + + cfg_args.batch = 1 + cfg_args.save_json = True + + data = { + 'path': Path(args.datasets), + 'val': os.path.join(args.datasets, 'val2017.txt') + } + + validator = PreProcessDatasets(args=cfg_args, save_dir=Path('.')) + + datasets = CalibrationDataset(validator(data)) + + data_loader = torch.utils.data.DataLoader(dataset=datasets, batch_size=args.batch) + + cnt_limit = int(20 / args.batch) + 1 + + calibration = CalibrationDataLoader(input_names, data_loader, cnt_limit=cnt_limit) + + quantize_static(args.model_path, + args.out_path, + calibration_data_reader=calibration, + quant_format=QuantFormat.QOperator, + per_channel=False, + activation_type=QuantType.QInt8, + weight_type=QuantType.QInt8, + use_external_data_format=False, + op_types_to_quantize = ['Conv'], + calibrate_method=CalibrationMethod.Percentile, + extra_options = { + 'ActivationSymmetric': True, + 'WeightSymmetric': True + } + ) + +if __name__ == "__main__": + main() diff --git 
a/models/cv/object_detection/yolov11/igie/requirements.txt b/models/cv/object_detection/yolov11/igie/requirements.txt index 72c3e77cc563c17d86fe72609162b2b17e91c72f..81155ab83edbbb0c115b52d40499715d30756608 100644 --- a/models/cv/object_detection/yolov11/igie/requirements.txt +++ b/models/cv/object_detection/yolov11/igie/requirements.txt @@ -1,4 +1,3 @@ tqdm -onnx==1.13.0 -onnxsim==0.4.36 +onnx==1.16.0 ultralytics==8.3.59 diff --git a/models/cv/object_detection/yolov11/igie/scripts/infer_yolov11_int8_accuracy.sh b/models/cv/object_detection/yolov11/igie/scripts/infer_yolov11_int8_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..2fa4680b4b1d01358126e95e4d8c7f7393b67aa0 --- /dev/null +++ b/models/cv/object_detection/yolov11/igie/scripts/infer_yolov11_int8_accuracy.sh @@ -0,0 +1,44 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="yolo11n_opt.onnx" +quantized_model_path="yolo11n_int8.onnx" +datasets_path=${DATASETS_DIR} + +if [ ! -e $quantized_model_path ]; then + # quantize model to int8 + python3 quantize.py \ + --model_path ${model_path} \ + --out_path ${quantized_model_path} \ + --batch ${batchsize} \ + --datasets ${datasets_path} +fi + +# build engine +python3 ../../igie_common/build_engine.py \ + --model_path ${quantized_model_path} \ + --input images:${batchsize},3,640,640 \ + --precision int8 \ + --engine_path yolo11n_bs_${batchsize}_int8.so + +# inference +python3 inference.py \ + --engine yolo11n_bs_${batchsize}_int8.so \ + --batchsize ${batchsize} \ + --input_name images \ + --datasets ${datasets_path} diff --git a/models/cv/object_detection/yolov11/igie/scripts/infer_yolov11_int8_performance.sh b/models/cv/object_detection/yolov11/igie/scripts/infer_yolov11_int8_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..8cc48c778ad37168439ee4c96a9627dc2ec80222 --- /dev/null +++ b/models/cv/object_detection/yolov11/igie/scripts/infer_yolov11_int8_performance.sh @@ -0,0 +1,45 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="yolo11n_opt.onnx" +quantized_model_path="yolo11n_int8.onnx" +datasets_path=${DATASETS_DIR} + +if [ ! 
-e $quantized_model_path ]; then + # quantize model to int8 + python3 quantize.py \ + --model_path ${model_path} \ + --out_path ${quantized_model_path} \ + --batch ${batchsize} \ + --datasets ${datasets_path} +fi + +# build engine +python3 ../../igie_common/build_engine.py \ + --model_path ${quantized_model_path} \ + --input images:${batchsize},3,640,640 \ + --precision int8 \ + --engine_path yolo11n_bs_${batchsize}_int8.so + +# inference +python3 inference.py \ + --engine yolo11n_bs_${batchsize}_int8.so \ + --batchsize ${batchsize} \ + --input_name images \ + --datasets ${datasets_path} \ + --perf_only True diff --git a/models/cv/object_detection/yolov8n/igie/README.md b/models/cv/object_detection/yolov8n/igie/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a826140b8c6f233858b9a7ac833ea6db919c301e --- /dev/null +++ b/models/cv/object_detection/yolov8n/igie/README.md @@ -0,0 +1,98 @@ +# YOLOv8 (IGIE) + +## Model Description + +YOLOv8n combines exceptional speed and competitive accuracy in real-time object detection tasks. With a focus on simplicity and efficiency, this compact model employs a single neural network to make predictions, enabling rapid and reliable identification of objects in images or video streams, making it ideal for resource-constrained environments. + +## Supported Environments + +| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release | +| :----: | :----: | :----: | +| MR-V100 | 4.3.0 | 25.09 | +| MR-V100 | 4.2.0 | 25.03 | + +## Model Preparation + +### Prepare Resources + +Pretrained model: + +Dataset: + - to download the labels dataset. + - to download the validation dataset. + - to download the train dataset. + +```bash +unzip -q -d ./ coco2017labels.zip +unzip -q -d ./coco/images/ train2017.zip +unzip -q -d ./coco/images/ val2017.zip + +coco +├── annotations +│   └── instances_val2017.json +├── images +│   ├── train2017 +│   └── val2017 +├── labels +│   ├── train2017 +│   └── val2017 +├── LICENSE +├── README.txt +├── test-dev2017.txt +├── train2017.cache +├── train2017.txt +├── val2017.cache +└── val2017.txt +``` + +### Install Dependencies + +Contact the Iluvatar administrator to get the missing packages: +- mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl + +```bash +# Install libGL +## CentOS +yum install -y mesa-libGL +## Ubuntu +apt install -y libgl1-mesa-glx + +pip3 install -r requirements.txt +``` + +### Model Conversion + +```bash +python3 export.py --weight yolov8n.pt --batch 32 +``` + +## Model Inference + +```bash +export DATASETS_DIR=/Path/to/coco/ +``` + +### FP16 + +```bash +# Accuracy +bash scripts/infer_yolov8n_fp16_accuracy.sh +# Performance +bash scripts/infer_yolov8n_fp16_performance.sh +``` + +### INT8 + +```bash +# Accuracy +bash scripts/infer_yolov8n_int8_accuracy.sh +# Performance +bash scripts/infer_yolov8n_int8_performance.sh +``` + +## Model Results + +| Model | BatchSize | Precision | FPS | MAP@0.5 | MAP@0.5:0.95 | +| :----: | :----: | :----: | :----: | :----: | :----: | +| YOLOv8n | 32 | FP16 | 1149.667 | 0.526 | 0.373 | +| YOLOv8n | 32 | INT8 | 1212.266 | 0.503 | 0.348 | diff --git a/models/cv/object_detection/yolov8n/igie/ci/prepare.sh b/models/cv/object_detection/yolov8n/igie/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..62fe1aec4c89e23bae77336c57e12c23cde050b8 --- /dev/null +++ b/models/cv/object_detection/yolov8n/igie/ci/prepare.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# Copyright (c) 
2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip3 install -r requirements.txt + +python3 export.py --weight yolov8n.pt --batch 32 diff --git a/models/cv/object_detection/yolov8n/igie/export.py b/models/cv/object_detection/yolov8n/igie/export.py new file mode 100644 index 0000000000000000000000000000000000000000..7c3e8fb055d239cf94b8fb4807486af554d046d5 --- /dev/null +++ b/models/cv/object_detection/yolov8n/igie/export.py @@ -0,0 +1,44 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import argparse +from ultralytics import YOLO +import torch + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--weight", + type=str, + required=True, + help="pytorch model weight.") + + parser.add_argument("--batch", + type=int, + required=True, + help="batchsize of the model.") + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + model = YOLO(args.weight).cpu() + + model.export(format='onnx', batch=args.batch, dynamic=True, imgsz=(640, 640), optimize=True, simplify=True, opset=13) + +if __name__ == "__main__": + main() diff --git a/models/cv/object_detection/yolov8n/igie/inference.py b/models/cv/object_detection/yolov8n/igie/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..c65238a719e9674892053c4d46e88cfbddb6eca9 --- /dev/null +++ b/models/cv/object_detection/yolov8n/igie/inference.py @@ -0,0 +1,210 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
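Editor's note: `export.py` above exports the YOLOv8n weights with a dynamic batch dimension and a 640x640 input, and the scripts below feed the graph through an input named `images`. If the engine build later fails, a quick ONNX Runtime smoke test of `yolov8n.onnx` can help isolate whether the exported graph itself is fine; this sketch assumes `onnxruntime` is installed (it is already imported by `quantize.py` for the INT8 step):

```python
import numpy as np
import onnxruntime as ort

# Dummy smoke test of the exported graph: the IGIE scripts use an input named
# "images" with shape batch,3,640,640, so the ONNX model should accept the same.
sess = ort.InferenceSession("yolov8n.onnx", providers=["CPUExecutionProvider"])
dummy = np.zeros((32, 3, 640, 640), dtype=np.float32)
outputs = sess.run(None, {"images": dummy})
print([o.shape for o in outputs])
```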
+ +import os +import argparse +import tvm +import json +import torch +import numpy as np +from tvm import relay +from tqdm import tqdm + +from pathlib import Path + +from ultralytics.cfg import get_cfg +from ultralytics.data import converter +from ultralytics.utils import DEFAULT_CFG +from ultralytics.data.utils import check_det_dataset +from ultralytics.utils.metrics import ConfusionMatrix +from ultralytics.models.yolo.detect import DetectionValidator + +coco_classes = { + 0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', + 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', + 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', + 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', + 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', + 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed', + 60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone', 68: 'microwave', 69: 'oven', + 70: 'toaster', 71: 'sink', 72: 'refrigerator', 73: 'book', 74: 'clock', 75: 'vase', 76: 'scissors', 77: 'teddy bear', 78: 'hair drier', 79: 'toothbrush' +} + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--engine", + type=str, + required=True, + help="igie engine path.") + + parser.add_argument("--batchsize", + type=int, + required=True, + help="inference batch size.") + + parser.add_argument("--datasets", + type=str, + required=True, + help="datasets path.") + + parser.add_argument("--input_name", + type=str, + required=True, + help="input name of the model.") + + parser.add_argument("--warmup", + type=int, + default=3, + help="number of warmup before test.") + + parser.add_argument("--num_workers", + type=int, + default=16, + help="number of workers used in pytorch dataloader.") + + parser.add_argument("--acc_target", + type=float, + default=None, + help="Model inference Accuracy target.") + + parser.add_argument("--fps_target", + type=float, + default=None, + help="Model inference FPS target.") + + parser.add_argument("--conf", + type=float, + default=0.001, + help="confidence threshold.") + + parser.add_argument("--iou", + type=float, + default=0.65, + help="iou threshold.") + + parser.add_argument("--perf_only", + type=bool, + default=False, + help="Run performance test only") + + args = parser.parse_args() + + return args + +class IGIE_Validator(DetectionValidator): + def __call__(self, engine, device, data): + self.data = data + self.stride = 32 + self.dataloader = self.get_dataloader(self.data.get(self.args.split), self.args.batch) + self.init_metrics() + + total_num = 0 + + # wram up + for _ in range(3): + engine.run() + + for batch in tqdm(self.dataloader): + batch = self.preprocess(batch) + + imgs = batch['img'] + pad_batch = len(imgs) != self.args.batch + if pad_batch: + origin_size = len(imgs) + imgs = np.resize(imgs, (self.args.batch, *imgs.shape[1:])) + + engine.set_input(0, tvm.nd.array(imgs, device)) + + engine.run() + + + total_num += self.args.batch + 
outputs = engine.get_output(0).asnumpy() + + if pad_batch: + outputs = outputs[:origin_size] + + outputs = torch.from_numpy(outputs) + + preds = self.postprocess([outputs]) + + self.update_metrics(preds, batch) + + stats = self.get_stats() + + if self.args.save_json and self.jdict: + with open(str(self.save_dir / 'predictions.json'), 'w') as f: + print(f'Saving {f.name} ...') + json.dump(self.jdict, f) # flatten and save + + stats = self.eval_json(stats) + + return stats + + def init_metrics(self): + """Initialize evaluation metrics for YOLO.""" + val = self.data.get(self.args.split, '') # validation path + self.is_coco = isinstance(val, str) and 'coco' in val and val.endswith(f'{os.sep}val2017.txt') # is COCO + self.class_map = converter.coco80_to_coco91_class() if self.is_coco else list(range(1000)) + self.args.save_json |= self.is_coco and not self.training # run on final val if training COCO + self.names = self.data['names'] + self.nc = len(self.names) + self.metrics.names = self.names + self.confusion_matrix = ConfusionMatrix(nc=80) + self.seen = 0 + self.jdict = [] + self.stats = dict(tp=[], conf=[], pred_cls=[], target_cls=[]) + +def main(): + args = parse_args() + + batch_size = args.batchsize + + # create iluvatar target & device + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + device = tvm.device(target.kind.name, 0) + + # load engine + lib = tvm.runtime.load_module(args.engine) + + # create runtime from engine + module = tvm.contrib.graph_executor.GraphModule(lib["default"](device)) + + # just run perf test + if args.perf_only: + ftimer = module.module.time_evaluator("run", device, number=100, repeat=1) + prof_res = np.array(ftimer().results) * 1000 + fps = batch_size * 1000 / np.mean(prof_res) + print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}") + else: + overrides = {'mode': 'val'} + cfg_args = get_cfg(cfg=DEFAULT_CFG, overrides=overrides) + + cfg_args.batch = batch_size + cfg_args.save_json = True + + data = { + 'path': Path(args.datasets), + 'val': os.path.join(args.datasets, 'val2017.txt'), + 'names': coco_classes + } + + validator = IGIE_Validator(args=cfg_args, save_dir=Path('.')) + + stats = validator(module, device, data) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/object_detection/yolov8n/igie/quantize.py b/models/cv/object_detection/yolov8n/igie/quantize.py new file mode 100644 index 0000000000000000000000000000000000000000..90b0415ed4af29a186a8663d754640cd98db85bc --- /dev/null +++ b/models/cv/object_detection/yolov8n/igie/quantize.py @@ -0,0 +1,164 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
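Editor's note: the `IGIE_Validator` in `inference.py` above has to work with a fixed engine batch size, so the last (usually smaller) COCO batch is padded with `np.resize` before `engine.run()` and the padded outputs are dropped afterwards. A standalone sketch of that pattern, with placeholder shapes for the engine output:

```python
import numpy as np

engine_batch = 32
imgs = np.random.rand(20, 3, 640, 640).astype(np.float32)  # last, partial batch

origin_size = len(imgs)
pad_batch = origin_size != engine_batch
if pad_batch:
    # np.resize repeats the data until the fixed engine batch is filled,
    # mirroring what inference.py does before engine.set_input()/engine.run()
    imgs = np.resize(imgs, (engine_batch, *imgs.shape[1:]))

outputs = np.zeros((engine_batch, 84, 8400), dtype=np.float32)  # placeholder engine output
if pad_batch:
    outputs = outputs[:origin_size]  # drop padded entries before computing metrics

print(imgs.shape, outputs.shape)
```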
+ +import os +import onnx +import psutil +import argparse +import numpy as np +from pathlib import Path + +import torch + +from onnxruntime.quantization import (CalibrationDataReader, QuantFormat, + quantize_static, QuantType, + CalibrationMethod) + +from ultralytics.cfg import get_cfg +from ultralytics.utils import DEFAULT_CFG +from ultralytics.data.utils import check_det_dataset +from ultralytics.models.yolo.detect import DetectionValidator + +class CalibrationDataLoader(CalibrationDataReader): + def __init__(self, input_name, dataloader, cnt_limit=100): + self.cnt = 0 + self.input_name = input_name + self.cnt_limit = cnt_limit + self.dataloader = dataloader + self.iter = iter(dataloader) + + # avoid oom + @staticmethod + def _exceed_memory_upper_bound(upper_bound=80): + info = psutil.virtual_memory() + total_percent = info.percent + if total_percent >= upper_bound: + return True + return False + + def get_next(self): + if self._exceed_memory_upper_bound() or self.cnt >= self.cnt_limit: + return None + self.cnt += 1 + print(f"onnx calibration data count: {self.cnt}") + input_info = next(self.iter) + + ort_input = {self.input_name[0]: input_info.numpy()} + + return ort_input + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--model_path", + type=str, + required=True, + help="original model path.") + + parser.add_argument("--out_path", + type=str, + required=True, + help="igie export engine path.") + + parser.add_argument("--datasets", + type=str, + required=True, + help="calibration datasets path.") + + parser.add_argument("--batch", + type=int, + default=32, + help="batchsize of the model.") + + args = parser.parse_args() + + return args + +class PreProcessDatasets(DetectionValidator): + def __call__(self, data): + self.data = data + self.stride = 32 + self.dataloader = self.get_dataloader(self.data.get(self.args.split), self.args.batch) + + datasets = [] + length = 0 + + for batch in self.dataloader: + data = self.preprocess(batch)['img'] + datasets.append(data[0]) + length += data.shape[0] + + if length >= 200: + break + + return datasets + +class CalibrationDataset(torch.utils.data.Dataset): + def __init__(self, datasets): + self.datasets = datasets + + def __len__(self): + return len(self.datasets) + + def __getitem__(self, index): + return self.datasets[index] + + +def main(): + args = parse_args() + + model = onnx.load(args.model_path) + input_names = [input.name for input in model.graph.input] + + overrides = {'mode': 'val'} + cfg_args = get_cfg(cfg=DEFAULT_CFG, overrides=overrides) + + cfg_args.batch = 1 + cfg_args.save_json = True + + data = { + 'path': Path(args.datasets), + 'val': os.path.join(args.datasets, 'val2017.txt') + } + + validator = PreProcessDatasets(args=cfg_args, save_dir=Path('.')) + + datasets = CalibrationDataset(validator(data)) + + data_loader = torch.utils.data.DataLoader(dataset=datasets, batch_size=args.batch) + + cnt_limit = int(20 / args.batch) + 1 + + calibration = CalibrationDataLoader(input_names, data_loader, cnt_limit=cnt_limit) + + quantize_static(args.model_path, + args.out_path, + calibration_data_reader=calibration, + quant_format=QuantFormat.QOperator, + op_types_to_quantize=['Conv'], + per_channel=False, + activation_type=QuantType.QInt8, + weight_type=QuantType.QInt8, + use_external_data_format=False, + nodes_to_exclude=['/model.22/Add_9', '/model.22/Concat_24', '/model.22/Concat_25', '/model.22/Mul_3', '/model.22/dfl/Softmax'], + calibrate_method=CalibrationMethod.Percentile, + extra_options = { + 
'ActivationSymmetric': True, + 'WeightSymmetric': True + } + ) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/object_detection/yolov8n/igie/requirements.txt b/models/cv/object_detection/yolov8n/igie/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..d69fe4dc8f45e4ac14e980b6b0c498132fb8f915 --- /dev/null +++ b/models/cv/object_detection/yolov8n/igie/requirements.txt @@ -0,0 +1,6 @@ +tqdm +onnx +pycocotools +# FAILed in 8.2.51 +ultralytics==8.1.34 +opencv-python diff --git a/models/cv/object_detection/yolov8n/igie/scripts/infer_yolov8n_fp16_accuracy.sh b/models/cv/object_detection/yolov8n/igie/scripts/infer_yolov8n_fp16_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..cc2447a75fa117506a4c090cdc83eccba7a817b5 --- /dev/null +++ b/models/cv/object_detection/yolov8n/igie/scripts/infer_yolov8n_fp16_accuracy.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="yolov8n.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 ../../igie_common/build_engine.py \ + --model_path ${model_path} \ + --input images:${batchsize},3,640,640 \ + --precision fp16 \ + --engine_path yolov8n_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine yolov8n_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name images \ + --datasets ${datasets_path} \ No newline at end of file diff --git a/models/cv/object_detection/yolov8n/igie/scripts/infer_yolov8n_fp16_performance.sh b/models/cv/object_detection/yolov8n/igie/scripts/infer_yolov8n_fp16_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..79af1162e88c2615e05129352d960a5676827fcb --- /dev/null +++ b/models/cv/object_detection/yolov8n/igie/scripts/infer_yolov8n_fp16_performance.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
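Editor's note: the engine-build calls in these scripts describe the model input as `images:${batchsize},3,640,640`. The actual parsing happens inside `igie_common/build_engine.py`, which is not part of this diff; the helper below is purely a hypothetical illustration of how such a `name:d0,d1,...` spec could be split:

```python
def parse_input_spec(spec: str):
    """Split a 'name:d0,d1,...' input spec into (name, shape).

    Hypothetical helper for illustration only; the real logic lives in
    igie_common/build_engine.py, which is not included in this patch.
    """
    name, _, dims = spec.partition(":")
    return name, [int(d) for d in dims.split(",")]

print(parse_input_spec("images:32,3,640,640"))  # ('images', [32, 3, 640, 640])
```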
+ +batchsize=32 +model_path="yolov8n.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 ../../igie_common/build_engine.py \ + --model_path ${model_path} \ + --input images:${batchsize},3,640,640 \ + --precision fp16 \ + --engine_path yolov8n_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine yolov8n_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name images \ + --datasets ${datasets_path} \ + --perf_only True \ No newline at end of file diff --git a/models/cv/object_detection/yolov8n/igie/scripts/infer_yolov8n_int8_accuracy.sh b/models/cv/object_detection/yolov8n/igie/scripts/infer_yolov8n_int8_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..aeb9e620dc57d4aa736f0c39c75a4a2b5f790040 --- /dev/null +++ b/models/cv/object_detection/yolov8n/igie/scripts/infer_yolov8n_int8_accuracy.sh @@ -0,0 +1,44 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="yolov8n.onnx" +quantized_model_path="yolov8n_int8.onnx" +datasets_path=${DATASETS_DIR} + +if [ ! -e $quantized_model_path ]; then + # quantize model to int8 + python3 quantize.py \ + --model_path ${model_path} \ + --out_path ${quantized_model_path} \ + --batch ${batchsize} \ + --datasets ${datasets_path} +fi + +# build engine +python3 ../../igie_common/build_engine.py \ + --model_path ${quantized_model_path} \ + --input images:${batchsize},3,640,640 \ + --precision int8 \ + --engine_path yolov8n_bs_${batchsize}_int8.so + +# inference +python3 inference.py \ + --engine yolov8n_bs_${batchsize}_int8.so \ + --batchsize ${batchsize} \ + --input_name images \ + --datasets ${datasets_path} \ No newline at end of file diff --git a/models/cv/object_detection/yolov8n/igie/scripts/infer_yolov8n_int8_performance.sh b/models/cv/object_detection/yolov8n/igie/scripts/infer_yolov8n_int8_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..d180276c03962a8c5f021b145601900bb0d94fb5 --- /dev/null +++ b/models/cv/object_detection/yolov8n/igie/scripts/infer_yolov8n_int8_performance.sh @@ -0,0 +1,45 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="yolov8n.onnx" +quantized_model_path="yolov8n_int8.onnx" +datasets_path=${DATASETS_DIR} + +if [ ! 
-e $quantized_model_path ]; then + # quantize model to int8 + python3 quantize.py \ + --model_path ${model_path} \ + --out_path ${quantized_model_path} \ + --batch ${batchsize} \ + --datasets ${datasets_path} +fi + +# build engine +python3 ../../igie_common/build_engine.py \ + --model_path ${quantized_model_path} \ + --input images:${batchsize},3,640,640 \ + --precision int8 \ + --engine_path yolov8n_bs_${batchsize}_int8.so + +# inference +python3 inference.py \ + --engine yolov8n_bs_${batchsize}_int8.so \ + --batchsize ${batchsize} \ + --input_name images \ + --datasets ${datasets_path} \ + --perf_only True diff --git a/models/cv/object_detection/yolov9/igie/README.md b/models/cv/object_detection/yolov9/igie/README.md index a859f82b76871cf64f9006481d1bfdccd9d8b140..89786550905e427000b24b94eb97a13f3d6abc3c 100644 --- a/models/cv/object_detection/yolov9/igie/README.md +++ b/models/cv/object_detection/yolov9/igie/README.md @@ -79,7 +79,8 @@ bash scripts/infer_yolov9_fp16_performance.sh | Model | BatchSize | Precision | FPS | IOU@0.5 | IOU@0.5:0.95 | | ------ | --------- | --------- | ------ | ------- | ------------ | -| YOLOv9 | 32 | FP16 | 814.42 | 0.625 | 0.464 | +| YOLOv9 | 32 | FP16 | 548.715 | 0.625 | 0.464 | +| YOLOv9 | 32 | INT8 | 678.959 | 0.559 | 0.347 | ## References diff --git a/models/cv/object_detection/yolov9/igie/inference.py b/models/cv/object_detection/yolov9/igie/inference.py index cbed1c03c2631c6807922d699d6c1994331b4e7d..64b744642d648617bb0ab3e9a007459b944cb045 100644 --- a/models/cv/object_detection/yolov9/igie/inference.py +++ b/models/cv/object_detection/yolov9/igie/inference.py @@ -15,7 +15,7 @@ import argparse import os - +import torch import tvm from tvm import relay @@ -26,7 +26,7 @@ from ultralytics.cfg import get_cfg from ultralytics.utils import DEFAULT_CFG from validator import IGIE_Validator - +from utils import COCO2017Dataset, COCO2017Evaluator def parse_args(): parser = argparse.ArgumentParser() @@ -75,6 +75,17 @@ def parse_args(): return args +def get_dataloader(data_path, label_path, batch_size, num_workers): + + dataset = COCO2017Dataset(data_path, label_path, image_size=640) + + dataloader = torch.utils.data.DataLoader(dataset, + batch_size=batch_size, + drop_last=False, + num_workers=num_workers, + collate_fn=dataset.collate_fn) + return dataloader + def main(): args = parse_args() diff --git a/models/cv/object_detection/yolov9/igie/quantize.py b/models/cv/object_detection/yolov9/igie/quantize.py new file mode 100644 index 0000000000000000000000000000000000000000..3c7a71ee308f804a524d3087c0fac1cdb2610aaf --- /dev/null +++ b/models/cv/object_detection/yolov9/igie/quantize.py @@ -0,0 +1,108 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
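Editor's note: the YOLOv9 `quantize.py` below keeps several detection-head nodes (the `/model.22/...` entries in `nodes_to_exclude`) in floating point, presumably because that part of the box-decoding head is sensitive to quantization. Those node names have to match the exported graph; a short sketch for listing them in `yolov9s.onnx` (model file name taken from the INT8 scripts) when adjusting the exclusion list:

```python
import onnx

# List detection-head nodes so the nodes_to_exclude entries in quantize.py
# can be checked against the names actually present in the exported graph.
model = onnx.load("yolov9s.onnx")
for node in model.graph.node:
    if node.name.startswith("/model.22/"):
        print(node.op_type, node.name)
```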
+ +import os +import onnx +import psutil +import argparse +import numpy as np +from inference import get_dataloader +from onnxruntime.quantization import (CalibrationDataReader, QuantFormat, + quantize_static, QuantType, + CalibrationMethod) + +class CalibrationDataLoader(CalibrationDataReader): + def __init__(self, input_name, dataloader, cnt_limit=100): + self.cnt = 0 + self.input_name = input_name + self.cnt_limit = cnt_limit + self.iter = iter(dataloader) + + # avoid oom + @staticmethod + def _exceed_memory_upper_bound(upper_bound=80): + info = psutil.virtual_memory() + total_percent = info.percent + if total_percent >= upper_bound: + return True + return False + + def get_next(self): + if self._exceed_memory_upper_bound() or self.cnt >= self.cnt_limit: + return None + self.cnt += 1 + print(f"onnx calibration data count: {self.cnt}") + input_info = next(self.iter) + + ort_input = {k: np.array(v) for k, v in zip(self.input_name, input_info)} + return ort_input + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--model_path", + type=str, + required=True, + help="original model path.") + + parser.add_argument("--out_path", + type=str, + required=True, + help="igie export engine path.") + + parser.add_argument("--datasets", + type=str, + required=True, + help="calibration datasets path.") + + parser.add_argument("--num_workers", + type=int, + default=16, + help="number of workers used in pytorch dataloader.") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + model = onnx.load(args.model_path) + input_names = [input.name for input in model.graph.input] + + data_path = os.path.join(args.datasets, "images", "val2017") + label_path = os.path.join(args.datasets, "annotations", "instances_val2017.json") + + dataloader = get_dataloader(data_path, label_path, batch_size=1, num_workers=args.num_workers) + calibration = CalibrationDataLoader(input_names, dataloader, cnt_limit=20) + + quantize_static(args.model_path, + args.out_path, + calibration_data_reader=calibration, + quant_format=QuantFormat.QOperator, + per_channel=False, + activation_type=QuantType.QInt8, + weight_type=QuantType.QInt8, + use_external_data_format=False, + nodes_to_exclude= ['/model.22/Add_10', '/model.22/Add_11', '/model.22/Add_9', '/model.22/Concat_24', '/model.22/Concat_25', '/model.22/Mul_4', '/model.22/Mul_5', '/model.22/dfl/Softmax'], + calibrate_method=CalibrationMethod.Percentile, + extra_options = { + 'ActivationSymmetric': True, + 'WeightSymmetric': True + } + ) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/object_detection/yolov9/igie/scripts/infer_yolov9_int8_accuracy.sh b/models/cv/object_detection/yolov9/igie/scripts/infer_yolov9_int8_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..63da94389900125410e11772020483b07076163a --- /dev/null +++ b/models/cv/object_detection/yolov9/igie/scripts/infer_yolov9_int8_accuracy.sh @@ -0,0 +1,44 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="yolov9s.onnx" +quantized_model_path="yolov9s_int8.onnx" +datasets_path=${DATASETS_DIR} + +if [ ! -e $quantized_model_path ]; then + # quantize model to int8 + python3 quantize.py \ + --model_path ${model_path} \ + --out_path ${quantized_model_path} \ + --datasets ${datasets_path} +fi + +# build engine +python3 ../../igie_common/build_engine.py \ + --model_path ${quantized_model_path} \ + --input images:${batchsize},3,640,640 \ + --precision int8 \ + --engine_path yolov9s_bs_${batchsize}_int8.so + + +# inference +python3 inference.py \ + --engine yolov9s_bs_${batchsize}_int8.so \ + --batchsize ${batchsize} \ + --input_name images \ + --datasets ${datasets_path} \ No newline at end of file diff --git a/models/cv/object_detection/yolov9/igie/scripts/infer_yolov9_int8_performance.sh b/models/cv/object_detection/yolov9/igie/scripts/infer_yolov9_int8_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..2f103c72b23918d6c98d330b9b1d72115bd775a1 --- /dev/null +++ b/models/cv/object_detection/yolov9/igie/scripts/infer_yolov9_int8_performance.sh @@ -0,0 +1,45 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="yolov9s.onnx" +quantized_model_path="yolov9s_int8.onnx" +datasets_path=${DATASETS_DIR} + +if [ ! -e $quantized_model_path ]; then + # quantize model to int8 + python3 quantize.py \ + --model_path ${model_path} \ + --out_path ${quantized_model_path} \ + --datasets ${datasets_path} +fi + +# build engine +python3 ../../igie_common/build_engine.py \ + --model_path ${quantized_model_path} \ + --input images:${batchsize},3,640,640 \ + --precision int8 \ + --engine_path yolov9s_bs_${batchsize}_int8.so + + +# inference +python3 inference.py \ + --engine yolov9s_bs_${batchsize}_int8.so \ + --batchsize ${batchsize} \ + --input_name images \ + --datasets ${datasets_path} \ + --perf_only True \ No newline at end of file diff --git a/models/cv/object_detection/yolov9/igie/utils.py b/models/cv/object_detection/yolov9/igie/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..d3d146399d28635e64bac3541371718bfa2830de --- /dev/null +++ b/models/cv/object_detection/yolov9/igie/utils.py @@ -0,0 +1,436 @@ +# Copyright, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import cv2 +import json +import torch +import torchvision +import numpy as np + +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval + +coco80_to_coco91 = [ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, + 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, + 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, + 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, + 89, 90 +] + +coco80_to_coco91_dict = {idx: i for idx, i in enumerate(coco80_to_coco91)} +coco91_to_coco80_dict = {i: idx for idx, i in enumerate(coco80_to_coco91)} + +def letterbox(im, new_shape=(640, 640), color=(114, 114, 114)): + # Resize and pad image while meeting stride-multiple constraints + # current shape [height, width] + + shape = im.shape[:2] + if isinstance(new_shape, int): + new_shape = (new_shape, new_shape) + + # Scale ratio (new / old) + r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) + + # Compute padding + ratio = r, r + new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) + dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] + + dw /= 2 + dh /= 2 + + if shape[::-1] != new_unpad: + im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR) + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + im = cv2.copyMakeBorder(im, + top, + bottom, + left, + right, + cv2.BORDER_CONSTANT, + value=color) + return im, ratio, (dw, dh) + +def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0): + # Convert nx4 boxes from [x, y, w, h] normalized to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[:, 0] = w * (x[:, 0] - x[:, 2] / 2) + padw # top left x + y[:, 1] = h * (x[:, 1] - x[:, 3] / 2) + padh # top left y + y[:, 2] = w * (x[:, 0] + x[:, 2] / 2) + padw # bottom right x + y[:, 3] = h * (x[:, 1] + x[:, 3] / 2) + padh # bottom right y + return y + +def clip_boxes(boxes, shape): + # Clip boxes (xyxy) to image shape (height, width) + if isinstance(boxes, torch.Tensor): # faster individually + boxes[:, 0].clamp_(0, shape[1]) # x1 + boxes[:, 1].clamp_(0, shape[0]) # y1 + boxes[:, 2].clamp_(0, shape[1]) # x2 + boxes[:, 3].clamp_(0, shape[0]) # y2 + else: # np.array (faster grouped) + boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2 + boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2 + +def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0): + # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] normalized where xy1=top-left, xy2=bottom-right + if clip: + clip_boxes(x, (h - eps, w - eps)) # warning: inplace clip + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[:, 0] = ((x[:, 0] + x[:, 2]) / 2) / w # x center + y[:, 1] = ((x[:, 1] + x[:, 3]) / 2) / h # y center + y[:, 2] = (x[:, 2] - x[:, 0]) / w # width + y[:, 3] = (x[:, 3] - x[:, 1]) / h # height + return y + +def xywh2xyxy(x): + # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right 
+ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x + y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y + y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x + y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y + return y + +def xyxy2xywh(x): + # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[:, 0] = (x[:, 0] + x[:, 2]) / 2 # x center + y[:, 1] = (x[:, 1] + x[:, 3]) / 2 # y center + y[:, 2] = x[:, 2] - x[:, 0] # width + y[:, 3] = x[:, 3] - x[:, 1] # height + return y + +def box_area(box): + return (box[2] - box[0]) * (box[3] - box[1]) + +def box_iou(box1, box2, eps=1e-7): + # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2) + (a1, a2), (b1, b2) = box1[:, None].chunk(2, 2), box2.chunk(2, 1) + inter = (torch.min(a2, b2) - torch.max(a1, b1)).clamp(0).prod(2) + + # IoU = inter / (area1 + area2 - inter) + return inter / (box_area(box1.T)[:, None] + box_area(box2.T) - inter + eps) + +def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None): + # Rescale boxes (xyxy) from img1_shape to img0_shape + if ratio_pad is None: # calculate from img0_shape + gain = min(img1_shape[0] / img0_shape[0], + img1_shape[1] / img0_shape[1]) + pad = (img1_shape[1] - img0_shape[1] * gain) / 2, ( + img1_shape[0] - img0_shape[0] * gain) / 2 + else: + gain = ratio_pad[0][0] + pad = ratio_pad[1] + + boxes[:, [0, 2]] -= pad[0] # x padding + boxes[:, [1, 3]] -= pad[1] # y padding + boxes[:, :4] /= gain + clip_boxes(boxes, img0_shape) + + return boxes + + +class COCO2017Dataset(torch.utils.data.Dataset): + def __init__(self, + image_dir_path, + label_json_path, + image_size=640, + pad_color=114, + val_mode=True, + input_layout="NCHW"): + + self.image_dir_path = image_dir_path + self.label_json_path = label_json_path + self.image_size = image_size + self.pad_color = pad_color + self.val_mode = val_mode + self.input_layout = input_layout + + self.coco = COCO(annotation_file=self.label_json_path) + + if self.val_mode: + self.img_ids = list(sorted(self.coco.imgs.keys())) + else: + self.img_ids = sorted(list(self.coco.imgToAnns.keys())) + + def __len__(self): + return len(self.img_ids) + + def __getitem__(self, index): + img_path = self._get_image_path(index) + img, (h0, w0), (h, w) = self._load_image(index) + + img, ratio, pad = letterbox(img, + self.image_size, + color=(self.pad_color, self.pad_color, self.pad_color)) + shapes = (h0, w0), ((h / h0, w / w0), pad) + + # load label + raw_label = self._load_json_label(index) + # normalized xywh to pixel xyxy format + raw_label[:, 1:] = xywhn2xyxy(raw_label[:, 1:], + ratio[0] * w, + ratio[1] * h, + padw=pad[0], + padh=pad[1]) + + raw_label[:, 1:] = xyxy2xywhn(raw_label[:, 1:], + w=img.shape[1], + h=img.shape[0], + clip=True, + eps=1E-3) + + nl = len(raw_label) + labels_out = np.zeros((nl, 6)) + labels_out[:, 1:] = raw_label + + # HWC to CHW, BGR to RGB + img = img.transpose((2, 0, 1))[::-1] + img = np.ascontiguousarray(img) / 255.0 + if self.input_layout == "NHWC": + img = img.transpose((1, 2, 0)) + + return img, labels_out, img_path, shapes + + def _get_image_path(self, index): + idx = self.img_ids[index] + path = self.coco.loadImgs(idx)[0]["file_name"] + img_path = os.path.join(self.image_dir_path, path) + return img_path + + def _load_image(self, index): + img_path = self._get_image_path(index) + + im = cv2.imread(img_path) + h0, w0 = im.shape[:2] + r = self.image_size / max(h0, w0) + if r != 1: + im = 
cv2.resize(im, (int(w0 * r), int(h0 * r)), interpolation=cv2.INTER_LINEAR) + return im.astype("float32"), (h0, w0), im.shape[:2] + + def _load_json_label(self, index): + _, (h0, w0), _ = self._load_image(index) + + idx = self.img_ids[index] + ann_ids = self.coco.getAnnIds(imgIds=idx) + targets = self.coco.loadAnns(ids=ann_ids) + + labels = [] + for target in targets: + cat = target["category_id"] + coco80_cat = coco91_to_coco80_dict[cat] + cat = np.array([[coco80_cat]]) + + x, y, w, h = target["bbox"] + x1, y1, x2, y2 = x, y, int(x + w), int(y + h) + xyxy = np.array([[x1, y1, x2, y2]]) + xywhn = xyxy2xywhn(xyxy, w0, h0) + labels.append(np.hstack((cat, xywhn))) + + if labels: + labels = np.vstack(labels) + else: + if self.val_mode: + labels = np.zeros((1, 5)) + else: + raise ValueError(f"set val_mode = False to use images with labels") + + return labels + + @staticmethod + def collate_fn(batch): + im, label, path, shapes = zip(*batch) + for i, lb in enumerate(label): + lb[:, 0] = i + return np.concatenate([i[None] for i in im], axis=0), np.concatenate(label, 0), path, shapes + + +def non_max_suppression( + prediction, + conf_thres=0.25, + iou_thres=0.45, + classes=None, + agnostic=False, + multi_label=True, + labels=(), + max_det=300, + nm=0, # number of masks +): + """Non-Maximum Suppression (NMS) on inference results to reject overlapping detections + + Returns: + list of detections, on (n,6) tensor per image [xyxy, conf, cls] + """ + + if isinstance(prediction, (list, tuple)): + prediction = prediction[0] + + bs = prediction.shape[0] # batch size + nc = prediction.shape[2] - nm - 5 # number of classes + xc = prediction[..., 4] > conf_thres # candidates + + # Checks + assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0' + assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0' + + # Settings + # min_wh = 2 # (pixels) minimum box width and height + max_wh = 7680 # (pixels) maximum box width and height + max_nms = 30000 # maximum number of boxes into torchvision.ops.nms() + time_limit = 0.5 + 0.05 * bs # seconds to quit after + redundant = True # require redundant detections + multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img) + merge = False # use merge-NMS + + mi = 5 + nc + output = [torch.zeros((0, 6 + nm))] * bs + for xi, x in enumerate(prediction): + + x = x[xc[xi]] + + if labels and len(labels[xi]): + lb = labels[xi] + v = torch.zeros((len(lb), nc + nm + 5), device=x.device) + v[:, :4] = lb[:, 1:5] + v[:, 4] = 1.0 + v[range(len(lb)), lb[:, 0].long() + 5] = 1.0 + x = torch.cat((x, v), 0) + + # If none remain process next image + if not x.shape[0]: + continue + + # Compute conf + x[:, 5:] *= x[:, 4:5] + + # Box/Mask + box = xywh2xyxy(x[:, :4]) # center_x, center_y, width, height) to (x1, y1, x2, y2) + mask = x[:, mi:] + + # Detections matrix nx6 (xyxy, conf, cls) + if multi_label: + i, j = (x[:, 5:mi] > conf_thres).nonzero(as_tuple=False).T + x = torch.cat( + (box[i], x[i, 5 + j, None], j[:, None].float(), mask[i]), 1) + else: # best class only + conf, j = x[:, 5:mi].max(1, keepdim=True) + x = torch.cat((box, conf, j.float(), mask), + 1)[conf.view(-1) > conf_thres] + + # Filter by class + if classes is not None: + x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)] + + n = x.shape[0] # number of boxes + if not n: # no boxes + continue + elif n > max_nms: + x = x[x[:, 4].argsort(descending=True)[:max_nms]] + else: + x = x[x[:, 4].argsort(descending=True)] + # 
Batched NMS + c = x[:, 5:6] * (0 if agnostic else max_wh) # classes + boxes, scores = x[:, :4] + c, x[:, 4] + i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS + if i.shape[0] > max_det: + i = i[:max_det] + if merge and (1 < n < 3E3): + # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4) + iou = box_iou(boxes[i], boxes) > iou_thres + weights = iou * scores[None] + x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) + if redundant: + i = i[iou.sum(1) > 1] + + output[xi] = x[i] + return output + +def get_coco_accuracy(pred_json, ann_json): + coco = COCO(annotation_file=ann_json) + coco_pred = coco.loadRes(pred_json) + + coco_evaluator = COCOeval(cocoGt=coco, cocoDt=coco_pred, iouType="bbox") + + coco_evaluator.evaluate() + coco_evaluator.accumulate() + coco_evaluator.summarize() + return coco_evaluator.stats + +class COCO2017Evaluator: + def __init__(self, + label_path, + image_size=640, + conf_thres=0.001, + iou_thres=0.65): + + self.conf_thres = conf_thres + self.iou_thres = iou_thres + self.label_path = label_path + self.image_size = image_size + + self.jdict = [] + + # iou vector for mAP@0.5:0.95 + self.iouv = torch.linspace(0.5, 0.95, 10) + self.niou = self.iouv.numel() + + def evaluate(self, pred, all_inputs, nms_count=None): + im = all_inputs[0] + targets = all_inputs[1] + paths = all_inputs[2] + shapes = all_inputs[3] + + _, _, height, width = im.shape + targets[:, 2:] *= np.array((width, height, width, height)) + + pred = torch.from_numpy(pred) + pred = non_max_suppression(pred, self.conf_thres, self.iou_thres) + + for idx, det in enumerate(pred): + img_path = paths[idx] + + predn = det + shape = shapes[idx][0] + scale_boxes(im[idx].shape[1:], predn[:, :4], shape, shapes[idx][1]) # native-space pred + self._save_one_json(predn, self.jdict, img_path, coco80_to_coco91) # append to COCO-JSON dictionary + + + def _save_one_json(self, predn, jdict, path, class_map): + # Save one JSON result in the format + image_id = int(os.path.splitext(os.path.basename(path))[0]) + box = xyxy2xywh(predn[:, :4]) + box[:, :2] -= box[:, 2:] / 2 + for p, b in zip(predn.tolist(), box.tolist()): + jdict.append({ + 'image_id': image_id, + 'category_id': class_map[int(p[5])], + 'bbox': [round(x, 3) for x in b], + 'score': round(p[4], 5) + }) + + + def summary(self): + if len(self.jdict): + pred_json = os.path.join("coco2017_predictions.json") + with open(pred_json, 'w') as f: + json.dump(self.jdict, f) + result = get_coco_accuracy(pred_json, self.label_path) + else: + raise ValueError("can not find generated json dict for pycocotools") + return result +
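The `CalibrationDataLoader` in quantize.py can be exercised on its own to confirm that the calibration batches look sane before launching a full `quantize_static` pass. A minimal sketch, assuming the COCO 2017 validation images and annotations are available locally (the paths below are placeholders) and that the script sits next to quantize.py and inference.py:

```python
# Sanity-check the INT8 calibration feed used by quantize.py.
import numpy as np

from inference import get_dataloader          # builds a COCO2017Dataset-backed DataLoader
from quantize import CalibrationDataLoader

data_path = "/path/to/coco/images/val2017"                       # placeholder path
label_path = "/path/to/coco/annotations/instances_val2017.json"  # placeholder path

dataloader = get_dataloader(data_path, label_path, batch_size=1, num_workers=4)
reader = CalibrationDataLoader(["images"], dataloader, cnt_limit=3)

# quantize_static() consumes the reader the same way: repeated get_next() calls until None.
while (feed := reader.get_next()) is not None:
    img = feed["images"]
    # Each sample is a letterboxed 1x3x640x640 float32 tensor scaled to [0, 1].
    print(img.shape, img.dtype, float(img.min()), float(img.max()))
```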
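The `non_max_suppression` helper in utils.py follows the YOLOv5-style layout of `[cx, cy, w, h, objectness, class scores...]` per candidate box and returns one `[x1, y1, x2, y2, conf, cls]` tensor per image. A self-contained toy example with a single class (the numbers are made up purely for illustration):

```python
# Two heavily overlapping candidates; NMS should keep only the higher-scoring one.
import torch
from utils import non_max_suppression

pred = torch.tensor([[
    #   cx,    cy,    w,    h,  obj,  cls0
    [100.0, 100.0, 40.0, 40.0, 0.90, 0.95],
    [102.0, 101.0, 42.0, 38.0, 0.80, 0.90],
]])                                            # shape: [batch=1, boxes=2, 5 + nc=1]

dets = non_max_suppression(pred, conf_thres=0.25, iou_thres=0.45)
print(dets[0])                                 # one surviving row: [x1, y1, x2, y2, conf, cls]
```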
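Likewise, `letterbox` and `scale_boxes` are the two halves of the preprocessing contract: boxes predicted in the 640x640 letterboxed space are mapped back to original-image coordinates before being written to the COCO JSON. A quick check with an arbitrary 375x500 dummy image:

```python
# Round-trip a box through the letterbox geometry used by COCO2017Dataset.
import numpy as np
from utils import letterbox, xywh2xyxy, xyxy2xywh, scale_boxes

img = np.full((375, 500, 3), 114, dtype=np.uint8)           # dummy HWC image
padded, ratio, (dw, dh) = letterbox(img, new_shape=640)
print(padded.shape, ratio, (dw, dh))                         # (640, 640, 3), scale, padding

# A box predicted in the 640x640 letterboxed space, restored to 375x500 coordinates.
boxes_xywh = np.array([[320.0, 320.0, 100.0, 50.0]])         # cx, cy, w, h
boxes_xyxy = xywh2xyxy(boxes_xywh)
restored = scale_boxes((640, 640), boxes_xyxy.copy(), (375, 500))
print(xyxy2xywh(restored))
```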