diff --git a/README.md b/README.md
index 90c697a015b9f333988e3a205fb5176f0b4749b8..a65a7d7c0605ee94158de6dd0a9277a4971ec75e 100644
--- a/README.md
+++ b/README.md
@@ -54,7 +54,29 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型
- |
- |
-
+
+ ConvNeXt-Small |
+ FP16 |
+ Supported |
+ - |
+
+
+ INT8 |
+ - |
+ - |
+
+
+ CSPDarkNet50 |
+ FP16 |
+ Supported |
+ - |
+
+
+ INT8 |
+ - |
+ - |
+
+
CSPResNet50 |
FP16 |
- |
@@ -65,6 +87,17 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型
- |
Supported |
+
+ DeiT-tiny |
+ FP16 |
+ Supported |
+ - |
+
+
+ INT8 |
+ - |
+ - |
+
DenseNet121 |
FP16 |
@@ -87,6 +120,17 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型
- |
- |
+
+ DenseNet169 |
+ FP16 |
+ Supported |
+ - |
+
+
+ INT8 |
+ - |
+ - |
+
EfficientNet-B0 |
FP16 |
@@ -99,7 +143,7 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型
Supported |
- EfficientNet_B1 |
+ EfficientNet-B1 |
FP16 |
Supported |
Supported |
@@ -110,9 +154,20 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型
Supported |
- EfficientNetV2 |
+ EfficientNet-B2 |
FP16 |
+ Supported |
- |
+
+
+ INT8 |
+ - |
+ - |
+
+
+ EfficientNetV2 |
+ FP16 |
+ Supported |
Supported |
@@ -222,7 +277,7 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型
RepVGG |
FP16 |
- - |
+ Supported |
Supported |
@@ -329,6 +384,17 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型
- |
- |
+
+ SEResNet50 |
+ FP16 |
+ Supported |
+ - |
+
+
+ INT8 |
+ - |
+ - |
+
ShuffleNetV1 |
FP16 |
@@ -351,6 +417,17 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型
- |
- |
+
+ ShuffleNetV2_x1_0 |
+ FP16 |
+ Supported |
+ - |
+
+
+ INT8 |
+ - |
+ - |
+
SqueezeNet 1.0 |
FP16 |
@@ -417,6 +494,17 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型
IGIE |
IxRT |
+
+ ATSS |
+ FP16 |
+ Supported |
+ - |
+
+
+ INT8 |
+ - |
+ - |
+
CenterNet |
FP16 |
@@ -442,7 +530,7 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型
FCOS |
FP16 |
- - |
+ Supported |
Supported |
@@ -461,6 +549,17 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型
- |
- |
+
+ FSAF |
+ FP16 |
+ Supported |
+ - |
+
+
+ INT8 |
+ - |
+ - |
+
HRNet |
FP16 |
@@ -472,6 +571,17 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型
- |
- |
+
+ RetinaFace |
+ FP16 |
+ Supported |
+ - |
+
+
+ INT8 |
+ - |
+ - |
+
RetinaNet |
FP16 |
@@ -483,6 +593,17 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型
- |
- |
+
+ RTMDet |
+ FP16 |
+ Supported |
+ - |
+
+
+ INT8 |
+ - |
+ - |
+
YOLOv3 |
FP16 |
@@ -573,6 +694,28 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型
+### Pose Estimation
+
+
+
+ Models |
+ Precision |
+ IGIE |
+ IxRT |
+
+
+ RTMPose |
+ FP16 |
+ Supported |
+ - |
+
+
+ INT8 |
+ - |
+ - |
+
+
+
### Segmentation
diff --git a/data/datasets/README.md b/data/datasets/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..f40c2e824e94c038e186d9e1ffa149a8382a41e2
--- /dev/null
+++ b/data/datasets/README.md
@@ -0,0 +1 @@
+# This is the default location for the datasets required by the inference models
diff --git a/models/cv/classification/convnext_small/igie/README.md b/models/cv/classification/convnext_small/igie/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..e0aa51a06162b6d0a3c73826e93204c4bd362b0a
--- /dev/null
+++ b/models/cv/classification/convnext_small/igie/README.md
@@ -0,0 +1,47 @@
+# ConvNeXt Small
+
+## Description
+
+The ConvNeXt Small model represents a significant stride in the evolution of convolutional neural networks (CNNs), introduced by researchers at Facebook AI Research (FAIR) and UC Berkeley. It is part of the ConvNeXt family, which challenges the dominance of Vision Transformers (ViTs) in the realm of visual recognition tasks.
+
+## Setup
+
+### Install
+
+```bash
+pip3 install onnx
+pip3 install tqdm
+```
+
+### Download
+
+Pretrained model:
+
+Dataset: to download the validation dataset.
+
+### Model Conversion
+
+```bash
+python3 export.py --weight convnext_small-0c510722.pth --output convnext_small.onnx
+```
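+
+If you want to sanity-check the exported ONNX file before building the engine, a minimal sketch (using the `onnx` package installed above, and the file name produced by the export command) is:
+
+```python
+import onnx
+
+model = onnx.load("convnext_small.onnx")    # path produced by the export command above
+onnx.checker.check_model(model)             # raises if the graph is malformed
+print([i.name for i in model.graph.input])  # should list the "input" tensor
+```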
+
+## Inference
+
+```bash
+export DATASETS_DIR=/Path/to/imagenet_val/
+```
+
+### FP16
+
+```bash
+# Accuracy
+bash scripts/infer_convnext_small_fp16_accuracy.sh
+# Performance
+bash scripts/infer_convnext_small_fp16_performance.sh
+```
+
+## Results
+
+| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) |
+| -------------- | --------- | --------- | ------- | -------- | -------- |
+| ConvNeXt Small | 32 | FP16 | 725.437 | 83.267 | 96.515 |
diff --git a/models/cv/classification/convnext_small/igie/build_engine.py b/models/cv/classification/convnext_small/igie/build_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..d3626ae76cc9781d9a01ec3d3e2afbdbca409ff5
--- /dev/null
+++ b/models/cv/classification/convnext_small/igie/build_engine.py
@@ -0,0 +1,73 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import tvm
+import argparse
+from tvm import relay
+from tvm.relay.import_model import import_model_to_igie
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--model_path",
+ type=str,
+ required=True,
+ help="original model path.")
+
+ parser.add_argument("--engine_path",
+ type=str,
+ required=True,
+ help="igie export engine path.")
+
+ parser.add_argument("--input",
+ type=str,
+ required=True,
+ help="""
+ input info of the model, format should be:
+ input_name:input_shape
+ eg: --input input:1,3,224,224.
+ """)
+
+ parser.add_argument("--precision",
+ type=str,
+ choices=["fp32", "fp16", "int8"],
+ required=True,
+ help="model inference precision.")
+
+ args = parser.parse_args()
+
+ return args
+
+def main():
+ args = parse_args()
+
+ # get input valueinfo
+ input_name, input_shape = args.input.split(":")
+ shape = tuple([int(s) for s in input_shape.split(",")])
+ input_dict = {input_name: shape}
+
+ target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer")
+
+ mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie")
+
+ # build engine
+ lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision)
+
+ # export engine
+ lib.export_library(args.engine_path)
+
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/models/cv/classification/convnext_small/igie/export.py b/models/cv/classification/convnext_small/igie/export.py
new file mode 100644
index 0000000000000000000000000000000000000000..1ec39771ca57ad629d1ebbd5dac4ee71cd3d303c
--- /dev/null
+++ b/models/cv/classification/convnext_small/igie/export.py
@@ -0,0 +1,61 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import torch
+import torchvision
+import argparse
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--weight",
+ type=str,
+ required=True,
+ help="pytorch model weight.")
+
+ parser.add_argument("--output",
+ type=str,
+ required=True,
+ help="export onnx model path.")
+
+ args = parser.parse_args()
+ return args
+
+def main():
+ args = parse_args()
+
+ model = torchvision.models.convnext_small()
+ model.load_state_dict(torch.load(args.weight))
+ model.eval()
+
+ input_names = ['input']
+ output_names = ['output']
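+ # mark dim 0 (the batch dimension) of input and output as dynamic so the ONNX graph can serve any batch size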
+ dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}}
+ dummy_input = torch.randn(1, 3, 224, 224)
+
+ torch.onnx.export(
+ model,
+ dummy_input,
+ args.output,
+ input_names = input_names,
+ dynamic_axes = dynamic_axes,
+ output_names = output_names,
+ opset_version=13
+ )
+
+ print("Export onnx model successfully! ")
+
+if __name__ == "__main__":
+ main()
diff --git a/models/cv/classification/convnext_small/igie/inference.py b/models/cv/classification/convnext_small/igie/inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..3aef3ec70fa7e88917c54aeb8242fc73a910c696
--- /dev/null
+++ b/models/cv/classification/convnext_small/igie/inference.py
@@ -0,0 +1,186 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import sys
+import argparse
+import tvm
+import torch
+import torchvision
+import numpy as np
+from tvm import relay
+from tqdm import tqdm
+from torchvision import transforms
+from torchvision.transforms.functional import InterpolationMode
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--engine",
+ type=str,
+ required=True,
+ help="igie engine path.")
+
+ parser.add_argument("--batchsize",
+ type=int,
+ required=True,
+ help="inference batch size.")
+
+ parser.add_argument("--datasets",
+ type=str,
+ required=True,
+ help="datasets path.")
+
+ parser.add_argument("--input_name",
+ type=str,
+ required=True,
+ help="input name of the model.")
+
+ parser.add_argument("--warmup",
+ type=int,
+ default=3,
+ help="number of warmup before test.")
+
+ parser.add_argument("--num_workers",
+ type=int,
+ default=16,
+ help="number of workers used in pytorch dataloader.")
+
+ parser.add_argument("--acc_target",
+ type=float,
+ default=None,
+ help="Model inference Accuracy target.")
+
+ parser.add_argument("--fps_target",
+ type=float,
+ default=None,
+ help="Model inference FPS target.")
+
+ parser.add_argument("--perf_only",
+ type=bool,
+ default=False,
+ help="Run performance test only")
+
+ args = parser.parse_args()
+
+ return args
+
+def get_dataloader(data_path, batch_size, num_workers):
+ dataset = torchvision.datasets.ImageFolder(
+ data_path,
+ transforms.Compose(
+ [
+ transforms.Resize(256, interpolation=InterpolationMode.BILINEAR),
+ transforms.CenterCrop(224),
+ transforms.PILToTensor(),
+ transforms.ConvertImageDtype(torch.float),
+ transforms.Normalize(
+ mean=(0.485, 0.456, 0.406),
+ std=(0.229, 0.224, 0.225)
+ )
+ ]
+ )
+ )
+
+ dataloader = torch.utils.data.DataLoader(dataset, batch_size, num_workers=num_workers)
+
+ return dataloader
+
+def get_topk_accuracy(pred, label):
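+ # count how many samples have the correct label as the top-1 prediction and within the top-5 predictions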
+ if isinstance(pred, np.ndarray):
+ pred = torch.from_numpy(pred)
+
+ if isinstance(label, np.ndarray):
+ label = torch.from_numpy(label)
+
+ top1_acc = 0
+ top5_acc = 0
+ for idx in range(len(label)):
+ label_value = label[idx]
+ if label_value == torch.topk(pred[idx].float(), 1).indices.data:
+ top1_acc += 1
+ top5_acc += 1
+
+ elif label_value in torch.topk(pred[idx].float(), 5).indices.data:
+ top5_acc += 1
+
+ return top1_acc, top5_acc
+
+def main():
+ args = parse_args()
+
+ batch_size = args.batchsize
+
+ # create iluvatar target & device
+ target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer")
+ device = tvm.device(target.kind.name, 0)
+
+ # load engine
+ lib = tvm.runtime.load_module(args.engine)
+
+ # create runtime from engine
+ module = tvm.contrib.graph_executor.GraphModule(lib["default"](device))
+
+ # just run perf test
+ if args.perf_only:
+ ftimer = module.module.time_evaluator("run", device, number=100, repeat=1)
+ prof_res = np.array(ftimer().results) * 1000
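+ # prof_res is the per-run latency in milliseconds, so FPS = batch_size * 1000 / mean latency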
+ fps = batch_size * 1000 / np.mean(prof_res)
+ print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}")
+ else:
+ # warm up
+ for _ in range(args.warmup):
+ module.run()
+
+ # get dataloader
+ dataloader = get_dataloader(args.datasets, batch_size, args.num_workers)
+
+ top1_acc = 0
+ top5_acc = 0
+ total_num = 0
+
+ for image, label in tqdm(dataloader):
+
+ # pad the last batch
+ pad_batch = len(image) != batch_size
+
+ if pad_batch:
+ origin_size = len(image)
+ image = np.resize(image, (batch_size, *image.shape[1:]))
+
+ module.set_input(args.input_name, tvm.nd.array(image, device))
+
+ # run inference
+ module.run()
+
+ pred = module.get_output(0).asnumpy()
+
+ if pad_batch:
+ pred = pred[:origin_size]
+
+ # get batch accuracy
+ batch_top1_acc, batch_top5_acc = get_topk_accuracy(pred, label)
+
+ top1_acc += batch_top1_acc
+ top5_acc += batch_top5_acc
+ total_num += batch_size
+
+ result_stat = {}
+ result_stat["acc@1"] = round(top1_acc / total_num * 100.0, 3)
+ result_stat["acc@5"] = round(top5_acc / total_num * 100.0, 3)
+
+ print(f"\n* Top1 acc: {result_stat['acc@1']} %, Top5 acc: {result_stat['acc@5']} %")
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/models/cv/classification/convnext_small/igie/scripts/infer_convnext_small_fp16_accuracy.sh b/models/cv/classification/convnext_small/igie/scripts/infer_convnext_small_fp16_accuracy.sh
new file mode 100644
index 0000000000000000000000000000000000000000..c7837d790c90b4aa6af02ecc15e517d82acfc9c4
--- /dev/null
+++ b/models/cv/classification/convnext_small/igie/scripts/infer_convnext_small_fp16_accuracy.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+batchsize=32
+model_path="convnext_small.onnx"
+datasets_path=${DATASETS_DIR}
+
+# build engine
+python3 build_engine.py \
+ --model_path ${model_path} \
+ --input input:${batchsize},3,224,224 \
+ --precision fp16 \
+ --engine_path convnext_small_bs_${batchsize}_fp16.so
+
+
+# inference
+python3 inference.py \
+ --engine convnext_small_bs_${batchsize}_fp16.so \
+ --batchsize ${batchsize} \
+ --input_name input \
+ --datasets ${datasets_path}
\ No newline at end of file
diff --git a/models/cv/classification/convnext_small/igie/scripts/infer_convnext_small_fp16_performance.sh b/models/cv/classification/convnext_small/igie/scripts/infer_convnext_small_fp16_performance.sh
new file mode 100644
index 0000000000000000000000000000000000000000..cde8da5aaf4e74f7d6681cc43915e0faf6d1bbfc
--- /dev/null
+++ b/models/cv/classification/convnext_small/igie/scripts/infer_convnext_small_fp16_performance.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+batchsize=32
+model_path="convnext_small.onnx"
+datasets_path=${DATASETS_DIR}
+
+# build engine
+python3 build_engine.py \
+ --model_path ${model_path} \
+ --input input:${batchsize},3,224,224 \
+ --precision fp16 \
+ --engine_path convnext_small_bs_${batchsize}_fp16.so
+
+
+# inference
+python3 inference.py \
+ --engine convnext_small_bs_${batchsize}_fp16.so \
+ --batchsize ${batchsize} \
+ --input_name input \
+ --datasets ${datasets_path} \
+ --perf_only True
\ No newline at end of file
diff --git a/models/cv/classification/cspdarknet50/igie/README.md b/models/cv/classification/cspdarknet50/igie/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..99a374a454e09f84799c75ab3d444315a5b8bb40
--- /dev/null
+++ b/models/cv/classification/cspdarknet50/igie/README.md
@@ -0,0 +1,68 @@
+# CSPDarkNet50
+
+## Description
+
+CSPDarkNet50 is an enhanced convolutional neural network architecture that reduces redundant computations by integrating cross-stage partial network features and truncating gradient flow, thereby maintaining high accuracy while lowering computational costs.
+
+## Setup
+
+### Install
+
+```bash
+# Install libGL
+## CentOS
+yum install -y mesa-libGL
+## Ubuntu
+apt install -y libgl1-mesa-dev
+
+pip3 install onnx
+pip3 install tqdm
+pip3 install onnxsim
+pip3 install mmcv==1.5.3
+pip3 install mmcls
+```
+
+### Download
+
+Pretrained model:
+
+Dataset: to download the validation dataset.
+
+### Model Conversion
+
+```bash
+# git clone mmpretrain
+git clone -b v0.24.0 https://github.com/open-mmlab/mmpretrain.git
+
+# export onnx model
+python3 export.py --cfg mmpretrain/configs/cspnet/cspdarknet50_8xb32_in1k.py --weight cspdarknet50_3rdparty_8xb32_in1k_20220329-bd275287.pth --output cspdarknet50.onnx
+
+# Use onnxsim to optimize the onnx model
+onnxsim cspdarknet50.onnx cspdarknet50_opt.onnx
+
+```
+
+## Inference
+
+```bash
+export DATASETS_DIR=/Path/to/imagenet_val/
+```
+
+### FP16
+
+```bash
+# Accuracy
+bash scripts/infer_cspdarknet_fp16_accuracy.sh
+# Performance
+bash scripts/infer_cspdarknet_fp16_performance.sh
+```
+
+## Results
+
+| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) |
+| ------------ | --------- | --------- | -------- | -------- | -------- |
+| CSPDarkNet50 | 32 | FP16 | 3214.387 | 79.063 | 94.492 |
+
+## Reference
+
+CSPDarkNet50:
diff --git a/models/cv/classification/cspdarknet50/igie/build_engine.py b/models/cv/classification/cspdarknet50/igie/build_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..d3626ae76cc9781d9a01ec3d3e2afbdbca409ff5
--- /dev/null
+++ b/models/cv/classification/cspdarknet50/igie/build_engine.py
@@ -0,0 +1,73 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import tvm
+import argparse
+from tvm import relay
+from tvm.relay.import_model import import_model_to_igie
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--model_path",
+ type=str,
+ required=True,
+ help="original model path.")
+
+ parser.add_argument("--engine_path",
+ type=str,
+ required=True,
+ help="igie export engine path.")
+
+ parser.add_argument("--input",
+ type=str,
+ required=True,
+ help="""
+ input info of the model, format should be:
+ input_name:input_shape
+ eg: --input input:1,3,224,224.
+ """)
+
+ parser.add_argument("--precision",
+ type=str,
+ choices=["fp32", "fp16", "int8"],
+ required=True,
+ help="model inference precision.")
+
+ args = parser.parse_args()
+
+ return args
+
+def main():
+ args = parse_args()
+
+ # get input valueinfo
+ input_name, input_shape = args.input.split(":")
+ shape = tuple([int(s) for s in input_shape.split(",")])
+ input_dict = {input_name: shape}
+
+ target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer")
+
+ mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie")
+
+ # build engine
+ lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision)
+
+ # export engine
+ lib.export_library(args.engine_path)
+
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/models/cv/classification/cspdarknet50/igie/export.py b/models/cv/classification/cspdarknet50/igie/export.py
new file mode 100644
index 0000000000000000000000000000000000000000..7dc8d9fde531853bb9d25966dc7f706f2d9276dd
--- /dev/null
+++ b/models/cv/classification/cspdarknet50/igie/export.py
@@ -0,0 +1,78 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import argparse
+
+import torch
+from mmcls.apis import init_model
+
+class Model(torch.nn.Module):
+ def __init__(self, config_file, checkpoint_file):
+ super().__init__()
+ self.model = init_model(config_file, checkpoint_file, device="cpu")
+
+ def forward(self, x):
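+ # run backbone -> neck -> classification fc to get raw logits, bypassing mmcls post-processing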
+ feat = self.model.backbone(x)
+ feat = self.model.neck(feat)
+ out_head = self.model.head.fc(feat[0])
+ return out_head
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--weight",
+ type=str,
+ required=True,
+ help="pytorch model weight.")
+
+ parser.add_argument("--cfg",
+ type=str,
+ required=True,
+ help="model config file.")
+
+ parser.add_argument("--output",
+ type=str,
+ required=True,
+ help="export onnx model path.")
+
+ args = parser.parse_args()
+ return args
+
+def main():
+ args = parse_args()
+
+ config_file = args.cfg
+ checkpoint_file = args.weight
+ model = Model(config_file, checkpoint_file).eval()
+
+ input_names = ['input']
+ output_names = ['output']
+ dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}}
+ dummy_input = torch.randn(1, 3, 224, 224)
+
+ torch.onnx.export(
+ model,
+ dummy_input,
+ args.output,
+ input_names = input_names,
+ dynamic_axes = dynamic_axes,
+ output_names = output_names,
+ opset_version=13
+ )
+
+ print("Export onnx model successfully! ")
+
+if __name__ == '__main__':
+ main()
+
diff --git a/models/cv/classification/cspdarknet50/igie/inference.py b/models/cv/classification/cspdarknet50/igie/inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..1b0c602a8f5c899e34f621851c10a5d00c47583c
--- /dev/null
+++ b/models/cv/classification/cspdarknet50/igie/inference.py
@@ -0,0 +1,185 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import sys
+import argparse
+import tvm
+import torch
+import torchvision
+import numpy as np
+from tvm import relay
+from tqdm import tqdm
+from torchvision import transforms
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--engine",
+ type=str,
+ required=True,
+ help="igie engine path.")
+
+ parser.add_argument("--batchsize",
+ type=int,
+ required=True,
+ help="inference batch size.")
+
+ parser.add_argument("--datasets",
+ type=str,
+ required=True,
+ help="datasets path.")
+
+ parser.add_argument("--input_name",
+ type=str,
+ required=True,
+ help="input name of the model.")
+
+ parser.add_argument("--warmup",
+ type=int,
+ default=3,
+ help="number of warmup before test.")
+
+ parser.add_argument("--num_workers",
+ type=int,
+ default=16,
+ help="number of workers used in pytorch dataloader.")
+
+ parser.add_argument("--acc_target",
+ type=float,
+ default=None,
+ help="Model inference Accuracy target.")
+
+ parser.add_argument("--fps_target",
+ type=float,
+ default=None,
+ help="Model inference FPS target.")
+
+ parser.add_argument("--perf_only",
+ type=bool,
+ default=False,
+ help="Run performance test only")
+
+ args = parser.parse_args()
+
+ return args
+
+def get_dataloader(data_path, batch_size, num_workers):
+ dataset = torchvision.datasets.ImageFolder(
+ data_path,
+ transforms.Compose(
+ [
+ transforms.Resize(256),
+ transforms.CenterCrop(224),
+ transforms.PILToTensor(),
+ transforms.ConvertImageDtype(torch.float),
+ transforms.Normalize(
+ mean=(0.485, 0.456, 0.406),
+ std=(0.229, 0.224, 0.225)
+ )
+ ]
+ )
+ )
+
+ dataloader = torch.utils.data.DataLoader(dataset, batch_size, num_workers=num_workers)
+
+ return dataloader
+
+def get_topk_accuracy(pred, label):
+ if isinstance(pred, np.ndarray):
+ pred = torch.from_numpy(pred)
+
+ if isinstance(label, np.ndarray):
+ label = torch.from_numpy(label)
+
+ top1_acc = 0
+ top5_acc = 0
+ for idx in range(len(label)):
+ label_value = label[idx]
+ if label_value == torch.topk(pred[idx].float(), 1).indices.data:
+ top1_acc += 1
+ top5_acc += 1
+
+ elif label_value in torch.topk(pred[idx].float(), 5).indices.data:
+ top5_acc += 1
+
+ return top1_acc, top5_acc
+
+def main():
+ args = parse_args()
+
+ batch_size = args.batchsize
+
+ # create iluvatar target & device
+ target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer")
+ device = tvm.device(target.kind.name, 0)
+
+ # load engine
+ lib = tvm.runtime.load_module(args.engine)
+
+ # create runtime from engine
+ module = tvm.contrib.graph_executor.GraphModule(lib["default"](device))
+
+ # just run perf test
+ if args.perf_only:
+ ftimer = module.module.time_evaluator("run", device, number=100, repeat=1)
+ prof_res = np.array(ftimer().results) * 1000
+ fps = batch_size * 1000 / np.mean(prof_res)
+ print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}")
+ else:
+ # warm up
+ for _ in range(args.warmup):
+ module.run()
+
+ # get dataloader
+ dataloader = get_dataloader(args.datasets, batch_size, args.num_workers)
+
+ top1_acc = 0
+ top5_acc = 0
+ total_num = 0
+
+ for image, label in tqdm(dataloader):
+
+ # pad the last batch
+ pad_batch = len(image) != batch_size
+
+ if pad_batch:
+ origin_size = len(image)
+ image = np.resize(image, (batch_size, *image.shape[1:]))
+
+ module.set_input(args.input_name, tvm.nd.array(image, device))
+
+ # run inference
+ module.run()
+
+ pred = module.get_output(0).asnumpy()
+
+ if pad_batch:
+ pred = pred[:origin_size]
+
+ # get batch accuracy
+ batch_top1_acc, batch_top5_acc = get_topk_accuracy(pred, label)
+
+ top1_acc += batch_top1_acc
+ top5_acc += batch_top5_acc
+ total_num += batch_size
+
+ result_stat = {}
+ result_stat["acc@1"] = round(top1_acc / total_num * 100.0, 3)
+ result_stat["acc@5"] = round(top5_acc / total_num * 100.0, 3)
+
+ print(f"\n* Top1 acc: {result_stat['acc@1']} %, Top5 acc: {result_stat['acc@5']} %")
+
+if __name__ == "__main__":
+ main()
diff --git a/models/cv/classification/cspdarknet50/igie/scripts/infer_cspdarknet_fp16_accuracy.sh b/models/cv/classification/cspdarknet50/igie/scripts/infer_cspdarknet_fp16_accuracy.sh
new file mode 100644
index 0000000000000000000000000000000000000000..5a1ab2b29abf074f7e2e532ad6c6f7c38d48357c
--- /dev/null
+++ b/models/cv/classification/cspdarknet50/igie/scripts/infer_cspdarknet_fp16_accuracy.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+batchsize=32
+model_path="cspdarknet50_opt.onnx"
+datasets_path=${DATASETS_DIR}
+
+# build engine
+python3 build_engine.py \
+ --model_path ${model_path} \
+ --input input:${batchsize},3,224,224 \
+ --precision fp16 \
+ --engine_path cspdarknet_bs_${batchsize}_fp16.so
+
+
+# inference
+python3 inference.py \
+ --engine cspdarknet_bs_${batchsize}_fp16.so \
+ --batchsize ${batchsize} \
+ --input_name input \
+ --datasets ${datasets_path}
\ No newline at end of file
diff --git a/models/cv/classification/cspdarknet50/igie/scripts/infer_cspdarknet_fp16_performance.sh b/models/cv/classification/cspdarknet50/igie/scripts/infer_cspdarknet_fp16_performance.sh
new file mode 100644
index 0000000000000000000000000000000000000000..e033958ccf69e04357f4b13fc06ce1fa8bc11d68
--- /dev/null
+++ b/models/cv/classification/cspdarknet50/igie/scripts/infer_cspdarknet_fp16_performance.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+batchsize=32
+model_path="cspdarknet50_opt.onnx"
+datasets_path=${DATASETS_DIR}
+
+# build engine
+python3 build_engine.py \
+ --model_path ${model_path} \
+ --input input:${batchsize},3,224,224 \
+ --precision fp16 \
+ --engine_path cspdarknet_bs_${batchsize}_fp16.so
+
+
+# inference
+python3 inference.py \
+ --engine cspdarknet_bs_${batchsize}_fp16.so \
+ --batchsize ${batchsize} \
+ --input_name input \
+ --datasets ${datasets_path} \
+ --perf_only True
\ No newline at end of file
diff --git a/models/cv/classification/deit_tiny/igie/README.md b/models/cv/classification/deit_tiny/igie/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..663ca8b7bc56eb702922f3ff3457086ac6f270e5
--- /dev/null
+++ b/models/cv/classification/deit_tiny/igie/README.md
@@ -0,0 +1,68 @@
+# DeiT-tiny
+
+## Description
+
+DeiT-tiny is a lightweight vision transformer designed for data-efficient learning. It achieves fast training and high accuracy on small datasets through attention-based distillation, while keeping the model simple and efficient.
+
+## Setup
+
+### Install
+
+```bash
+# Install libGL
+## CentOS
+yum install -y mesa-libGL
+## Ubuntu
+apt install -y libgl1-mesa-dev
+
+pip3 install onnx
+pip3 install tqdm
+pip3 install onnxsim
+pip3 install mmcv==1.5.3
+pip3 install mmcls
+```
+
+### Download
+
+Pretrained model:
+
+Dataset: to download the validation dataset.
+
+### Model Conversion
+
+```bash
+# git clone mmpretrain
+git clone -b v0.24.0 https://github.com/open-mmlab/mmpretrain.git
+
+# export onnx model
+python3 export.py --cfg mmpretrain/configs/deit/deit-tiny_pt-4xb256_in1k.py --weight deit-tiny_pt-4xb256_in1k_20220218-13b382a0.pth --output deit_tiny.onnx
+
+# Use onnxsim to optimize the onnx model
+onnxsim deit_tiny.onnx deit_tiny_opt.onnx
+
+```
+
+## Inference
+
+```bash
+export DATASETS_DIR=/Path/to/imagenet_val/
+```
+
+### FP16
+
+```bash
+# Accuracy
+bash scripts/infer_deit_tiny_fp16_accuracy.sh
+# Performance
+bash scripts/infer_deit_tiny_fp16_performance.sh
+```
+
+## Results
+
+| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) |
+| --------- | --------- | --------- | -------- | -------- | -------- |
+| DeiT-tiny | 32 | FP16 | 2172.771 | 74.334 | 92.175 |
+
+## Reference
+
+DeiT-tiny:
diff --git a/models/cv/classification/deit_tiny/igie/build_engine.py b/models/cv/classification/deit_tiny/igie/build_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..d3626ae76cc9781d9a01ec3d3e2afbdbca409ff5
--- /dev/null
+++ b/models/cv/classification/deit_tiny/igie/build_engine.py
@@ -0,0 +1,73 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import tvm
+import argparse
+from tvm import relay
+from tvm.relay.import_model import import_model_to_igie
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--model_path",
+ type=str,
+ required=True,
+ help="original model path.")
+
+ parser.add_argument("--engine_path",
+ type=str,
+ required=True,
+ help="igie export engine path.")
+
+ parser.add_argument("--input",
+ type=str,
+ required=True,
+ help="""
+ input info of the model, format should be:
+ input_name:input_shape
+ eg: --input input:1,3,224,224.
+ """)
+
+ parser.add_argument("--precision",
+ type=str,
+ choices=["fp32", "fp16", "int8"],
+ required=True,
+ help="model inference precision.")
+
+ args = parser.parse_args()
+
+ return args
+
+def main():
+ args = parse_args()
+
+ # get input valueinfo
+ input_name, input_shape = args.input.split(":")
+ shape = tuple([int(s) for s in input_shape.split(",")])
+ input_dict = {input_name: shape}
+
+ target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer")
+
+ mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie")
+
+ # build engine
+ lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision)
+
+ # export engine
+ lib.export_library(args.engine_path)
+
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/models/cv/classification/deit_tiny/igie/export.py b/models/cv/classification/deit_tiny/igie/export.py
new file mode 100644
index 0000000000000000000000000000000000000000..0078670ffbf4bbcce3358d4a2cedc42ce61176f5
--- /dev/null
+++ b/models/cv/classification/deit_tiny/igie/export.py
@@ -0,0 +1,78 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import argparse
+
+import torch
+from mmcls.apis import init_model
+
+class Model(torch.nn.Module):
+ def __init__(self, config_file, checkpoint_file):
+ super().__init__()
+ self.model = init_model(config_file, checkpoint_file, device="cpu")
+
+ def forward(self, x):
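+ # run backbone -> head.pre_logits -> final linear layer to get raw logits, bypassing mmcls post-processing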
+ feat = self.model.backbone(x)
+ head = self.model.head.pre_logits(feat)
+ out_head = self.model.head.layers.head(head)
+ return out_head
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--weight",
+ type=str,
+ required=True,
+ help="pytorch model weight.")
+
+ parser.add_argument("--cfg",
+ type=str,
+ required=True,
+ help="model config file.")
+
+ parser.add_argument("--output",
+ type=str,
+ required=True,
+ help="export onnx model path.")
+
+ args = parser.parse_args()
+ return args
+
+def main():
+ args = parse_args()
+
+ config_file = args.cfg
+ checkpoint_file = args.weight
+ model = Model(config_file, checkpoint_file).eval()
+
+ input_names = ['input']
+ output_names = ['output']
+ dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}}
+ dummy_input = torch.randn(1, 3, 224, 224)
+
+ torch.onnx.export(
+ model,
+ dummy_input,
+ args.output,
+ input_names = input_names,
+ dynamic_axes = dynamic_axes,
+ output_names = output_names,
+ opset_version=13
+ )
+
+ print("Export onnx model successfully! ")
+
+if __name__ == '__main__':
+ main()
+
diff --git a/models/cv/classification/deit_tiny/igie/inference.py b/models/cv/classification/deit_tiny/igie/inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..1b0c602a8f5c899e34f621851c10a5d00c47583c
--- /dev/null
+++ b/models/cv/classification/deit_tiny/igie/inference.py
@@ -0,0 +1,185 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import sys
+import argparse
+import tvm
+import torch
+import torchvision
+import numpy as np
+from tvm import relay
+from tqdm import tqdm
+from torchvision import transforms
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--engine",
+ type=str,
+ required=True,
+ help="igie engine path.")
+
+ parser.add_argument("--batchsize",
+ type=int,
+ required=True,
+ help="inference batch size.")
+
+ parser.add_argument("--datasets",
+ type=str,
+ required=True,
+ help="datasets path.")
+
+ parser.add_argument("--input_name",
+ type=str,
+ required=True,
+ help="input name of the model.")
+
+ parser.add_argument("--warmup",
+ type=int,
+ default=3,
+ help="number of warmup before test.")
+
+ parser.add_argument("--num_workers",
+ type=int,
+ default=16,
+ help="number of workers used in pytorch dataloader.")
+
+ parser.add_argument("--acc_target",
+ type=float,
+ default=None,
+ help="Model inference Accuracy target.")
+
+ parser.add_argument("--fps_target",
+ type=float,
+ default=None,
+ help="Model inference FPS target.")
+
+ parser.add_argument("--perf_only",
+ type=bool,
+ default=False,
+ help="Run performance test only")
+
+ args = parser.parse_args()
+
+ return args
+
+def get_dataloader(data_path, batch_size, num_workers):
+ dataset = torchvision.datasets.ImageFolder(
+ data_path,
+ transforms.Compose(
+ [
+ transforms.Resize(256),
+ transforms.CenterCrop(224),
+ transforms.PILToTensor(),
+ transforms.ConvertImageDtype(torch.float),
+ transforms.Normalize(
+ mean=(0.485, 0.456, 0.406),
+ std=(0.229, 0.224, 0.225)
+ )
+ ]
+ )
+ )
+
+ dataloader = torch.utils.data.DataLoader(dataset, batch_size, num_workers=num_workers)
+
+ return dataloader
+
+def get_topk_accuracy(pred, label):
+ if isinstance(pred, np.ndarray):
+ pred = torch.from_numpy(pred)
+
+ if isinstance(label, np.ndarray):
+ label = torch.from_numpy(label)
+
+ top1_acc = 0
+ top5_acc = 0
+ for idx in range(len(label)):
+ label_value = label[idx]
+ if label_value == torch.topk(pred[idx].float(), 1).indices.data:
+ top1_acc += 1
+ top5_acc += 1
+
+ elif label_value in torch.topk(pred[idx].float(), 5).indices.data:
+ top5_acc += 1
+
+ return top1_acc, top5_acc
+
+def main():
+ args = parse_args()
+
+ batch_size = args.batchsize
+
+ # create iluvatar target & device
+ target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer")
+ device = tvm.device(target.kind.name, 0)
+
+ # load engine
+ lib = tvm.runtime.load_module(args.engine)
+
+ # create runtime from engine
+ module = tvm.contrib.graph_executor.GraphModule(lib["default"](device))
+
+ # just run perf test
+ if args.perf_only:
+ ftimer = module.module.time_evaluator("run", device, number=100, repeat=1)
+ prof_res = np.array(ftimer().results) * 1000
+ fps = batch_size * 1000 / np.mean(prof_res)
+ print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}")
+ else:
+ # warm up
+ for _ in range(args.warmup):
+ module.run()
+
+ # get dataloader
+ dataloader = get_dataloader(args.datasets, batch_size, args.num_workers)
+
+ top1_acc = 0
+ top5_acc = 0
+ total_num = 0
+
+ for image, label in tqdm(dataloader):
+
+ # pad the last batch
+ pad_batch = len(image) != batch_size
+
+ if pad_batch:
+ origin_size = len(image)
+ image = np.resize(image, (batch_size, *image.shape[1:]))
+
+ module.set_input(args.input_name, tvm.nd.array(image, device))
+
+ # run inference
+ module.run()
+
+ pred = module.get_output(0).asnumpy()
+
+ if pad_batch:
+ pred = pred[:origin_size]
+
+ # get batch accuracy
+ batch_top1_acc, batch_top5_acc = get_topk_accuracy(pred, label)
+
+ top1_acc += batch_top1_acc
+ top5_acc += batch_top5_acc
+ total_num += batch_size
+
+ result_stat = {}
+ result_stat["acc@1"] = round(top1_acc / total_num * 100.0, 3)
+ result_stat["acc@5"] = round(top5_acc / total_num * 100.0, 3)
+
+ print(f"\n* Top1 acc: {result_stat['acc@1']} %, Top5 acc: {result_stat['acc@5']} %")
+
+if __name__ == "__main__":
+ main()
diff --git a/models/cv/classification/deit_tiny/igie/scripts/infer_deit_tiny_fp16_accuracy.sh b/models/cv/classification/deit_tiny/igie/scripts/infer_deit_tiny_fp16_accuracy.sh
new file mode 100644
index 0000000000000000000000000000000000000000..f6f9a8fb070df948cbf9dbe3e6e3c043c6270063
--- /dev/null
+++ b/models/cv/classification/deit_tiny/igie/scripts/infer_deit_tiny_fp16_accuracy.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+batchsize=32
+model_path="deit_tiny_opt.onnx"
+datasets_path=${DATASETS_DIR}
+
+# build engine
+python3 build_engine.py \
+ --model_path ${model_path} \
+ --input input:${batchsize},3,224,224 \
+ --precision fp16 \
+ --engine_path deit_tiny_bs_${batchsize}_fp16.so
+
+
+# inference
+python3 inference.py \
+ --engine deit_tiny_bs_${batchsize}_fp16.so \
+ --batchsize ${batchsize} \
+ --input_name input \
+ --datasets ${datasets_path}
diff --git a/models/cv/classification/deit_tiny/igie/scripts/infer_deit_tiny_fp16_performance.sh b/models/cv/classification/deit_tiny/igie/scripts/infer_deit_tiny_fp16_performance.sh
new file mode 100644
index 0000000000000000000000000000000000000000..a21f13b246ba7a65399c12d1613055be39d73e81
--- /dev/null
+++ b/models/cv/classification/deit_tiny/igie/scripts/infer_deit_tiny_fp16_performance.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+batchsize=32
+model_path="deit_tiny_opt.onnx"
+datasets_path=${DATASETS_DIR}
+
+# build engine
+python3 build_engine.py \
+ --model_path ${model_path} \
+ --input input:${batchsize},3,224,224 \
+ --precision fp16 \
+ --engine_path deit_tiny_bs_${batchsize}_fp16.so
+
+
+# inference
+python3 inference.py \
+ --engine deit_tiny_bs_${batchsize}_fp16.so \
+ --batchsize ${batchsize} \
+ --input_name input \
+ --datasets ${datasets_path} \
+ --perf_only True
\ No newline at end of file
diff --git a/models/cv/classification/densenet169/igie/README.md b/models/cv/classification/densenet169/igie/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..b6b8abd4859a500a096b917c5668670db19fb233
--- /dev/null
+++ b/models/cv/classification/densenet169/igie/README.md
@@ -0,0 +1,47 @@
+# DenseNet169
+
+## Description
+
+DenseNet-169 is a variant of the Dense Convolutional Network (DenseNet) architecture, characterized by its 169 layers and a growth rate of 32. This network leverages the dense connectivity pattern, where each layer is connected to every other layer in a feed-forward fashion, resulting in a substantial increase in the number of direct connections compared to traditional convolutional networks. This connectivity pattern facilitates the reuse of features and enhances the flow of information and gradients throughout the network, which is particularly beneficial for deep architectures.
+
+## Setup
+
+### Install
+
+```bash
+pip3 install onnx
+pip3 install tqdm
+```
+
+### Download
+
+Pretrained model:
+
+Dataset: to download the validation dataset.
+
+### Model Conversion
+
+```bash
+python3 export.py --weight densenet169-b2777c0a.pth --output densenet169.onnx
+```
+
+## Inference
+
+```bash
+export DATASETS_DIR=/Path/to/imagenet_val/
+```
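+
+The accuracy script reads the dataset with torchvision's `ImageFolder`, so `DATASETS_DIR` is expected to contain one sub-directory per class. A sketch of the expected layout (directory and file names below are only illustrative):
+
+```bash
+imagenet_val/
+├── n01440764/
+│   ├── ILSVRC2012_val_00000293.JPEG
+│   └── ...
+├── n01443537/
+│   └── ...
+└── ...
+```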
+
+### FP16
+
+```bash
+# Accuracy
+bash scripts/infer_densenet169_fp16_accuracy.sh
+# Performance
+bash scripts/infer_densenet169_fp16_performance.sh
+```
+
+## Results
+
+| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) |
+| ----------- | --------- | --------- | -------- | -------- | -------- |
+| DenseNet169 | 32 | FP16 | 1384.649 | 75.548 | 92.778 |
diff --git a/models/cv/classification/densenet169/igie/build_engine.py b/models/cv/classification/densenet169/igie/build_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..d3626ae76cc9781d9a01ec3d3e2afbdbca409ff5
--- /dev/null
+++ b/models/cv/classification/densenet169/igie/build_engine.py
@@ -0,0 +1,73 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import tvm
+import argparse
+from tvm import relay
+from tvm.relay.import_model import import_model_to_igie
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--model_path",
+ type=str,
+ required=True,
+ help="original model path.")
+
+ parser.add_argument("--engine_path",
+ type=str,
+ required=True,
+ help="igie export engine path.")
+
+ parser.add_argument("--input",
+ type=str,
+ required=True,
+ help="""
+ input info of the model, format should be:
+ input_name:input_shape
+ eg: --input input:1,3,224,224.
+ """)
+
+ parser.add_argument("--precision",
+ type=str,
+ choices=["fp32", "fp16", "int8"],
+ required=True,
+ help="model inference precision.")
+
+ args = parser.parse_args()
+
+ return args
+
+def main():
+ args = parse_args()
+
+ # get input valueinfo
+ input_name, input_shape = args.input.split(":")
+ shape = tuple([int(s) for s in input_shape.split(",")])
+ input_dict = {input_name: shape}
+
+ target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer")
+
+ mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie")
+
+ # build engine
+ lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision)
+
+ # export engine
+ lib.export_library(args.engine_path)
+
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/models/cv/classification/densenet169/igie/export.py b/models/cv/classification/densenet169/igie/export.py
new file mode 100644
index 0000000000000000000000000000000000000000..44c7269055407350e6af92f9a85d8d9bc5e5bab5
--- /dev/null
+++ b/models/cv/classification/densenet169/igie/export.py
@@ -0,0 +1,74 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import torch
+import torchvision
+import argparse
+import re
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--weight",
+ type=str,
+ required=True,
+ help="pytorch model weight.")
+
+ parser.add_argument("--output",
+ type=str,
+ required=True,
+ help="export onnx model path.")
+
+ args = parser.parse_args()
+ return args
+
+def main():
+ args = parse_args()
+
+ model = torchvision.models.densenet169(weights=None)
+
+ state_dict = torch.load(args.weight)
+
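+ # older checkpoints use keys like "denselayer1.norm.1.weight"; remap them to the current torchvision naming ("denselayer1.norm1.weight")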
+ pattern = re.compile(r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$')
+ for key in list(state_dict.keys()):
+ res = pattern.match(key)
+ if res:
+ new_key = res.group(1) + res.group(2)
+ state_dict[new_key] = state_dict[key]
+ del state_dict[key]
+
+ model.load_state_dict(state_dict)
+ model.eval()
+
+ input_names = ['input']
+ output_names = ['output']
+ dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}}
+ dummy_input = torch.randn(1, 3, 224, 224)
+
+ torch.onnx.export(
+ model,
+ dummy_input,
+ args.output,
+ input_names = input_names,
+ dynamic_axes = dynamic_axes,
+ output_names = output_names,
+ opset_version=13
+ )
+
+ print("Export onnx model successfully! ")
+
+if __name__ == "__main__":
+ main()
diff --git a/models/cv/classification/densenet169/igie/inference.py b/models/cv/classification/densenet169/igie/inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..3aef3ec70fa7e88917c54aeb8242fc73a910c696
--- /dev/null
+++ b/models/cv/classification/densenet169/igie/inference.py
@@ -0,0 +1,186 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import sys
+import argparse
+import tvm
+import torch
+import torchvision
+import numpy as np
+from tvm import relay
+from tqdm import tqdm
+from torchvision import transforms
+from torchvision.transforms.functional import InterpolationMode
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--engine",
+ type=str,
+ required=True,
+ help="igie engine path.")
+
+ parser.add_argument("--batchsize",
+ type=int,
+ required=True,
+ help="inference batch size.")
+
+ parser.add_argument("--datasets",
+ type=str,
+ required=True,
+ help="datasets path.")
+
+ parser.add_argument("--input_name",
+ type=str,
+ required=True,
+ help="input name of the model.")
+
+ parser.add_argument("--warmup",
+ type=int,
+ default=3,
+ help="number of warmup before test.")
+
+ parser.add_argument("--num_workers",
+ type=int,
+ default=16,
+ help="number of workers used in pytorch dataloader.")
+
+ parser.add_argument("--acc_target",
+ type=float,
+ default=None,
+ help="Model inference Accuracy target.")
+
+ parser.add_argument("--fps_target",
+ type=float,
+ default=None,
+ help="Model inference FPS target.")
+
+ parser.add_argument("--perf_only",
+ type=bool,
+ default=False,
+ help="Run performance test only")
+
+ args = parser.parse_args()
+
+ return args
+
+def get_dataloader(data_path, batch_size, num_workers):
+ dataset = torchvision.datasets.ImageFolder(
+ data_path,
+ transforms.Compose(
+ [
+ transforms.Resize(256, interpolation=InterpolationMode.BILINEAR),
+ transforms.CenterCrop(224),
+ transforms.PILToTensor(),
+ transforms.ConvertImageDtype(torch.float),
+ transforms.Normalize(
+ mean=(0.485, 0.456, 0.406),
+ std=(0.229, 0.224, 0.225)
+ )
+ ]
+ )
+ )
+
+ dataloader = torch.utils.data.DataLoader(dataset, batch_size, num_workers=num_workers)
+
+ return dataloader
+
+def get_topk_accuracy(pred, label):
+ if isinstance(pred, np.ndarray):
+ pred = torch.from_numpy(pred)
+
+ if isinstance(label, np.ndarray):
+ label = torch.from_numpy(label)
+
+ top1_acc = 0
+ top5_acc = 0
+ for idx in range(len(label)):
+ label_value = label[idx]
+ if label_value == torch.topk(pred[idx].float(), 1).indices.data:
+ top1_acc += 1
+ top5_acc += 1
+
+ elif label_value in torch.topk(pred[idx].float(), 5).indices.data:
+ top5_acc += 1
+
+ return top1_acc, top5_acc
+
+def main():
+ args = parse_args()
+
+ batch_size = args.batchsize
+
+ # create iluvatar target & device
+ target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer")
+ device = tvm.device(target.kind.name, 0)
+
+ # load engine
+ lib = tvm.runtime.load_module(args.engine)
+
+ # create runtime from engine
+ module = tvm.contrib.graph_executor.GraphModule(lib["default"](device))
+
+ # just run perf test
+ if args.perf_only:
+ ftimer = module.module.time_evaluator("run", device, number=100, repeat=1)
+ prof_res = np.array(ftimer().results) * 1000
+ fps = batch_size * 1000 / np.mean(prof_res)
+ print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}")
+ else:
+ # warm up
+ for _ in range(args.warmup):
+ module.run()
+
+ # get dataloader
+ dataloader = get_dataloader(args.datasets, batch_size, args.num_workers)
+
+ top1_acc = 0
+ top5_acc = 0
+ total_num = 0
+
+ for image, label in tqdm(dataloader):
+
+ # pad the last batch
+ pad_batch = len(image) != batch_size
+
+ if pad_batch:
+ origin_size = len(image)
+ image = np.resize(image, (batch_size, *image.shape[1:]))
+
+ module.set_input(args.input_name, tvm.nd.array(image, device))
+
+ # run inference
+ module.run()
+
+ pred = module.get_output(0).asnumpy()
+
+ if pad_batch:
+ pred = pred[:origin_size]
+
+ # get batch accuracy
+ batch_top1_acc, batch_top5_acc = get_topk_accuracy(pred, label)
+
+ top1_acc += batch_top1_acc
+ top5_acc += batch_top5_acc
+ total_num += batch_size
+
+ result_stat = {}
+ result_stat["acc@1"] = round(top1_acc / total_num * 100.0, 3)
+ result_stat["acc@5"] = round(top5_acc / total_num * 100.0, 3)
+
+ print(f"\n* Top1 acc: {result_stat['acc@1']} %, Top5 acc: {result_stat['acc@5']} %")
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/models/cv/classification/densenet169/igie/scripts/infer_densenet169_fp16_accuracy.sh b/models/cv/classification/densenet169/igie/scripts/infer_densenet169_fp16_accuracy.sh
new file mode 100644
index 0000000000000000000000000000000000000000..6475ddcdd58e63bfd7cd911dc3be1801026d6097
--- /dev/null
+++ b/models/cv/classification/densenet169/igie/scripts/infer_densenet169_fp16_accuracy.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+batchsize=32
+model_path="densenet169.onnx"
+datasets_path=${DATASETS_DIR}
+
+# build engine
+python3 build_engine.py \
+ --model_path ${model_path} \
+ --input input:${batchsize},3,224,224 \
+ --precision fp16 \
+ --engine_path densenet169_bs_${batchsize}_fp16.so
+
+
+# inference
+python3 inference.py \
+ --engine densenet169_bs_${batchsize}_fp16.so \
+ --batchsize ${batchsize} \
+ --input_name input \
+ --datasets ${datasets_path}
\ No newline at end of file
diff --git a/models/cv/classification/densenet169/igie/scripts/infer_densenet169_fp16_performance.sh b/models/cv/classification/densenet169/igie/scripts/infer_densenet169_fp16_performance.sh
new file mode 100644
index 0000000000000000000000000000000000000000..8a36fc2f0129a71e0fd3ef678e06ee4eb3651fa3
--- /dev/null
+++ b/models/cv/classification/densenet169/igie/scripts/infer_densenet169_fp16_performance.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+batchsize=32
+model_path="densenet169.onnx"
+datasets_path=${DATASETS_DIR}
+
+# build engine
+python3 build_engine.py \
+ --model_path ${model_path} \
+ --input input:${batchsize},3,224,224 \
+ --precision fp16 \
+ --engine_path densenet169_bs_${batchsize}_fp16.so
+
+
+# inference
+python3 inference.py \
+ --engine densenet169_bs_${batchsize}_fp16.so \
+ --batchsize ${batchsize} \
+ --input_name input \
+ --datasets ${datasets_path} \
+ --perf_only True
\ No newline at end of file
diff --git a/models/cv/classification/efficientnet_b0/ixrt/README.md b/models/cv/classification/efficientnet_b0/ixrt/README.md
index aeba963e8848fa9f97f92f734179d963b888f307..55690187dbc51e74d89765ab0457d72aa7f65150 100644
--- a/models/cv/classification/efficientnet_b0/ixrt/README.md
+++ b/models/cv/classification/efficientnet_b0/ixrt/README.md
@@ -55,5 +55,5 @@ bash scripts/infer_efficientnet_b0_int8_performance.sh
Model |BatchSize |Precision |FPS |Top-1(%) |Top-5(%)
----------------|-----------|----------|----------|----------|--------
-EfficientNet_B0 | 32 | FP16 | 2325.54 | 77.66 | 93.58
-EfficientNet_B0 | 32 | INT8 | 2666.00 | 74.27 | 91.85
+EfficientNet B0 | 32 | FP16 | 2325.54 | 77.66 | 93.58
+EfficientNet B0 | 32 | INT8 | 2666.00 | 74.27 | 91.85
diff --git a/models/cv/classification/efficientnet_b1/igie/README.md b/models/cv/classification/efficientnet_b1/igie/README.md
index dc5344e2dccb120849168f959d1678473dd8e858..0656f187cda59092a2912b82f05bbd425e196201 100644
--- a/models/cv/classification/efficientnet_b1/igie/README.md
+++ b/models/cv/classification/efficientnet_b1/igie/README.md
@@ -44,4 +44,4 @@ bash scripts/infer_efficientnet_b1_fp16_performance.sh
Model |BatchSize |Precision |FPS |Top-1(%) |Top-5(%)
----------------|-----------|----------|---------|---------|--------
-Efficientnet_b1 | 32 | FP16 | 1292.31 | 78.823 | 94.494
+EfficientNet B1 | 32 | FP16 | 1292.31 | 78.823 | 94.494
diff --git a/models/cv/classification/efficientnet_b2/igie/README.md b/models/cv/classification/efficientnet_b2/igie/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..f439723944caa5cc6d27ccf481117157bfc3eec2
--- /dev/null
+++ b/models/cv/classification/efficientnet_b2/igie/README.md
@@ -0,0 +1,47 @@
+# EfficientNet B2
+
+## Description
+
+EfficientNet B2 is a member of the EfficientNet family, a series of convolutional neural network architectures that are designed to achieve excellent accuracy and efficiency. Introduced by researchers at Google, EfficientNets utilize the compound scaling method, which uniformly scales the depth, width, and resolution of the network to improve accuracy and efficiency.
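+
+As a rough illustration of the compound scaling idea (not part of this repository's export or inference pipeline), the sketch below ties the depth, width and resolution multipliers to a single factor `phi`, using the base coefficients reported in the EfficientNet paper; the exact EfficientNet-B2 configuration in torchvision may differ slightly.
+
+```python
+# Illustrative sketch only: compound scaling with the paper's base coefficients.
+alpha, beta, gamma = 1.2, 1.1, 1.15   # depth, width, resolution coefficients
+
+def compound_scale(phi):
+    """Return (depth, width, resolution) multipliers for a scaling factor phi."""
+    return alpha ** phi, beta ** phi, gamma ** phi
+
+for phi in range(3):                  # larger phi -> deeper, wider, higher-resolution network
+    d, w, r = compound_scale(phi)
+    print(f"phi={phi}: depth x{d:.2f}, width x{w:.2f}, resolution x{r:.2f}")
+```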
+
+## Setup
+
+### Install
+
+```bash
+pip3 install onnx
+pip3 install tqdm
+```
+
+### Download
+
+Pretrained model:
+
+Dataset: to download the validation dataset.
+
+### Model Conversion
+
+```bash
+python3 export.py --weight efficientnet_b2_rwightman-c35c1473.pth --output efficientnet_b2.onnx
+```
+
+## Inference
+
+```bash
+export DATASETS_DIR=/Path/to/imagenet_val/
+```
+
+### FP16
+
+```bash
+# Accuracy
+bash scripts/infer_efficientnet_b2_fp16_accuracy.sh
+# Performance
+bash scripts/infer_efficientnet_b2_fp16_performance.sh
+```
+
+## Results
+
+| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) |
+| --------------- | --------- | --------- | -------- | -------- | -------- |
+| EfficientNet B2 | 32 | FP16 | 1527.044 | 77.739 | 93.702 |
diff --git a/models/cv/classification/efficientnet_b2/igie/build_engine.py b/models/cv/classification/efficientnet_b2/igie/build_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..d3626ae76cc9781d9a01ec3d3e2afbdbca409ff5
--- /dev/null
+++ b/models/cv/classification/efficientnet_b2/igie/build_engine.py
@@ -0,0 +1,73 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import tvm
+import argparse
+from tvm import relay
+from tvm.relay.import_model import import_model_to_igie
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--model_path",
+ type=str,
+ required=True,
+ help="original model path.")
+
+ parser.add_argument("--engine_path",
+ type=str,
+ required=True,
+ help="igie export engine path.")
+
+ parser.add_argument("--input",
+ type=str,
+ required=True,
+ help="""
+ input info of the model, format should be:
+ input_name:input_shape
+ eg: --input input:1,3,224,224.
+ """)
+
+ parser.add_argument("--precision",
+ type=str,
+ choices=["fp32", "fp16", "int8"],
+ required=True,
+ help="model inference precision.")
+
+ args = parser.parse_args()
+
+ return args
+
+def main():
+ args = parse_args()
+
+ # get input valueinfo
+ input_name, input_shape = args.input.split(":")
+ shape = tuple([int(s) for s in input_shape.split(",")])
+ input_dict = {input_name: shape}
+
+ target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer")
+
+ mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie")
+
+ # build engine
+ lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision)
+
+ # export engine
+ lib.export_library(args.engine_path)
+
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/models/cv/classification/efficientnet_b2/igie/export.py b/models/cv/classification/efficientnet_b2/igie/export.py
new file mode 100644
index 0000000000000000000000000000000000000000..7761ffaca5fd02798d898e12392f3e0501aef0ec
--- /dev/null
+++ b/models/cv/classification/efficientnet_b2/igie/export.py
@@ -0,0 +1,61 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import torch
+import torchvision
+import argparse
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--weight",
+ type=str,
+ required=True,
+ help="pytorch model weight.")
+
+ parser.add_argument("--output",
+ type=str,
+ required=True,
+ help="export onnx model path.")
+
+ args = parser.parse_args()
+ return args
+
+def main():
+ args = parse_args()
+
+ model = torchvision.models.efficientnet_b2()
+ model.load_state_dict(torch.load(args.weight))
+ model.eval()
+
+ input_names = ['input']
+ output_names = ['output']
+ dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}}
+ dummy_input = torch.randn(1, 3, 224, 224)
+
+ torch.onnx.export(
+ model,
+ dummy_input,
+ args.output,
+ input_names = input_names,
+ dynamic_axes = dynamic_axes,
+ output_names = output_names,
+ opset_version=13
+ )
+
+ print("Export onnx model successfully! ")
+
+if __name__ == "__main__":
+ main()
diff --git a/models/cv/classification/efficientnet_b2/igie/inference.py b/models/cv/classification/efficientnet_b2/igie/inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..3aef3ec70fa7e88917c54aeb8242fc73a910c696
--- /dev/null
+++ b/models/cv/classification/efficientnet_b2/igie/inference.py
@@ -0,0 +1,186 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import sys
+import argparse
+import tvm
+import torch
+import torchvision
+import numpy as np
+from tvm import relay
+from tqdm import tqdm
+from torchvision import transforms
+from torchvision.transforms.functional import InterpolationMode
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--engine",
+ type=str,
+ required=True,
+ help="igie engine path.")
+
+ parser.add_argument("--batchsize",
+ type=int,
+ required=True,
+ help="inference batch size.")
+
+ parser.add_argument("--datasets",
+ type=str,
+ required=True,
+ help="datasets path.")
+
+ parser.add_argument("--input_name",
+ type=str,
+ required=True,
+ help="input name of the model.")
+
+ parser.add_argument("--warmup",
+ type=int,
+ default=3,
+ help="number of warmup before test.")
+
+ parser.add_argument("--num_workers",
+ type=int,
+ default=16,
+ help="number of workers used in pytorch dataloader.")
+
+ parser.add_argument("--acc_target",
+ type=float,
+ default=None,
+ help="Model inference Accuracy target.")
+
+ parser.add_argument("--fps_target",
+ type=float,
+ default=None,
+ help="Model inference FPS target.")
+
+ parser.add_argument("--perf_only",
+ type=bool,
+ default=False,
+ help="Run performance test only")
+
+ args = parser.parse_args()
+
+ return args
+
+def get_dataloader(data_path, batch_size, num_workers):
+ dataset = torchvision.datasets.ImageFolder(
+ data_path,
+ transforms.Compose(
+ [
+ transforms.Resize(256, interpolation=InterpolationMode.BILINEAR),
+ transforms.CenterCrop(224),
+ transforms.PILToTensor(),
+ transforms.ConvertImageDtype(torch.float),
+ transforms.Normalize(
+ mean=(0.485, 0.456, 0.406),
+ std=(0.229, 0.224, 0.225)
+ )
+ ]
+ )
+ )
+
+ dataloader = torch.utils.data.DataLoader(dataset, batch_size, num_workers=num_workers)
+
+ return dataloader
+
+def get_topk_accuracy(pred, label):
+ if isinstance(pred, np.ndarray):
+ pred = torch.from_numpy(pred)
+
+ if isinstance(label, np.ndarray):
+ label = torch.from_numpy(label)
+
+ top1_acc = 0
+ top5_acc = 0
+ for idx in range(len(label)):
+ label_value = label[idx]
+ if label_value == torch.topk(pred[idx].float(), 1).indices.data:
+ top1_acc += 1
+ top5_acc += 1
+
+ elif label_value in torch.topk(pred[idx].float(), 5).indices.data:
+ top5_acc += 1
+
+ return top1_acc, top5_acc
+
+def main():
+ args = parse_args()
+
+ batch_size = args.batchsize
+
+ # create iluvatar target & device
+ target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer")
+ device = tvm.device(target.kind.name, 0)
+
+ # load engine
+ lib = tvm.runtime.load_module(args.engine)
+
+ # create runtime from engine
+ module = tvm.contrib.graph_executor.GraphModule(lib["default"](device))
+
+ # just run perf test
+ if args.perf_only:
+ ftimer = module.module.time_evaluator("run", device, number=100, repeat=1)
+ prof_res = np.array(ftimer().results) * 1000
+ fps = batch_size * 1000 / np.mean(prof_res)
+ print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}")
+ else:
+ # warm up
+ for _ in range(args.warmup):
+ module.run()
+
+ # get dataloader
+ dataloader = get_dataloader(args.datasets, batch_size, args.num_workers)
+
+ top1_acc = 0
+ top5_acc = 0
+ total_num = 0
+
+ for image, label in tqdm(dataloader):
+
+ # pad the last batch
+ pad_batch = len(image) != batch_size
+
+ if pad_batch:
+ origin_size = len(image)
+ image = np.resize(image, (batch_size, *image.shape[1:]))
+
+ module.set_input(args.input_name, tvm.nd.array(image, device))
+
+ # run inference
+ module.run()
+
+ pred = module.get_output(0).asnumpy()
+
+ if pad_batch:
+ pred = pred[:origin_size]
+
+ # get batch accuracy
+ batch_top1_acc, batch_top5_acc = get_topk_accuracy(pred, label)
+
+ top1_acc += batch_top1_acc
+ top5_acc += batch_top5_acc
+ total_num += len(label)
+
+ result_stat = {}
+ result_stat["acc@1"] = round(top1_acc / total_num * 100.0, 3)
+ result_stat["acc@5"] = round(top5_acc / total_num * 100.0, 3)
+
+ print(f"\n* Top1 acc: {result_stat['acc@1']} %, Top5 acc: {result_stat['acc@5']} %")
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/models/cv/classification/efficientnet_b2/igie/scripts/infer_efficientnet_b2_fp16_accuracy.sh b/models/cv/classification/efficientnet_b2/igie/scripts/infer_efficientnet_b2_fp16_accuracy.sh
new file mode 100644
index 0000000000000000000000000000000000000000..d4ed3ef0893017781d18c20c4387aa8c7f52851c
--- /dev/null
+++ b/models/cv/classification/efficientnet_b2/igie/scripts/infer_efficientnet_b2_fp16_accuracy.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+batchsize=32
+model_path="efficientnet_b2.onnx"
+datasets_path=${DATASETS_DIR}
+
+# build engine
+python3 build_engine.py \
+ --model_path ${model_path} \
+ --input input:${batchsize},3,224,224 \
+ --precision fp16 \
+ --engine_path efficientnet_b2_bs_${batchsize}_fp16.so
+
+
+# inference
+python3 inference.py \
+ --engine efficientnet_b2_bs_${batchsize}_fp16.so \
+ --batchsize ${batchsize} \
+ --input_name input \
+ --datasets ${datasets_path}
\ No newline at end of file
diff --git a/models/cv/classification/efficientnet_b2/igie/scripts/infer_efficientnet_b2_fp16_performance.sh b/models/cv/classification/efficientnet_b2/igie/scripts/infer_efficientnet_b2_fp16_performance.sh
new file mode 100644
index 0000000000000000000000000000000000000000..4b9602218a6a2c5be91f8ee2f97e63df7abc85e4
--- /dev/null
+++ b/models/cv/classification/efficientnet_b2/igie/scripts/infer_efficientnet_b2_fp16_performance.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+batchsize=32
+model_path="efficientnet_b2.onnx"
+datasets_path=${DATASETS_DIR}
+
+# build engine
+python3 build_engine.py \
+ --model_path ${model_path} \
+ --input input:${batchsize},3,224,224 \
+ --precision fp16 \
+ --engine_path efficientnet_b2_bs_${batchsize}_fp16.so
+
+
+# inference
+python3 inference.py \
+ --engine efficientnet_b2_bs_${batchsize}_fp16.so \
+ --batchsize ${batchsize} \
+ --input_name input \
+ --datasets ${datasets_path} \
+ --perf_only True
\ No newline at end of file
diff --git a/models/cv/classification/efficientnet_v2/igie/README.md b/models/cv/classification/efficientnet_v2/igie/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..cbcc5a4eb2cbc136a1a7b489f53efe4e022808e0
--- /dev/null
+++ b/models/cv/classification/efficientnet_v2/igie/README.md
@@ -0,0 +1,47 @@
+# EfficientNetV2-M
+
+## Description
+
+EfficientNetV2-M is the medium-sized model in the EfficientNetV2 series developed by Google researchers. It builds on the original EfficientNet family and improves both accuracy and training efficiency through training-aware neural architecture search, progressive learning, and the use of Fused-MBConv blocks in the early stages of the network.
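+
+For intuition only, the sketch below contrasts a standard MBConv block with a Fused-MBConv block in simplified PyTorch (squeeze-and-excitation, residual connections and stride handling are omitted); it is an assumption-laden illustration and is not how the torchvision model used in this directory is defined.
+
+```python
+import torch.nn as nn
+
+def mbconv(cin, cout, expand=4):
+    # 1x1 expansion -> depthwise 3x3 -> 1x1 projection
+    mid = cin * expand
+    return nn.Sequential(
+        nn.Conv2d(cin, mid, 1, bias=False), nn.BatchNorm2d(mid), nn.SiLU(),
+        nn.Conv2d(mid, mid, 3, padding=1, groups=mid, bias=False), nn.BatchNorm2d(mid), nn.SiLU(),
+        nn.Conv2d(mid, cout, 1, bias=False), nn.BatchNorm2d(cout),
+    )
+
+def fused_mbconv(cin, cout, expand=4):
+    # expansion and depthwise conv merged into a single regular 3x3 conv
+    mid = cin * expand
+    return nn.Sequential(
+        nn.Conv2d(cin, mid, 3, padding=1, bias=False), nn.BatchNorm2d(mid), nn.SiLU(),
+        nn.Conv2d(mid, cout, 1, bias=False), nn.BatchNorm2d(cout),
+    )
+```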
+
+## Setup
+
+### Install
+
+```bash
+pip3 install onnx
+pip3 install tqdm
+```
+
+### Download
+
+Pretrained model:
+
+Dataset: to download the validation dataset.
+
+### Model Conversion
+
+```bash
+python3 export.py --weight efficientnet_v2_m-dc08266a.pth --output efficientnet_v2_m.onnx
+```
+
+## Inference
+
+```bash
+export DATASETS_DIR=/Path/to/imagenet_val/
+```
+
+### FP16
+
+```bash
+# Accuracy
+bash scripts/infer_efficientnet_v2_m_fp16_accuracy.sh
+# Performance
+bash scripts/infer_efficientnet_v2_m_fp16_performance.sh
+```
+
+## Results
+
+| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) |
+| ---------------- | --------- | --------- | -------- | -------- | -------- |
+| EfficientNetV2-M | 32 | FP16 | 1104.846 | 79.635 | 94.456 |
diff --git a/models/cv/classification/efficientnet_v2/igie/build_engine.py b/models/cv/classification/efficientnet_v2/igie/build_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..d3626ae76cc9781d9a01ec3d3e2afbdbca409ff5
--- /dev/null
+++ b/models/cv/classification/efficientnet_v2/igie/build_engine.py
@@ -0,0 +1,73 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import tvm
+import argparse
+from tvm import relay
+from tvm.relay.import_model import import_model_to_igie
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--model_path",
+ type=str,
+ required=True,
+ help="original model path.")
+
+ parser.add_argument("--engine_path",
+ type=str,
+ required=True,
+ help="igie export engine path.")
+
+ parser.add_argument("--input",
+ type=str,
+ required=True,
+ help="""
+ input info of the model, format should be:
+ input_name:input_shape
+ eg: --input input:1,3,224,224.
+ """)
+
+ parser.add_argument("--precision",
+ type=str,
+ choices=["fp32", "fp16", "int8"],
+ required=True,
+ help="model inference precision.")
+
+ args = parser.parse_args()
+
+ return args
+
+def main():
+ args = parse_args()
+
+ # get input valueinfo
+ input_name, input_shape = args.input.split(":")
+ shape = tuple([int(s) for s in input_shape.split(",")])
+ input_dict = {input_name: shape}
+
+ target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer")
+
+ mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie")
+
+ # build engine
+ lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision)
+
+ # export engine
+ lib.export_library(args.engine_path)
+
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/models/cv/classification/efficientnet_v2/igie/export.py b/models/cv/classification/efficientnet_v2/igie/export.py
new file mode 100644
index 0000000000000000000000000000000000000000..cfb9f76e7503ce443da7090c5739e4880652f194
--- /dev/null
+++ b/models/cv/classification/efficientnet_v2/igie/export.py
@@ -0,0 +1,61 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import torch
+import torchvision
+import argparse
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--weight",
+ type=str,
+ required=True,
+ help="pytorch model weight.")
+
+ parser.add_argument("--output",
+ type=str,
+ required=True,
+ help="export onnx model path.")
+
+ args = parser.parse_args()
+ return args
+
+def main():
+ args = parse_args()
+
+ model = torchvision.models.efficientnet_v2_m()
+ model.load_state_dict(torch.load(args.weight))
+ model.eval()
+
+ input_names = ['input']
+ output_names = ['output']
+ dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}}
+ dummy_input = torch.randn(1, 3, 224, 224)
+
+ torch.onnx.export(
+ model,
+ dummy_input,
+ args.output,
+ input_names = input_names,
+ dynamic_axes = dynamic_axes,
+ output_names = output_names,
+ opset_version=13
+ )
+
+ print("Export onnx model successfully! ")
+
+if __name__ == "__main__":
+ main()
diff --git a/models/cv/classification/efficientnet_v2/igie/inference.py b/models/cv/classification/efficientnet_v2/igie/inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..3aef3ec70fa7e88917c54aeb8242fc73a910c696
--- /dev/null
+++ b/models/cv/classification/efficientnet_v2/igie/inference.py
@@ -0,0 +1,186 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import sys
+import argparse
+import tvm
+import torch
+import torchvision
+import numpy as np
+from tvm import relay
+from tqdm import tqdm
+from torchvision import transforms
+from torchvision.transforms.functional import InterpolationMode
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--engine",
+ type=str,
+ required=True,
+ help="igie engine path.")
+
+ parser.add_argument("--batchsize",
+ type=int,
+ required=True,
+ help="inference batch size.")
+
+ parser.add_argument("--datasets",
+ type=str,
+ required=True,
+ help="datasets path.")
+
+ parser.add_argument("--input_name",
+ type=str,
+ required=True,
+ help="input name of the model.")
+
+ parser.add_argument("--warmup",
+ type=int,
+ default=3,
+ help="number of warmup before test.")
+
+ parser.add_argument("--num_workers",
+ type=int,
+ default=16,
+ help="number of workers used in pytorch dataloader.")
+
+ parser.add_argument("--acc_target",
+ type=float,
+ default=None,
+ help="Model inference Accuracy target.")
+
+ parser.add_argument("--fps_target",
+ type=float,
+ default=None,
+ help="Model inference FPS target.")
+
+ parser.add_argument("--perf_only",
+ type=bool,
+ default=False,
+ help="Run performance test only")
+
+ args = parser.parse_args()
+
+ return args
+
+def get_dataloader(data_path, batch_size, num_workers):
+ dataset = torchvision.datasets.ImageFolder(
+ data_path,
+ transforms.Compose(
+ [
+ transforms.Resize(256, interpolation=InterpolationMode.BILINEAR),
+ transforms.CenterCrop(224),
+ transforms.PILToTensor(),
+ transforms.ConvertImageDtype(torch.float),
+ transforms.Normalize(
+ mean=(0.485, 0.456, 0.406),
+ std=(0.229, 0.224, 0.225)
+ )
+ ]
+ )
+ )
+
+ dataloader = torch.utils.data.DataLoader(dataset, batch_size, num_workers=num_workers)
+
+ return dataloader
+
+def get_topk_accuracy(pred, label):
+ if isinstance(pred, np.ndarray):
+ pred = torch.from_numpy(pred)
+
+ if isinstance(label, np.ndarray):
+ label = torch.from_numpy(label)
+
+ top1_acc = 0
+ top5_acc = 0
+ for idx in range(len(label)):
+ label_value = label[idx]
+ if label_value == torch.topk(pred[idx].float(), 1).indices.data:
+ top1_acc += 1
+ top5_acc += 1
+
+ elif label_value in torch.topk(pred[idx].float(), 5).indices.data:
+ top5_acc += 1
+
+ return top1_acc, top5_acc
+
+def main():
+ args = parse_args()
+
+ batch_size = args.batchsize
+
+ # create iluvatar target & device
+ target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer")
+ device = tvm.device(target.kind.name, 0)
+
+ # load engine
+ lib = tvm.runtime.load_module(args.engine)
+
+ # create runtime from engine
+ module = tvm.contrib.graph_executor.GraphModule(lib["default"](device))
+
+ # just run perf test
+ if args.perf_only:
+ ftimer = module.module.time_evaluator("run", device, number=100, repeat=1)
+ prof_res = np.array(ftimer().results) * 1000
+ fps = batch_size * 1000 / np.mean(prof_res)
+ print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}")
+ else:
+ # warm up
+ for _ in range(args.warmup):
+ module.run()
+
+ # get dataloader
+ dataloader = get_dataloader(args.datasets, batch_size, args.num_workers)
+
+ top1_acc = 0
+ top5_acc = 0
+ total_num = 0
+
+ for image, label in tqdm(dataloader):
+
+ # pad the last batch
+ pad_batch = len(image) != batch_size
+
+ if pad_batch:
+ origin_size = len(image)
+ image = np.resize(image, (batch_size, *image.shape[1:]))
+
+ module.set_input(args.input_name, tvm.nd.array(image, device))
+
+ # run inference
+ module.run()
+
+ pred = module.get_output(0).asnumpy()
+
+ if pad_batch:
+ pred = pred[:origin_size]
+
+ # get batch accuracy
+ batch_top1_acc, batch_top5_acc = get_topk_accuracy(pred, label)
+
+ top1_acc += batch_top1_acc
+ top5_acc += batch_top5_acc
+ total_num += len(label)
+
+ result_stat = {}
+ result_stat["acc@1"] = round(top1_acc / total_num * 100.0, 3)
+ result_stat["acc@5"] = round(top5_acc / total_num * 100.0, 3)
+
+ print(f"\n* Top1 acc: {result_stat['acc@1']} %, Top5 acc: {result_stat['acc@5']} %")
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/models/cv/classification/efficientnet_v2/igie/scripts/infer_efficientnet_v2_m_fp16_accuracy.sh b/models/cv/classification/efficientnet_v2/igie/scripts/infer_efficientnet_v2_m_fp16_accuracy.sh
new file mode 100644
index 0000000000000000000000000000000000000000..d7aa78a2d9b15370dc1e2a5fc5a9fd14ab68668d
--- /dev/null
+++ b/models/cv/classification/efficientnet_v2/igie/scripts/infer_efficientnet_v2_m_fp16_accuracy.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+batchsize=32
+model_path="efficientnet_v2_m.onnx"
+datasets_path=${DATASETS_DIR}
+
+# build engine
+python3 build_engine.py \
+ --model_path ${model_path} \
+ --input input:${batchsize},3,224,224 \
+ --precision fp16 \
+ --engine_path efficientnet_v2_m_bs_${batchsize}_fp16.so
+
+
+# inference
+python3 inference.py \
+ --engine efficientnet_v2_m_bs_${batchsize}_fp16.so \
+ --batchsize ${batchsize} \
+ --input_name input \
+ --datasets ${datasets_path}
\ No newline at end of file
diff --git a/models/cv/classification/efficientnet_v2/igie/scripts/infer_efficientnet_v2_m_fp16_performance.sh b/models/cv/classification/efficientnet_v2/igie/scripts/infer_efficientnet_v2_m_fp16_performance.sh
new file mode 100644
index 0000000000000000000000000000000000000000..21073983ec294f1612356de6848ed2c08f9374d4
--- /dev/null
+++ b/models/cv/classification/efficientnet_v2/igie/scripts/infer_efficientnet_v2_m_fp16_performance.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+batchsize=32
+model_path="efficientnet_v2_m.onnx"
+datasets_path=${DATASETS_DIR}
+
+# build engine
+python3 build_engine.py \
+ --model_path ${model_path} \
+ --input input:${batchsize},3,224,224 \
+ --precision fp16 \
+ --engine_path efficientnet_v2_m_bs_${batchsize}_fp16.so
+
+
+# inference
+python3 inference.py \
+ --engine efficientnet_v2_m_bs_${batchsize}_fp16.so \
+ --batchsize ${batchsize} \
+ --input_name input \
+ --datasets ${datasets_path} \
+ --perf_only True
\ No newline at end of file
diff --git a/models/cv/classification/mobilenet_v2/ixrt/README.md b/models/cv/classification/mobilenet_v2/ixrt/README.md
index 46343115fc731320eb2f61b0358695cb0608116b..10852a9550a34658bf9bc5b8d4a0dadd91e98bb9 100644
--- a/models/cv/classification/mobilenet_v2/ixrt/README.md
+++ b/models/cv/classification/mobilenet_v2/ixrt/README.md
@@ -13,17 +13,19 @@ pip3 install tqdm
pip3 install onnxsim
pip3 install opencv-python
pip3 install ppq
+pip3 install protobuf==3.20.0
```
### Download
-Download the [imagenet](https://www.image-net.org/download.php) validation dataset, and place in data/datasets;
+Download the [imagenet](https://www.image-net.org/download.php) validation dataset, and place in `${PROJ_ROOT}/data/datasets`;
## Inference
### FP16
```bash
+cd python/
# Test ACC
bash script/infer_mobilenetv2_fp16_accuary.sh
# Test FPS
diff --git a/models/cv/classification/mobilenet_v2/ixrt/python/inference.py b/models/cv/classification/mobilenet_v2/ixrt/python/inference.py
index e726dabc1f19cadeda9f130ef52f8b36ad435d26..ea3f7f6b47414387508d955f71344a4af3217167 100644
--- a/models/cv/classification/mobilenet_v2/ixrt/python/inference.py
+++ b/models/cv/classification/mobilenet_v2/ixrt/python/inference.py
@@ -85,6 +85,7 @@ def main(config):
total_sample = 0
acc_top1, acc_top5 = 0, 0
+ start_time = time.time()
with tqdm(total= len(dataloader)) as _tqdm:
for idx, (batch_data, batch_label) in enumerate(dataloader):
batch_data = batch_data.numpy().astype(inputs[0]["dtype"])
@@ -106,7 +107,10 @@ def main(config):
_tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample),
acc_5='{:.4f}'.format(acc_top5/total_sample))
_tqdm.update(1)
+ end_time = time.time()
+ end2end_time = end_time - start_time
+ print(F"E2E time : {end2end_time:.3f} seconds")
print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}")
print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}")
acc1 = acc_top1/total_sample
diff --git a/models/cv/classification/repvgg/igie/README.md b/models/cv/classification/repvgg/igie/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..cbbeaac49e4c93cebf25e693f6499d4be333c252
--- /dev/null
+++ b/models/cv/classification/repvgg/igie/README.md
@@ -0,0 +1,65 @@
+# RepVGG
+
+## Description
+
+RepVGG is a convolutional neural network architecture that combines the simplicity of VGG-style inference with a multi-branch topology during training. Through structural re-parameterization, the training-time 3x3, 1x1 and identity branches are fused into a single 3x3 convolution (sketched below), so the deployed model is a plain VGG-style stack that achieves high accuracy with fast, memory-friendly inference.
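+
+A minimal NumPy sketch of the re-parameterization idea follows, assuming stride 1, padding 1 and no batch normalization (the real RepVGG conversion also folds BN statistics into the kernels); it is only for intuition and is independent of the export script in this directory.
+
+```python
+import numpy as np
+
+# A 3x3 branch, a 1x1 branch and an identity branch applied to the same input
+# are equivalent to one 3x3 convolution whose kernel is the sum of the branches.
+channels = 4
+k3 = np.random.randn(channels, channels, 3, 3)   # 3x3 branch kernel
+k1 = np.random.randn(channels, channels, 1, 1)   # 1x1 branch kernel
+
+fused = k3.copy()
+fused[:, :, 1:2, 1:2] += k1                      # embed the 1x1 kernel at the centre
+for i in range(channels):
+    fused[i, i, 1, 1] += 1.0                     # identity branch as a centred Dirac kernel
+
+# conv(x, k3) + conv(x, k1) + x  ==  conv(x, fused)  for stride=1, padding=1
+print(fused.shape)                               # (4, 4, 3, 3)
+```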
+
+## Setup
+
+### Install
+
+```bash
+# Install libGL
+## CentOS
+yum install -y mesa-libGL
+## Ubuntu
+apt install -y libgl1-mesa-dev
+
+pip3 install onnx
+pip3 install tqdm
+pip3 install mmcv==1.5.3
+pip3 install mmcls
+pip3 install mmengine
+```
+
+### Download
+
+Pretrained model:
+
+Dataset: to download the validation dataset.
+
+### Model Conversion
+
+```bash
+# git clone mmpretrain
+git clone -b v0.24.0 https://github.com/open-mmlab/mmpretrain.git
+
+# export onnx model
+python3 export.py --cfg mmpretrain/configs/repvgg/repvgg-A0_4xb64-coslr-120e_in1k.py --weight repvgg-A0_8xb32_in1k_20221213-60ae8e23.pth --output repvgg.onnx
+
+```
+
+## Inference
+
+```bash
+export DATASETS_DIR=/Path/to/imagenet_val/
+```
+
+### FP16
+
+```bash
+# Accuracy
+bash scripts/infer_repvgg_fp16_accuracy.sh
+# Performance
+bash scripts/infer_repvgg_fp16_performance.sh
+```
+
+## Results
+
+| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) |
+| ------ | --------- | --------- | -------- | -------- | -------- |
+| RepVGG | 32 | FP16 | 7423.035 | 72.345 | 90.543 |
+
+## Reference
+
+RepVGG:
diff --git a/models/cv/classification/repvgg/igie/build_engine.py b/models/cv/classification/repvgg/igie/build_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..d3626ae76cc9781d9a01ec3d3e2afbdbca409ff5
--- /dev/null
+++ b/models/cv/classification/repvgg/igie/build_engine.py
@@ -0,0 +1,73 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import tvm
+import argparse
+from tvm import relay
+from tvm.relay.import_model import import_model_to_igie
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--model_path",
+ type=str,
+ required=True,
+ help="original model path.")
+
+ parser.add_argument("--engine_path",
+ type=str,
+ required=True,
+ help="igie export engine path.")
+
+ parser.add_argument("--input",
+ type=str,
+ required=True,
+ help="""
+ input info of the model, format should be:
+ input_name:input_shape
+ eg: --input input:1,3,224,224.
+ """)
+
+ parser.add_argument("--precision",
+ type=str,
+ choices=["fp32", "fp16", "int8"],
+ required=True,
+ help="model inference precision.")
+
+ args = parser.parse_args()
+
+ return args
+
+def main():
+ args = parse_args()
+
+ # get input valueinfo
+ input_name, input_shape = args.input.split(":")
+ shape = tuple([int(s) for s in input_shape.split(",")])
+ input_dict = {input_name: shape}
+
+ target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer")
+
+ mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie")
+
+ # build engine
+ lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision)
+
+ # export engine
+ lib.export_library(args.engine_path)
+
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/models/cv/classification/repvgg/igie/export.py b/models/cv/classification/repvgg/igie/export.py
new file mode 100644
index 0000000000000000000000000000000000000000..7dc8d9fde531853bb9d25966dc7f706f2d9276dd
--- /dev/null
+++ b/models/cv/classification/repvgg/igie/export.py
@@ -0,0 +1,78 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import argparse
+
+import torch
+from mmcls.apis import init_model
+
+class Model(torch.nn.Module):
+ def __init__(self, config_file, checkpoint_file):
+ super().__init__()
+ self.model = init_model(config_file, checkpoint_file, device="cpu")
+
+ def forward(self, x):
+ feat = self.model.backbone(x)
+ feat = self.model.neck(feat)
+ out_head = self.model.head.fc(feat[0])
+ return out_head
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--weight",
+ type=str,
+ required=True,
+ help="pytorch model weight.")
+
+ parser.add_argument("--cfg",
+ type=str,
+ required=True,
+ help="model config file.")
+
+ parser.add_argument("--output",
+ type=str,
+ required=True,
+ help="export onnx model path.")
+
+ args = parser.parse_args()
+ return args
+
+def main():
+ args = parse_args()
+
+ config_file = args.cfg
+ checkpoint_file = args.weight
+ model = Model(config_file, checkpoint_file).eval()
+
+ input_names = ['input']
+ output_names = ['output']
+ dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}}
+ dummy_input = torch.randn(1, 3, 224, 224)
+
+ torch.onnx.export(
+ model,
+ dummy_input,
+ args.output,
+ input_names = input_names,
+ dynamic_axes = dynamic_axes,
+ output_names = output_names,
+ opset_version=13
+ )
+
+ print("Export onnx model successfully! ")
+
+if __name__ == '__main__':
+ main()
+
diff --git a/models/cv/classification/repvgg/igie/inference.py b/models/cv/classification/repvgg/igie/inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..1b0c602a8f5c899e34f621851c10a5d00c47583c
--- /dev/null
+++ b/models/cv/classification/repvgg/igie/inference.py
@@ -0,0 +1,185 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import sys
+import argparse
+import tvm
+import torch
+import torchvision
+import numpy as np
+from tvm import relay
+from tqdm import tqdm
+from torchvision import transforms
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--engine",
+ type=str,
+ required=True,
+ help="igie engine path.")
+
+ parser.add_argument("--batchsize",
+ type=int,
+ required=True,
+ help="inference batch size.")
+
+ parser.add_argument("--datasets",
+ type=str,
+ required=True,
+ help="datasets path.")
+
+ parser.add_argument("--input_name",
+ type=str,
+ required=True,
+ help="input name of the model.")
+
+ parser.add_argument("--warmup",
+ type=int,
+ default=3,
+ help="number of warmup before test.")
+
+ parser.add_argument("--num_workers",
+ type=int,
+ default=16,
+ help="number of workers used in pytorch dataloader.")
+
+ parser.add_argument("--acc_target",
+ type=float,
+ default=None,
+ help="Model inference Accuracy target.")
+
+ parser.add_argument("--fps_target",
+ type=float,
+ default=None,
+ help="Model inference FPS target.")
+
+ parser.add_argument("--perf_only",
+ type=bool,
+ default=False,
+ help="Run performance test only")
+
+ args = parser.parse_args()
+
+ return args
+
+def get_dataloader(data_path, batch_size, num_workers):
+ dataset = torchvision.datasets.ImageFolder(
+ data_path,
+ transforms.Compose(
+ [
+ transforms.Resize(256),
+ transforms.CenterCrop(224),
+ transforms.PILToTensor(),
+ transforms.ConvertImageDtype(torch.float),
+ transforms.Normalize(
+ mean=(0.485, 0.456, 0.406),
+ std=(0.229, 0.224, 0.225)
+ )
+ ]
+ )
+ )
+
+ dataloader = torch.utils.data.DataLoader(dataset, batch_size, num_workers=num_workers)
+
+ return dataloader
+
+def get_topk_accuracy(pred, label):
+ if isinstance(pred, np.ndarray):
+ pred = torch.from_numpy(pred)
+
+ if isinstance(label, np.ndarray):
+ label = torch.from_numpy(label)
+
+ top1_acc = 0
+ top5_acc = 0
+ for idx in range(len(label)):
+ label_value = label[idx]
+ if label_value == torch.topk(pred[idx].float(), 1).indices.data:
+ top1_acc += 1
+ top5_acc += 1
+
+ elif label_value in torch.topk(pred[idx].float(), 5).indices.data:
+ top5_acc += 1
+
+ return top1_acc, top5_acc
+
+def main():
+ args = parse_args()
+
+ batch_size = args.batchsize
+
+ # create iluvatar target & device
+ target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer")
+ device = tvm.device(target.kind.name, 0)
+
+ # load engine
+ lib = tvm.runtime.load_module(args.engine)
+
+ # create runtime from engine
+ module = tvm.contrib.graph_executor.GraphModule(lib["default"](device))
+
+ # just run perf test
+ if args.perf_only:
+ ftimer = module.module.time_evaluator("run", device, number=100, repeat=1)
+ prof_res = np.array(ftimer().results) * 1000
+ fps = batch_size * 1000 / np.mean(prof_res)
+ print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}")
+ else:
+ # warm up
+ for _ in range(args.warmup):
+ module.run()
+
+ # get dataloader
+ dataloader = get_dataloader(args.datasets, batch_size, args.num_workers)
+
+ top1_acc = 0
+ top5_acc = 0
+ total_num = 0
+
+ for image, label in tqdm(dataloader):
+
+ # pad the last batch
+ pad_batch = len(image) != batch_size
+
+ if pad_batch:
+ origin_size = len(image)
+ image = np.resize(image, (batch_size, *image.shape[1:]))
+
+ module.set_input(args.input_name, tvm.nd.array(image, device))
+
+ # run inference
+ module.run()
+
+ pred = module.get_output(0).asnumpy()
+
+ if pad_batch:
+ pred = pred[:origin_size]
+
+ # get batch accuracy
+ batch_top1_acc, batch_top5_acc = get_topk_accuracy(pred, label)
+
+ top1_acc += batch_top1_acc
+ top5_acc += batch_top5_acc
+ total_num += len(label)
+
+ result_stat = {}
+ result_stat["acc@1"] = round(top1_acc / total_num * 100.0, 3)
+ result_stat["acc@5"] = round(top5_acc / total_num * 100.0, 3)
+
+ print(f"\n* Top1 acc: {result_stat['acc@1']} %, Top5 acc: {result_stat['acc@5']} %")
+
+if __name__ == "__main__":
+ main()
diff --git a/models/cv/classification/repvgg/igie/scripts/infer_repvgg_fp16_accuracy.sh b/models/cv/classification/repvgg/igie/scripts/infer_repvgg_fp16_accuracy.sh
new file mode 100644
index 0000000000000000000000000000000000000000..37f6fcb2ab964b22bd9e29f0015a1b2900f5a560
--- /dev/null
+++ b/models/cv/classification/repvgg/igie/scripts/infer_repvgg_fp16_accuracy.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+batchsize=32
+model_path="repvgg.onnx"
+datasets_path=${DATASETS_DIR}
+
+# build engine
+python3 build_engine.py \
+ --model_path ${model_path} \
+ --input input:${batchsize},3,224,224 \
+ --precision fp16 \
+ --engine_path repvgg_bs_${batchsize}_fp16.so
+
+
+# inference
+python3 inference.py \
+ --engine repvgg_bs_${batchsize}_fp16.so \
+ --batchsize ${batchsize} \
+ --input_name input \
+ --datasets ${datasets_path}
\ No newline at end of file
diff --git a/models/cv/classification/repvgg/igie/scripts/infer_repvgg_fp16_performance.sh b/models/cv/classification/repvgg/igie/scripts/infer_repvgg_fp16_performance.sh
new file mode 100644
index 0000000000000000000000000000000000000000..ad29f598b8dbe3512e5288bfd7c757df89654b09
--- /dev/null
+++ b/models/cv/classification/repvgg/igie/scripts/infer_repvgg_fp16_performance.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+batchsize=32
+model_path="repvgg.onnx"
+datasets_path=${DATASETS_DIR}
+
+# build engine
+python3 build_engine.py \
+ --model_path ${model_path} \
+ --input input:${batchsize},3,224,224 \
+ --precision fp16 \
+ --engine_path repvgg_bs_${batchsize}_fp16.so
+
+
+# inference
+python3 inference.py \
+ --engine repvgg_bs_${batchsize}_fp16.so \
+ --batchsize ${batchsize} \
+ --input_name input \
+ --datasets ${datasets_path} \
+ --perf_only True
\ No newline at end of file
diff --git a/models/cv/classification/repvgg/ixrt/README.md b/models/cv/classification/repvgg/ixrt/README.md
index 95104dad4374362a8bded98e5c3557065dbbc30e..37bbbcd480afdbddb3fd622c2eeacb59f2faeacb 100644
--- a/models/cv/classification/repvgg/ixrt/README.md
+++ b/models/cv/classification/repvgg/ixrt/README.md
@@ -1,4 +1,4 @@
-# REPVGG
+# RepVGG
## Description
@@ -65,6 +65,6 @@ bash scripts/infer_repvgg_fp16_performance.sh
## Results
-Model |BatchSize |Precision |FPS |Top-1(%) |Top-5(%)
--------|-----------|----------|---------|----------|--------
-REPVGG | 32 | FP16 | 5725.37 | 72.41 | 90.49
+| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) |
+| ------ | --------- | --------- | ------- | -------- | -------- |
+| RepVGG | 32 | FP16 | 5725.37 | 72.41 | 90.49 |
diff --git a/models/cv/classification/resnet50/ixrt/inference.py b/models/cv/classification/resnet50/ixrt/inference.py
index 2c9dcb3f9cc5b9a26903651a31fafa16d8f0db31..50aafd4fd5ef9664203cdcbdfbdb577edca933c4 100644
--- a/models/cv/classification/resnet50/ixrt/inference.py
+++ b/models/cv/classification/resnet50/ixrt/inference.py
@@ -83,6 +83,7 @@ def main(config):
total_sample = 0
acc_top1, acc_top5 = 0, 0
+ start_time = time.time()
with tqdm(total= len(dataloader)) as _tqdm:
for idx, (batch_data, batch_label) in enumerate(dataloader):
batch_data = batch_data.numpy().astype(inputs[0]["dtype"])
@@ -104,7 +105,10 @@ def main(config):
_tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample),
acc_5='{:.4f}'.format(acc_top5/total_sample))
_tqdm.update(1)
+ end_time = time.time()
+ end2end_time = end_time - start_time
+ print(F"E2E time : {end2end_time:.3f} seconds")
print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}")
print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}")
acc1 = acc_top1/total_sample
diff --git a/models/cv/classification/se_resnet50/igie/README.md b/models/cv/classification/se_resnet50/igie/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ab59b0e8cdf70d652e3de0677488f2288ddbe429
--- /dev/null
+++ b/models/cv/classification/se_resnet50/igie/README.md
@@ -0,0 +1,64 @@
+# SEResNet50
+
+## Description
+
+SEResNet50 is ResNet50 augmented with Squeeze-and-Excitation (SE) blocks, which adaptively recalibrate channel-wise feature responses by emphasizing informative channels and suppressing less useful ones. This lets SEResNet50 reach higher accuracy than the standard ResNet50 on a range of visual recognition tasks at a small additional computational cost.
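+
+As a rough illustration of what an SE block computes, the PyTorch sketch below shows the squeeze (global average pooling), excitation (bottleneck MLP with sigmoid gating) and channel-wise rescaling steps; the model exported in this directory comes from mmcls/mmpretrain and is not built from this snippet.
+
+```python
+import torch
+import torch.nn as nn
+
+class SEBlock(nn.Module):
+    """Minimal Squeeze-and-Excitation block (illustrative only)."""
+    def __init__(self, channels, reduction=16):
+        super().__init__()
+        self.fc = nn.Sequential(
+            nn.Linear(channels, channels // reduction),
+            nn.ReLU(inplace=True),
+            nn.Linear(channels // reduction, channels),
+            nn.Sigmoid(),
+        )
+
+    def forward(self, x):
+        s = x.mean(dim=(2, 3))            # squeeze: (N, C, H, W) -> (N, C)
+        w = self.fc(s)[:, :, None, None]  # excitation: per-channel weights in (0, 1)
+        return x * w                      # recalibrate the feature map channel-wise
+
+x = torch.randn(1, 64, 56, 56)
+print(SEBlock(64)(x).shape)               # torch.Size([1, 64, 56, 56])
+```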
+
+## Setup
+
+### Install
+
+```bash
+# Install libGL
+## CentOS
+yum install -y mesa-libGL
+## Ubuntu
+apt install -y libgl1-mesa-dev
+
+pip3 install onnx
+pip3 install tqdm
+pip3 install mmcv==1.5.3
+pip3 install mmcls
+```
+
+### Download
+
+Pretrained model:
+
+Dataset: to download the validation dataset.
+
+### Model Conversion
+
+```bash
+# git clone mmpretrain
+git clone -b v0.24.0 https://github.com/open-mmlab/mmpretrain.git
+
+# export onnx model
+python3 export.py --cfg mmpretrain/configs/seresnet/seresnet50_8xb32_in1k.py --weight se-resnet50_batch256_imagenet_20200804-ae206104.pth --output seresnet50.onnx
+
+```
+
+## Inference
+
+```bash
+export DATASETS_DIR=/Path/to/imagenet_val/
+```
+
+### FP16
+
+```bash
+# Accuracy
+bash scripts/infer_seresnet_fp16_accuracy.sh
+# Performance
+bash scripts/infer_seresnet_fp16_performance.sh
+```
+
+## Results
+
+| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) |
+| ---------- | --------- | --------- | -------- | -------- | -------- |
+| SEResNet50 | 32 | FP16 | 2548.268 | 77.709 | 93.812 |
+
+## Reference
+
+SE_ResNet50:
diff --git a/models/cv/classification/se_resnet50/igie/build_engine.py b/models/cv/classification/se_resnet50/igie/build_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..d3626ae76cc9781d9a01ec3d3e2afbdbca409ff5
--- /dev/null
+++ b/models/cv/classification/se_resnet50/igie/build_engine.py
@@ -0,0 +1,73 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import tvm
+import argparse
+from tvm import relay
+from tvm.relay.import_model import import_model_to_igie
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--model_path",
+ type=str,
+ required=True,
+ help="original model path.")
+
+ parser.add_argument("--engine_path",
+ type=str,
+ required=True,
+ help="igie export engine path.")
+
+ parser.add_argument("--input",
+ type=str,
+ required=True,
+ help="""
+ input info of the model, format should be:
+ input_name:input_shape
+ eg: --input input:1,3,224,224.
+ """)
+
+ parser.add_argument("--precision",
+ type=str,
+ choices=["fp32", "fp16", "int8"],
+ required=True,
+ help="model inference precision.")
+
+ args = parser.parse_args()
+
+ return args
+
+def main():
+ args = parse_args()
+
+ # get input valueinfo
+ input_name, input_shape = args.input.split(":")
+ shape = tuple([int(s) for s in input_shape.split(",")])
+ input_dict = {input_name: shape}
+
+ target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer")
+
+ mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie")
+
+ # build engine
+ lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision)
+
+ # export engine
+ lib.export_library(args.engine_path)
+
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/models/cv/classification/se_resnet50/igie/export.py b/models/cv/classification/se_resnet50/igie/export.py
new file mode 100644
index 0000000000000000000000000000000000000000..7dc8d9fde531853bb9d25966dc7f706f2d9276dd
--- /dev/null
+++ b/models/cv/classification/se_resnet50/igie/export.py
@@ -0,0 +1,78 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import argparse
+
+import torch
+from mmcls.apis import init_model
+
+class Model(torch.nn.Module):
+ def __init__(self, config_file, checkpoint_file):
+ super().__init__()
+ self.model = init_model(config_file, checkpoint_file, device="cpu")
+
+ def forward(self, x):
+ feat = self.model.backbone(x)
+ feat = self.model.neck(feat)
+ out_head = self.model.head.fc(feat[0])
+ return out_head
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--weight",
+ type=str,
+ required=True,
+ help="pytorch model weight.")
+
+ parser.add_argument("--cfg",
+ type=str,
+ required=True,
+ help="model config file.")
+
+ parser.add_argument("--output",
+ type=str,
+ required=True,
+ help="export onnx model path.")
+
+ args = parser.parse_args()
+ return args
+
+def main():
+ args = parse_args()
+
+ config_file = args.cfg
+ checkpoint_file = args.weight
+ model = Model(config_file, checkpoint_file).eval()
+
+ input_names = ['input']
+ output_names = ['output']
+ dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}}
+ dummy_input = torch.randn(1, 3, 224, 224)
+
+ torch.onnx.export(
+ model,
+ dummy_input,
+ args.output,
+ input_names = input_names,
+ dynamic_axes = dynamic_axes,
+ output_names = output_names,
+ opset_version=13
+ )
+
+ print("Export onnx model successfully! ")
+
+if __name__ == '__main__':
+ main()
+
diff --git a/models/cv/classification/se_resnet50/igie/inference.py b/models/cv/classification/se_resnet50/igie/inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..1b0c602a8f5c899e34f621851c10a5d00c47583c
--- /dev/null
+++ b/models/cv/classification/se_resnet50/igie/inference.py
@@ -0,0 +1,185 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import sys
+import argparse
+import tvm
+import torch
+import torchvision
+import numpy as np
+from tvm import relay
+from tqdm import tqdm
+from torchvision import transforms
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--engine",
+ type=str,
+ required=True,
+ help="igie engine path.")
+
+ parser.add_argument("--batchsize",
+ type=int,
+ required=True,
+ help="inference batch size.")
+
+ parser.add_argument("--datasets",
+ type=str,
+ required=True,
+ help="datasets path.")
+
+ parser.add_argument("--input_name",
+ type=str,
+ required=True,
+ help="input name of the model.")
+
+ parser.add_argument("--warmup",
+ type=int,
+ default=3,
+ help="number of warmup before test.")
+
+ parser.add_argument("--num_workers",
+ type=int,
+ default=16,
+ help="number of workers used in pytorch dataloader.")
+
+ parser.add_argument("--acc_target",
+ type=float,
+ default=None,
+ help="Model inference Accuracy target.")
+
+ parser.add_argument("--fps_target",
+ type=float,
+ default=None,
+ help="Model inference FPS target.")
+
+ parser.add_argument("--perf_only",
+ type=bool,
+ default=False,
+ help="Run performance test only")
+
+ args = parser.parse_args()
+
+ return args
+
+def get_dataloader(data_path, batch_size, num_workers):
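+    # standard ImageNet-1k evaluation preprocessing: resize to 256, center-crop to 224,
+    # convert to float, and normalize with the usual ImageNet mean/std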
+ dataset = torchvision.datasets.ImageFolder(
+ data_path,
+ transforms.Compose(
+ [
+ transforms.Resize(256),
+ transforms.CenterCrop(224),
+ transforms.PILToTensor(),
+ transforms.ConvertImageDtype(torch.float),
+ transforms.Normalize(
+ mean=(0.485, 0.456, 0.406),
+ std=(0.229, 0.224, 0.225)
+ )
+ ]
+ )
+ )
+
+ dataloader = torch.utils.data.DataLoader(dataset, batch_size, num_workers=num_workers)
+
+ return dataloader
+
+def get_topk_accuracy(pred, label):
+ if isinstance(pred, np.ndarray):
+ pred = torch.from_numpy(pred)
+
+ if isinstance(label, np.ndarray):
+ label = torch.from_numpy(label)
+
+ top1_acc = 0
+ top5_acc = 0
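+    # count, per sample, whether the ground-truth label is the arg-max prediction (top-1)
+    # or appears among the five highest-scoring classes (top-5)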
+ for idx in range(len(label)):
+ label_value = label[idx]
+ if label_value == torch.topk(pred[idx].float(), 1).indices.data:
+ top1_acc += 1
+ top5_acc += 1
+
+ elif label_value in torch.topk(pred[idx].float(), 5).indices.data:
+ top5_acc += 1
+
+ return top1_acc, top5_acc
+
+def main():
+ args = parse_args()
+
+ batch_size = args.batchsize
+
+ # create iluvatar target & device
+ target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer")
+ device = tvm.device(target.kind.name, 0)
+
+ # load engine
+ lib = tvm.runtime.load_module(args.engine)
+
+ # create runtime from engine
+ module = tvm.contrib.graph_executor.GraphModule(lib["default"](device))
+
+ # just run perf test
+ if args.perf_only:
+ ftimer = module.module.time_evaluator("run", device, number=100, repeat=1)
+ prof_res = np.array(ftimer().results) * 1000
+ fps = batch_size * 1000 / np.mean(prof_res)
+ print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}")
+ else:
+ # warm up
+ for _ in range(args.warmup):
+ module.run()
+
+ # get dataloader
+ dataloader = get_dataloader(args.datasets, batch_size, args.num_workers)
+
+ top1_acc = 0
+ top5_acc = 0
+ total_num = 0
+
+ for image, label in tqdm(dataloader):
+
+ # pad the last batch
+ pad_batch = len(image) != batch_size
+
+ if pad_batch:
+ origin_size = len(image)
+ image = np.resize(image, (batch_size, *image.shape[1:]))
+
+ module.set_input(args.input_name, tvm.nd.array(image, device))
+
+ # run inference
+ module.run()
+
+ pred = module.get_output(0).asnumpy()
+
+ if pad_batch:
+ pred = pred[:origin_size]
+
+ # get batch accuracy
+ batch_top1_acc, batch_top5_acc = get_topk_accuracy(pred, label)
+
+ top1_acc += batch_top1_acc
+ top5_acc += batch_top5_acc
+        total_num += len(label)  # count only the real samples in the (possibly padded) final batch
+
+ result_stat = {}
+ result_stat["acc@1"] = round(top1_acc / total_num * 100.0, 3)
+ result_stat["acc@5"] = round(top5_acc / total_num * 100.0, 3)
+
+ print(f"\n* Top1 acc: {result_stat['acc@1']} %, Top5 acc: {result_stat['acc@5']} %")
+
+if __name__ == "__main__":
+ main()
diff --git a/models/cv/classification/se_resnet50/igie/scripts/infer_seresnet_fp16_accuracy.sh b/models/cv/classification/se_resnet50/igie/scripts/infer_seresnet_fp16_accuracy.sh
new file mode 100644
index 0000000000000000000000000000000000000000..c861e85be2300f51b41c671d09513272480cb5fe
--- /dev/null
+++ b/models/cv/classification/se_resnet50/igie/scripts/infer_seresnet_fp16_accuracy.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+batchsize=32
+model_path="seresnet50.onnx"
+datasets_path=${DATASETS_DIR}
+
+# build engine
+python3 build_engine.py \
+ --model_path ${model_path} \
+ --input input:${batchsize},3,224,224 \
+ --precision fp16 \
+ --engine_path seresnet_bs_${batchsize}_fp16.so
+
+
+# inference
+python3 inference.py \
+ --engine seresnet_bs_${batchsize}_fp16.so \
+ --batchsize ${batchsize} \
+ --input_name input \
+ --datasets ${datasets_path}
\ No newline at end of file
diff --git a/models/cv/classification/se_resnet50/igie/scripts/infer_seresnet_fp16_performance.sh b/models/cv/classification/se_resnet50/igie/scripts/infer_seresnet_fp16_performance.sh
new file mode 100644
index 0000000000000000000000000000000000000000..caaeaa7c2035f56fe4f62fd23387024e8c85875d
--- /dev/null
+++ b/models/cv/classification/se_resnet50/igie/scripts/infer_seresnet_fp16_performance.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+batchsize=32
+model_path="seresnet50.onnx"
+datasets_path=${DATASETS_DIR}
+
+# build engine
+python3 build_engine.py \
+ --model_path ${model_path} \
+ --input input:${batchsize},3,224,224 \
+ --precision fp16 \
+ --engine_path seresnet_bs_${batchsize}_fp16.so
+
+
+# inference
+python3 inference.py \
+ --engine seresnet_bs_${batchsize}_fp16.so \
+ --batchsize ${batchsize} \
+ --input_name input \
+ --datasets ${datasets_path} \
+ --perf_only True
\ No newline at end of file
diff --git a/models/cv/classification/shufflenetv2_x1_0/igie/README.md b/models/cv/classification/shufflenetv2_x1_0/igie/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..1211bba938ee81dcf2c76b9b85d81fd71ddc69a5
--- /dev/null
+++ b/models/cv/classification/shufflenetv2_x1_0/igie/README.md
@@ -0,0 +1,47 @@
+# ShuffleNetV2_x1_0
+
+## Description
+
+ShuffleNet V2_x1_0 is an efficient convolutional neural network (CNN) architecture that emphasizes a balance between computational efficiency and accuracy, particularly suited for deployment on mobile and embedded devices. The model refines the ShuffleNet series by introducing structural innovations that enhance feature reuse and reduce redundancy, all while maintaining simplicity and performance.
+
+## Setup
+
+### Install
+
+```bash
+pip3 install onnx
+pip3 install tqdm
+```
+
+### Download
+
+Pretrained model:
+
+Dataset: to download the validation dataset.
+
+### Model Conversion
+
+```bash
+python3 export.py --weight shufflenetv2_x1-5666bf0f80.pth --output shufflenetv2_x1_0.onnx
+```
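+
+Before building the engine, you can optionally sanity-check the exported model (a minimal sketch; it only assumes the `onnx` package installed above):
+
+```python
+import onnx
+
+# load the exported graph and run ONNX's structural checker
+model = onnx.load("shufflenetv2_x1_0.onnx")
+onnx.checker.check_model(model)
+
+# the exporter marks the batch dimension as dynamic, so expect a single input named "input"
+print([inp.name for inp in model.graph.input])
+```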
+
+## Inference
+
+```bash
+export DATASETS_DIR=/Path/to/imagenet_val/
+```
+
+### FP16
+
+```bash
+# Accuracy
+bash scripts/infer_shufflenetv2_x1_0_fp16_accuracy.sh
+# Performance
+bash scripts/infer_shufflenetv2_x1_0_fp16_performance.sh
+```
+
+## Results
+
+| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) |
+| ----------------- | --------- | --------- | -------- | -------- | -------- |
+| ShuffleNetV2_x1_0 | 32 | FP16 | 8232.980 | 69.308 | 88.302 |
diff --git a/models/cv/classification/shufflenetv2_x1_0/igie/build_engine.py b/models/cv/classification/shufflenetv2_x1_0/igie/build_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..d3626ae76cc9781d9a01ec3d3e2afbdbca409ff5
--- /dev/null
+++ b/models/cv/classification/shufflenetv2_x1_0/igie/build_engine.py
@@ -0,0 +1,73 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import tvm
+import argparse
+from tvm import relay
+from tvm.relay.import_model import import_model_to_igie
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--model_path",
+ type=str,
+ required=True,
+ help="original model path.")
+
+ parser.add_argument("--engine_path",
+ type=str,
+ required=True,
+ help="igie export engine path.")
+
+ parser.add_argument("--input",
+ type=str,
+ required=True,
+ help="""
+ input info of the model, format should be:
+ input_name:input_shape
+ eg: --input input:1,3,224,224.
+ """)
+
+ parser.add_argument("--precision",
+ type=str,
+ choices=["fp32", "fp16", "int8"],
+ required=True,
+ help="model inference precision.")
+
+ args = parser.parse_args()
+
+ return args
+
+def main():
+ args = parse_args()
+
+ # get input valueinfo
+ input_name, input_shape = args.input.split(":")
+ shape = tuple([int(s) for s in input_shape.split(",")])
+ input_dict = {input_name: shape}
+
+ target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer")
+
+ mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie")
+
+ # build engine
+ lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision)
+
+ # export engine
+ lib.export_library(args.engine_path)
+
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/models/cv/classification/shufflenetv2_x1_0/igie/export.py b/models/cv/classification/shufflenetv2_x1_0/igie/export.py
new file mode 100644
index 0000000000000000000000000000000000000000..3a4ff9ba8168c1e33a9a5677facc9b9d03afb911
--- /dev/null
+++ b/models/cv/classification/shufflenetv2_x1_0/igie/export.py
@@ -0,0 +1,61 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import torch
+import torchvision
+import argparse
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--weight",
+ type=str,
+ required=True,
+ help="pytorch model weight.")
+
+ parser.add_argument("--output",
+ type=str,
+ required=True,
+ help="export onnx model path.")
+
+ args = parser.parse_args()
+ return args
+
+def main():
+ args = parse_args()
+
+ model = torchvision.models.shufflenet_v2_x1_0()
+ model.load_state_dict(torch.load(args.weight))
+ model.eval()
+
+ input_names = ['input']
+ output_names = ['output']
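+    # mark dim 0 (batch) of both tensors as dynamic; the axis value here is only a label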
+ dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}}
+ dummy_input = torch.randn(1, 3, 224, 224)
+
+ torch.onnx.export(
+ model,
+ dummy_input,
+ args.output,
+ input_names = input_names,
+ dynamic_axes = dynamic_axes,
+ output_names = output_names,
+ opset_version=13
+ )
+
+ print("Export onnx model successfully! ")
+
+if __name__ == "__main__":
+ main()
diff --git a/models/cv/classification/shufflenetv2_x1_0/igie/inference.py b/models/cv/classification/shufflenetv2_x1_0/igie/inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..3aef3ec70fa7e88917c54aeb8242fc73a910c696
--- /dev/null
+++ b/models/cv/classification/shufflenetv2_x1_0/igie/inference.py
@@ -0,0 +1,186 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import sys
+import argparse
+import tvm
+import torch
+import torchvision
+import numpy as np
+from tvm import relay
+from tqdm import tqdm
+from torchvision import transforms
+from torchvision.transforms.functional import InterpolationMode
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--engine",
+ type=str,
+ required=True,
+ help="igie engine path.")
+
+ parser.add_argument("--batchsize",
+ type=int,
+ required=True,
+ help="inference batch size.")
+
+ parser.add_argument("--datasets",
+ type=str,
+ required=True,
+ help="datasets path.")
+
+ parser.add_argument("--input_name",
+ type=str,
+ required=True,
+ help="input name of the model.")
+
+ parser.add_argument("--warmup",
+ type=int,
+ default=3,
+ help="number of warmup before test.")
+
+ parser.add_argument("--num_workers",
+ type=int,
+ default=16,
+ help="number of workers used in pytorch dataloader.")
+
+ parser.add_argument("--acc_target",
+ type=float,
+ default=None,
+ help="Model inference Accuracy target.")
+
+ parser.add_argument("--fps_target",
+ type=float,
+ default=None,
+ help="Model inference FPS target.")
+
+ parser.add_argument("--perf_only",
+ type=bool,
+ default=False,
+ help="Run performance test only")
+
+ args = parser.parse_args()
+
+ return args
+
+def get_dataloader(data_path, batch_size, num_workers):
+ dataset = torchvision.datasets.ImageFolder(
+ data_path,
+ transforms.Compose(
+ [
+ transforms.Resize(256, interpolation=InterpolationMode.BILINEAR),
+ transforms.CenterCrop(224),
+ transforms.PILToTensor(),
+ transforms.ConvertImageDtype(torch.float),
+ transforms.Normalize(
+ mean=(0.485, 0.456, 0.406),
+ std=(0.229, 0.224, 0.225)
+ )
+ ]
+ )
+ )
+
+ dataloader = torch.utils.data.DataLoader(dataset, batch_size, num_workers=num_workers)
+
+ return dataloader
+
+def get_topk_accuracy(pred, label):
+ if isinstance(pred, np.ndarray):
+ pred = torch.from_numpy(pred)
+
+ if isinstance(label, np.ndarray):
+ label = torch.from_numpy(label)
+
+ top1_acc = 0
+ top5_acc = 0
+ for idx in range(len(label)):
+ label_value = label[idx]
+ if label_value == torch.topk(pred[idx].float(), 1).indices.data:
+ top1_acc += 1
+ top5_acc += 1
+
+ elif label_value in torch.topk(pred[idx].float(), 5).indices.data:
+ top5_acc += 1
+
+ return top1_acc, top5_acc
+
+def main():
+ args = parse_args()
+
+ batch_size = args.batchsize
+
+ # create iluvatar target & device
+ target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer")
+ device = tvm.device(target.kind.name, 0)
+
+ # load engine
+ lib = tvm.runtime.load_module(args.engine)
+
+ # create runtime from engine
+ module = tvm.contrib.graph_executor.GraphModule(lib["default"](device))
+
+ # just run perf test
+ if args.perf_only:
+ ftimer = module.module.time_evaluator("run", device, number=100, repeat=1)
+ prof_res = np.array(ftimer().results) * 1000
+ fps = batch_size * 1000 / np.mean(prof_res)
+ print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}")
+ else:
+ # warm up
+ for _ in range(args.warmup):
+ module.run()
+
+ # get dataloader
+ dataloader = get_dataloader(args.datasets, batch_size, args.num_workers)
+
+ top1_acc = 0
+ top5_acc = 0
+ total_num = 0
+
+ for image, label in tqdm(dataloader):
+
+ # pad the last batch
+ pad_batch = len(image) != batch_size
+
+ if pad_batch:
+ origin_size = len(image)
+ image = np.resize(image, (batch_size, *image.shape[1:]))
+
+ module.set_input(args.input_name, tvm.nd.array(image, device))
+
+ # run inference
+ module.run()
+
+ pred = module.get_output(0).asnumpy()
+
+ if pad_batch:
+ pred = pred[:origin_size]
+
+ # get batch accuracy
+ batch_top1_acc, batch_top5_acc = get_topk_accuracy(pred, label)
+
+ top1_acc += batch_top1_acc
+ top5_acc += batch_top5_acc
+            total_num += len(label)  # count only the real samples in the (possibly padded) final batch
+
+ result_stat = {}
+ result_stat["acc@1"] = round(top1_acc / total_num * 100.0, 3)
+ result_stat["acc@5"] = round(top5_acc / total_num * 100.0, 3)
+
+ print(f"\n* Top1 acc: {result_stat['acc@1']} %, Top5 acc: {result_stat['acc@5']} %")
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/models/cv/classification/shufflenetv2_x1_0/igie/scripts/infer_shufflenetv2_x1_0_fp16_accuracy.sh b/models/cv/classification/shufflenetv2_x1_0/igie/scripts/infer_shufflenetv2_x1_0_fp16_accuracy.sh
new file mode 100644
index 0000000000000000000000000000000000000000..91802347326ce32c17d7f0a3309328a3976314db
--- /dev/null
+++ b/models/cv/classification/shufflenetv2_x1_0/igie/scripts/infer_shufflenetv2_x1_0_fp16_accuracy.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+batchsize=32
+model_path="shufflenetv2_x1_0.onnx"
+datasets_path=${DATASETS_DIR}
+
+# build engine
+python3 build_engine.py \
+ --model_path ${model_path} \
+ --input input:${batchsize},3,224,224 \
+ --precision fp16 \
+ --engine_path shufflenetv2_x1_0_bs_${batchsize}_fp16.so
+
+
+# inference
+python3 inference.py \
+ --engine shufflenetv2_x1_0_bs_${batchsize}_fp16.so \
+ --batchsize ${batchsize} \
+ --input_name input \
+ --datasets ${datasets_path}
\ No newline at end of file
diff --git a/models/cv/classification/shufflenetv2_x1_0/igie/scripts/infer_shufflenetv2_x1_0_fp16_performance.sh b/models/cv/classification/shufflenetv2_x1_0/igie/scripts/infer_shufflenetv2_x1_0_fp16_performance.sh
new file mode 100644
index 0000000000000000000000000000000000000000..21353dc71844e052a925a493f58d6860a20323da
--- /dev/null
+++ b/models/cv/classification/shufflenetv2_x1_0/igie/scripts/infer_shufflenetv2_x1_0_fp16_performance.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+batchsize=32
+model_path="shufflenetv2_x1_0.onnx"
+datasets_path=${DATASETS_DIR}
+
+# build engine
+python3 build_engine.py \
+ --model_path ${model_path} \
+ --input input:${batchsize},3,224,224 \
+ --precision fp16 \
+ --engine_path shufflenetv2_x1_0_bs_${batchsize}_fp16.so
+
+
+# inference
+python3 inference.py \
+ --engine shufflenetv2_x1_0_bs_${batchsize}_fp16.so \
+ --batchsize ${batchsize} \
+ --input_name input \
+ --datasets ${datasets_path} \
+ --perf_only True
\ No newline at end of file
diff --git a/models/cv/classification/swin_transformer_Large/ixrt/README.md b/models/cv/classification/swin_transformer_Large/ixrt/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..9cb811bfc076bee7d42775a16f8e8803180aebcb
--- /dev/null
+++ b/models/cv/classification/swin_transformer_Large/ixrt/README.md
@@ -0,0 +1,83 @@
+# Swin-L
+
+## Description
+
+Swin Transformer-Large is a variant of the Swin Transformer, an architecture designed for computer vision tasks, particularly within the realms of image classification, object detection, and segmentation. The Swin Transformer-Large model represents an expanded version with more layers and parameters compared to its base configuration, aiming for improved performance and deeper processing of visual data.
+
+## Setup
+
+### Install
+
+```bash
+pip3 install onnxsim
+pip3 install onnx_graphsurgeon
+pip3 install scikit-learn
+pip3 install tqdm
+pip3 install pycuda
+pip3 install onnx
+pip3 install tabulate
+pip3 install pycocotools
+pip3 install opencv-python==4.6.0.66
+```
+
+### Download
+
+Pretrained model:
+
+Dataset: to download the open_imagenet dataset.
+
+Or you can run:
+
+```bash
+bash /scripts/prepare_model_and_dataset.sh
+```
+
+### Model Conversion
+
+Please adjust the paths in the following commands and files to match your environment.
+
+```bash
+tar -xvf open-swin-large.tar
+wget
+python3 torch2onnx.py --model_path swin-transformer-large.pt --output_path swin-large-torch-fp32.onnx
+
+```
+
+## Inference
+
+
+```bash
+export ORIGIN_ONNX_NAME=/Path/swin-large-torch-fp32.onnx
+export OPTIMIER_FILE=/Path/ixrt/oss/tools/optimizer/optimizer.py
+export PROJ_PATH=./
+```
+
+### Performance
+
+```bash
+
+bash scripts/infer_swinl_fp16_performance.sh
+```
+
+### Accuracy
+
+If you want to evaluate the accuracy of this model, please visit here: , which integrates inference and training of many models under this framework and supports the ILUVATAR backend.
+
+For detailed steps regarding this model, please refer to this document: . Note: you need to modify the relevant paths in the code to your own correct paths.
+
+```bash
+
+pip3 install -r toolbox/ByteMLPerf/blob/iluvatar_general_infer/byte_infer_perf/general_perf/requirements.txt
+mv /ixrt/perf_engine.py toolbox/ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py
+cd toolbox/ByteMLPerf/byte_infer_perf/
+mv /general_perf/general_perf/model_zoo/popular/swin-large /general_perf/model_zoo/popular/swin-large
+cd toolbox/ByteMLPerf/byte_infer_perf/general_perf
+python3 core/perf_engine.py --hardware_type ILUVATAR --task swin-large-torch-fp32
+```
+
+
+## Results
+
+| Model  | BatchSize | Precision | QPS   | Top-1 Acc(%) |
+| :----: | :-------: | :-------: | :---: | :----------: |
+| Swin-L | 16        | FP16      | 5.746 | 85.62        |
\ No newline at end of file
diff --git a/models/cv/detection/atss/igie/README.md b/models/cv/detection/atss/igie/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..fa4ffd64486563276fb4ad966cc5eef7fb99d3c3
--- /dev/null
+++ b/models/cv/detection/atss/igie/README.md
@@ -0,0 +1,68 @@
+# ATSS
+
+## Description
+
+ATSS is an advanced adaptive training sample selection method that effectively enhances the performance of both anchor-based and anchor-free object detectors by dynamically choosing positive and negative samples based on the statistical characteristics of objects. The design of ATSS reduces reliance on hyperparameters, simplifies the sample selection process, and significantly improves detection accuracy without adding extra computational costs.
+
+## Setup
+
+### Install
+
+```bash
+# Install libGL
+## CentOS
+yum install -y mesa-libGL
+## Ubuntu
+apt install -y libgl1-mesa-dev
+
+pip3 install onnx
+pip3 install tqdm
+pip3 install onnxsim
+pip3 install mmdet==3.3.0
+pip3 install mmdeploy==1.3.1
+pip3 install mmengine==0.10.4
+```
+
+### Download
+
+Pretrained model:
+
+Dataset: to download the validation dataset.
+
+```bash
+wget https://download.openmmlab.com/mmdetection/v2.0/atss/atss_r50_fpn_1x_coco/atss_r50_fpn_1x_coco_20200209-985f7bd0.pth
+```
+
+### Model Conversion
+
+```bash
+# export onnx model
+python3 export.py --weight atss_r50_fpn_1x_coco_20200209-985f7bd0.pth --cfg atss_r50_fpn_1x_coco.py --output atss.onnx
+
+# use onnxsim optimize onnx model
+onnxsim atss.onnx atss_opt.onnx
+```
+
+## Inference
+
+```bash
+export DATASETS_DIR=/Path/to/coco/
+```
+
+### FP16
+
+```bash
+# Accuracy
+bash scripts/infer_atss_fp16_accuracy.sh
+# Performance
+bash scripts/infer_atss_fp16_performance.sh
+```
+
+## Results
+
+| Model | BatchSize | Input Shape | Precision | FPS | mAP@0.5 |
+| :---: | :-------: | :---------: | :-------: | :-----: | :--------: |
+| ATSS | 32 | 800x800 | FP16 | 126.864 | 0.541 |
diff --git a/models/cv/detection/atss/igie/atss_r50_fpn_1x_coco.py b/models/cv/detection/atss/igie/atss_r50_fpn_1x_coco.py
new file mode 100755
index 0000000000000000000000000000000000000000..0378cf0b6f9307ccd1e931eab2c705ab3b121475
--- /dev/null
+++ b/models/cv/detection/atss/igie/atss_r50_fpn_1x_coco.py
@@ -0,0 +1,285 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+auto_scale_lr = dict(base_batch_size=16, enable=False)
+backend_args = None
+data_root = 'data/coco/'
+dataset_type = 'CocoDataset'
+default_hooks = dict(
+ checkpoint=dict(interval=1, type='CheckpointHook'),
+ logger=dict(interval=50, type='LoggerHook'),
+ param_scheduler=dict(type='ParamSchedulerHook'),
+ sampler_seed=dict(type='DistSamplerSeedHook'),
+ timer=dict(type='IterTimerHook'),
+ visualization=dict(type='DetVisualizationHook'))
+default_scope = 'mmdet'
+env_cfg = dict(
+ cudnn_benchmark=False,
+ dist_cfg=dict(backend='nccl'),
+ mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
+load_from = None
+log_level = 'ERROR'
+log_processor = dict(by_epoch=True, type='LogProcessor', window_size=50)
+model = dict(
+ backbone=dict(
+ depth=50,
+ frozen_stages=1,
+ init_cfg=dict(checkpoint='torchvision://resnet50', type='Pretrained'),
+ norm_cfg=dict(requires_grad=True, type='BN'),
+ norm_eval=True,
+ num_stages=4,
+ out_indices=(
+ 0,
+ 1,
+ 2,
+ 3,
+ ),
+ style='pytorch',
+ type='ResNet'),
+ bbox_head=dict(
+ anchor_generator=dict(
+ octave_base_scale=8,
+ ratios=[
+ 1.0,
+ ],
+ scales_per_octave=1,
+ strides=[
+ 8,
+ 16,
+ 32,
+ 64,
+ 128,
+ ],
+ type='AnchorGenerator'),
+ bbox_coder=dict(
+ target_means=[
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ ],
+ target_stds=[
+ 0.1,
+ 0.1,
+ 0.2,
+ 0.2,
+ ],
+ type='DeltaXYWHBBoxCoder'),
+ feat_channels=256,
+ in_channels=256,
+ loss_bbox=dict(loss_weight=2.0, type='GIoULoss'),
+ loss_centerness=dict(
+ loss_weight=1.0, type='CrossEntropyLoss', use_sigmoid=True),
+ loss_cls=dict(
+ alpha=0.25,
+ gamma=2.0,
+ loss_weight=1.0,
+ type='FocalLoss',
+ use_sigmoid=True),
+ num_classes=80,
+ stacked_convs=4,
+ type='ATSSHead'),
+ data_preprocessor=dict(
+ bgr_to_rgb=True,
+ mean=[
+ 123.675,
+ 116.28,
+ 103.53,
+ ],
+ pad_size_divisor=32,
+ std=[
+ 58.395,
+ 57.12,
+ 57.375,
+ ],
+ type='DetDataPreprocessor'),
+ neck=dict(
+ add_extra_convs='on_output',
+ in_channels=[
+ 256,
+ 512,
+ 1024,
+ 2048,
+ ],
+ num_outs=5,
+ out_channels=256,
+ start_level=1,
+ type='FPN'),
+ test_cfg=dict(
+ max_per_img=100,
+ min_bbox_size=0,
+ nms=dict(iou_threshold=0.6, type='nms'),
+ nms_pre=1000,
+ score_thr=0.05),
+ train_cfg=dict(
+ allowed_border=-1,
+ assigner=dict(topk=9, type='ATSSAssigner'),
+ debug=False,
+ pos_weight=-1),
+ type='ATSS')
+optim_wrapper = dict(
+ optimizer=dict(lr=0.02, momentum=0.9, type='SGD', weight_decay=0.0001),
+ type='OptimWrapper')
+param_scheduler = [
+ dict(
+ begin=0, by_epoch=False, end=500, start_factor=0.001, type='LinearLR'),
+ dict(
+ begin=0,
+ by_epoch=True,
+ end=12,
+ gamma=0.1,
+ milestones=[
+ 8,
+ 11,
+ ],
+ type='MultiStepLR'),
+]
+resume = False
+test_cfg = dict(type='TestLoop')
+test_dataloader = dict(
+ batch_size=32,
+ dataset=dict(
+ ann_file='annotations/instances_val2017.json',
+ backend_args=None,
+ data_prefix=dict(img='images/val2017/'),
+ data_root='/root/.igie_cache/modelzoo_data/datasets/coco/',
+ pipeline=[
+ dict(backend_args=None, type='LoadImageFromFile'),
+ dict(keep_ratio=False, scale=(
+ 800,
+ 800,
+ ), type='Resize'),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(
+ meta_keys=(
+ 'img_id',
+ 'img_path',
+ 'ori_shape',
+ 'img_shape',
+ 'scale_factor',
+ ),
+ type='PackDetInputs'),
+ ],
+ test_mode=True,
+ type='CocoDataset'),
+ drop_last=False,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(shuffle=False, type='DefaultSampler'))
+test_evaluator = dict(
+ ann_file=
+ '/root/.igie_cache/modelzoo_data/datasets/coco/annotations/instances_val2017.json',
+ backend_args=None,
+ format_only=False,
+ metric='bbox',
+ type='CocoMetric')
+test_pipeline = [
+ dict(backend_args=None, type='LoadImageFromFile'),
+ dict(keep_ratio=False, scale=(
+ 800,
+ 800,
+ ), type='Resize'),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(
+ meta_keys=(
+ 'img_id',
+ 'img_path',
+ 'ori_shape',
+ 'img_shape',
+ 'scale_factor',
+ ),
+ type='PackDetInputs'),
+]
+train_cfg = dict(max_epochs=12, type='EpochBasedTrainLoop', val_interval=1)
+train_dataloader = dict(
+ batch_sampler=dict(type='AspectRatioBatchSampler'),
+ batch_size=2,
+ dataset=dict(
+ ann_file='annotations/instances_train2017.json',
+ backend_args=None,
+ data_prefix=dict(img='train2017/'),
+ data_root='data/coco/',
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
+ pipeline=[
+ dict(backend_args=None, type='LoadImageFromFile'),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(keep_ratio=True, scale=(
+ 1333,
+ 800,
+ ), type='Resize'),
+ dict(prob=0.5, type='RandomFlip'),
+ dict(type='PackDetInputs'),
+ ],
+ type='CocoDataset'),
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(shuffle=True, type='DefaultSampler'))
+train_pipeline = [
+ dict(backend_args=None, type='LoadImageFromFile'),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(keep_ratio=True, scale=(
+ 1333,
+ 800,
+ ), type='Resize'),
+ dict(prob=0.5, type='RandomFlip'),
+ dict(type='PackDetInputs'),
+]
+val_cfg = dict(type='ValLoop')
+val_dataloader = dict(
+ batch_size=1,
+ dataset=dict(
+ ann_file='annotations/instances_val2017.json',
+ backend_args=None,
+ data_prefix=dict(img='val2017/'),
+ data_root='data/coco/',
+ pipeline=[
+ dict(backend_args=None, type='LoadImageFromFile'),
+ dict(keep_ratio=False, scale=(
+ 800,
+ 800,
+ ), type='Resize'),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(
+ meta_keys=(
+ 'img_id',
+ 'img_path',
+ 'ori_shape',
+ 'img_shape',
+ 'scale_factor',
+ ),
+ type='PackDetInputs'),
+ ],
+ test_mode=True,
+ type='CocoDataset'),
+ drop_last=False,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(shuffle=False, type='DefaultSampler'))
+val_evaluator = dict(
+ ann_file='data/coco/annotations/instances_val2017.json',
+ backend_args=None,
+ format_only=False,
+ metric='bbox',
+ type='CocoMetric')
+vis_backends = [
+ dict(type='LocalVisBackend'),
+]
+visualizer = dict(
+ name='visualizer',
+ type='DetLocalVisualizer',
+ vis_backends=[
+ dict(type='LocalVisBackend'),
+ ])
+work_dir = './'
diff --git a/models/cv/detection/atss/igie/build_engine.py b/models/cv/detection/atss/igie/build_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..d3626ae76cc9781d9a01ec3d3e2afbdbca409ff5
--- /dev/null
+++ b/models/cv/detection/atss/igie/build_engine.py
@@ -0,0 +1,73 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import tvm
+import argparse
+from tvm import relay
+from tvm.relay.import_model import import_model_to_igie
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--model_path",
+ type=str,
+ required=True,
+ help="original model path.")
+
+ parser.add_argument("--engine_path",
+ type=str,
+ required=True,
+ help="igie export engine path.")
+
+ parser.add_argument("--input",
+ type=str,
+ required=True,
+ help="""
+ input info of the model, format should be:
+ input_name:input_shape
+ eg: --input input:1,3,224,224.
+ """)
+
+ parser.add_argument("--precision",
+ type=str,
+ choices=["fp32", "fp16", "int8"],
+ required=True,
+ help="model inference precision.")
+
+ args = parser.parse_args()
+
+ return args
+
+def main():
+ args = parse_args()
+
+ # get input valueinfo
+ input_name, input_shape = args.input.split(":")
+ shape = tuple([int(s) for s in input_shape.split(",")])
+ input_dict = {input_name: shape}
+
+ target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer")
+
+ mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie")
+
+ # build engine
+ lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision)
+
+ # export engine
+ lib.export_library(args.engine_path)
+
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/models/cv/detection/atss/igie/deploy_default.py b/models/cv/detection/atss/igie/deploy_default.py
new file mode 100644
index 0000000000000000000000000000000000000000..b8d8e43dc829456f0c2e46a7acfc3128757f945d
--- /dev/null
+++ b/models/cv/detection/atss/igie/deploy_default.py
@@ -0,0 +1,41 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
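+# Minimal mmdeploy deployment config: it describes how the detector is exported to ONNX
+# and which post-processing (NMS thresholds, top-k limits) the deployed model applies.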
+onnx_config = dict(
+ type='onnx',
+ export_params=True,
+ keep_initializers_as_inputs=False,
+ opset_version=11,
+ save_file='end2end.onnx',
+ input_names=['input'],
+ output_names=['output'],
+ input_shape=None,
+ optimize=True)
+
+codebase_config = dict(
+ type='mmdet',
+ task='ObjectDetection',
+ model_type='end2end',
+ post_processing=dict(
+ score_threshold=0.05,
+ confidence_threshold=0.005,
+ iou_threshold=0.5,
+ max_output_boxes_per_class=200,
+ pre_top_k=5000,
+ keep_top_k=100,
+ background_label_id=-1,
+ ))
+
+backend_config = dict(type='onnxruntime')
\ No newline at end of file
diff --git a/models/cv/detection/atss/igie/export.py b/models/cv/detection/atss/igie/export.py
new file mode 100644
index 0000000000000000000000000000000000000000..13573c9dff3d96be4ba59eaa8698d67fb1d50f13
--- /dev/null
+++ b/models/cv/detection/atss/igie/export.py
@@ -0,0 +1,72 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import argparse
+
+import torch
+from mmdeploy.utils import load_config
+from mmdeploy.apis import build_task_processor
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--weight",
+ type=str,
+ required=True,
+ help="pytorch model weight.")
+
+ parser.add_argument("--cfg",
+ type=str,
+ required=True,
+ help="model config file.")
+
+ parser.add_argument("--output",
+ type=str,
+ required=True,
+ help="export onnx model path.")
+
+ args = parser.parse_args()
+ return args
+
+def main():
+ args = parse_args()
+
+ deploy_cfg = 'deploy_default.py'
+ model_cfg = args.cfg
+ model_checkpoint = args.weight
+
+ deploy_cfg, model_cfg = load_config(deploy_cfg, model_cfg)
+
+ task_processor = build_task_processor(model_cfg, deploy_cfg, device='cpu')
+
+ model = task_processor.build_pytorch_model(model_checkpoint)
+
+ input_names = ['input']
+ dynamic_axes = {'input': {0: '-1'}}
+ dummy_input = torch.randn(1, 3, 800, 800)
+
+ torch.onnx.export(
+ model,
+ dummy_input,
+ args.output,
+ input_names = input_names,
+ dynamic_axes = dynamic_axes,
+ opset_version=13
+ )
+
+ print("Export onnx model successfully! ")
+
+if __name__ == '__main__':
+ main()
\ No newline at end of file
diff --git a/models/cv/detection/atss/igie/inference.py b/models/cv/detection/atss/igie/inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..cc1a2b8604533313fb7cb8f41d5d899e3ce25553
--- /dev/null
+++ b/models/cv/detection/atss/igie/inference.py
@@ -0,0 +1,160 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import os
+import argparse
+import tvm
+import torch
+import numpy as np
+from tvm import relay
+from tqdm import tqdm
+from mmdet.registry import RUNNERS
+from mmengine.config import Config
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--engine",
+ type=str,
+ required=True,
+ help="igie engine path.")
+
+ parser.add_argument("--batchsize",
+ type=int,
+ required=True,
+ help="inference batch size.")
+
+ parser.add_argument("--datasets",
+ type=str,
+ required=True,
+ help="datasets path.")
+
+ parser.add_argument("--input_name",
+ type=str,
+ required=True,
+ help="input name of the model.")
+
+ parser.add_argument("--warmup",
+ type=int,
+ default=3,
+ help="number of warmup before test.")
+
+ parser.add_argument("--acc_target",
+ type=float,
+ default=None,
+ help="Model inference Accuracy target.")
+
+ parser.add_argument("--fps_target",
+ type=float,
+ default=None,
+ help="Model inference FPS target.")
+
+ parser.add_argument("--perf_only",
+ type=bool,
+ default=False,
+ help="Run performance test only")
+
+ args = parser.parse_args()
+
+ return args
+
+def main():
+ args = parse_args()
+
+ batch_size = args.batchsize
+
+ # create iluvatar target & device
+ target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer")
+ device = tvm.device(target.kind.name, 0)
+
+ # load engine
+ lib = tvm.runtime.load_module(args.engine)
+
+ # create runtime from engine
+ module = tvm.contrib.graph_executor.GraphModule(lib["default"](device))
+
+ # just run perf test
+ if args.perf_only:
+ ftimer = module.module.time_evaluator("run", device, number=100, repeat=1)
+ prof_res = np.array(ftimer().results) * 1000
+ fps = batch_size * 1000 / np.mean(prof_res)
+ print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}")
+ else:
+ # warm up
+ for _ in range(args.warmup):
+ module.run()
+
+ # runner config
+ cfg = Config.fromfile("atss_r50_fpn_1x_coco.py")
+
+ cfg.work_dir = "./"
+ cfg['test_dataloader']['batch_size'] = batch_size
+ cfg['test_dataloader']['dataset']['data_root'] = args.datasets
+ cfg['test_dataloader']['dataset']['data_prefix']['img'] = 'images/val2017/'
+ cfg['test_evaluator']['ann_file'] = os.path.join(args.datasets, 'annotations/instances_val2017.json')
+ cfg['log_level'] = 'ERROR'
+
+ # build runner
+ runner = RUNNERS.build(cfg)
+
+ for data in tqdm(runner.test_dataloader):
+ cls_score = []
+ box_reg = []
+ score_factors = []
+
+ input_data = runner.model.data_preprocessor(data, False)
+ image = input_data['inputs'].cpu()
+ pad_batch = len(image) != batch_size
+
+ if pad_batch:
+ origin_size = len(image)
+ image = np.resize(image, (batch_size, *image.shape[1:]))
+
+ module.set_input("input", tvm.nd.array(image, device))
+
+ module.run()
+
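+            # route each raw head output by its channel dimension: 80 channels are the per-class
+            # scores, 4 channels are the box regression deltas, and the remaining tensors are the
+            # centerness score factors consumed by predict_by_feat below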
+ for i in range(module.get_num_outputs()):
+ output = module.get_output(i).asnumpy()
+
+ if pad_batch:
+ output = output[:origin_size]
+
+ output = torch.from_numpy(output)
+
+ if output.shape[1] == 80:
+ cls_score.append(output)
+ elif output.shape[1] == 4:
+ box_reg.append(output)
+ else:
+ score_factors.append(output)
+
+ batch_img_metas = [
+ data_samples.metainfo for data_samples in data['data_samples']
+ ]
+
+ preds = runner.model.bbox_head.predict_by_feat(
+ cls_score, box_reg, score_factors=score_factors, batch_img_metas=batch_img_metas, rescale=True
+ )
+
+ batch_data_samples = runner.model.add_pred_to_datasample(input_data['data_samples'], preds)
+
+ runner.test_evaluator.process(data_samples=batch_data_samples, data_batch=data)
+
+ metrics = runner.test_evaluator.evaluate(len(runner.test_dataloader.dataset))
+
+
+if __name__ == "__main__":
+ main()
diff --git a/models/cv/detection/atss/igie/scripts/infer_atss_fp16_accuracy.sh b/models/cv/detection/atss/igie/scripts/infer_atss_fp16_accuracy.sh
new file mode 100644
index 0000000000000000000000000000000000000000..8bb8ab47171fbd78b8ac2417bc02749f6303279a
--- /dev/null
+++ b/models/cv/detection/atss/igie/scripts/infer_atss_fp16_accuracy.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+batchsize=32
+model_path="atss_opt.onnx"
+datasets_path=${DATASETS_DIR}
+
+# build engine
+python3 build_engine.py \
+ --model_path ${model_path} \
+ --input input:${batchsize},3,800,800 \
+ --precision fp16 \
+ --engine_path atss_bs_${batchsize}_fp16.so
+
+
+# inference
+python3 inference.py \
+ --engine atss_bs_${batchsize}_fp16.so \
+ --batchsize ${batchsize} \
+ --input_name input \
+ --datasets ${datasets_path}
\ No newline at end of file
diff --git a/models/cv/detection/atss/igie/scripts/infer_atss_fp16_performance.sh b/models/cv/detection/atss/igie/scripts/infer_atss_fp16_performance.sh
new file mode 100644
index 0000000000000000000000000000000000000000..427c05be1cdbe3513d06f9b338097f27c4883b9d
--- /dev/null
+++ b/models/cv/detection/atss/igie/scripts/infer_atss_fp16_performance.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+batchsize=32
+model_path="atss_opt.onnx"
+datasets_path=${DATASETS_DIR}
+
+# build engine
+python3 build_engine.py \
+ --model_path ${model_path} \
+ --input input:${batchsize},3,800,800 \
+ --precision fp16 \
+ --engine_path atss_bs_${batchsize}_fp16.so
+
+
+# inference
+python3 inference.py \
+ --engine atss_bs_${batchsize}_fp16.so \
+ --batchsize ${batchsize} \
+ --input_name input \
+ --datasets ${datasets_path} \
+ --perf_only True
\ No newline at end of file
diff --git a/models/cv/detection/fcos/igie/README.md b/models/cv/detection/fcos/igie/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..2aa984220574c4ad554fd867422e6047bcb23ca0
--- /dev/null
+++ b/models/cv/detection/fcos/igie/README.md
@@ -0,0 +1,65 @@
+# FCOS
+
+## Description
+
+FCOS is an innovative one-stage object detection framework that abandons traditional anchor box dependency and uses a fully convolutional network for per-pixel target prediction. By introducing a centerness branch and multi-scale feature fusion, FCOS enhances detection performance while simplifying the model structure, especially in detecting small and overlapping targets. Additionally, FCOS eliminates the need for hyperparameter tuning related to anchor boxes, streamlining the model training and tuning process.
+
+## Setup
+
+### Install
+
+```bash
+# Install libGL
+## CentOS
+yum install -y mesa-libGL
+## Ubuntu
+apt install -y libgl1-mesa-dev
+
+pip3 install onnx
+pip3 install tqdm
+pip3 install onnxsim
+pip3 install mmdet==3.3.0
+pip3 install mmdeploy==1.3.1
+pip3 install mmengine==0.10.4
+```
+
+### Download
+
+Pretrained model:
+
+Dataset: to download the validation dataset.
+
+```bash
+wget https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_r50_caffe_fpn_gn-head_1x_coco/fcos_r50_caffe_fpn_gn-head_1x_coco-821213aa.pth
+```
+
+### Model Conversion
+
+```bash
+# export onnx model
+python3 export.py --weight fcos_r50_caffe_fpn_gn-head_1x_coco-821213aa.pth --cfg fcos_r50_caffe_fpn_gn-head_1x_coco.py --output fcos.onnx
+
+# use onnxsim optimize onnx model
+onnxsim fcos.onnx fcos_opt.onnx
+```
+
+## Inference
+
+```bash
+export DATASETS_DIR=/Path/to/coco/
+```
+
+### FP16
+
+```bash
+# Accuracy
+bash scripts/infer_fcos_fp16_accuracy.sh
+# Performance
+bash scripts/infer_fcos_fp16_performance.sh
+```
+
+## Results
+
+| Model | BatchSize | Input Shape | Precision | FPS | mAP@0.5 |
+| :---: | :-------: | :---------: | :-------: | :-----: | :--------: |
+| FCOS | 32 | 800x800 | FP16 | 135.019 | 0.522 |
diff --git a/models/cv/detection/fcos/igie/build_engine.py b/models/cv/detection/fcos/igie/build_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..d3626ae76cc9781d9a01ec3d3e2afbdbca409ff5
--- /dev/null
+++ b/models/cv/detection/fcos/igie/build_engine.py
@@ -0,0 +1,73 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import tvm
+import argparse
+from tvm import relay
+from tvm.relay.import_model import import_model_to_igie
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--model_path",
+ type=str,
+ required=True,
+ help="original model path.")
+
+ parser.add_argument("--engine_path",
+ type=str,
+ required=True,
+ help="igie export engine path.")
+
+ parser.add_argument("--input",
+ type=str,
+ required=True,
+ help="""
+ input info of the model, format should be:
+ input_name:input_shape
+ eg: --input input:1,3,224,224.
+ """)
+
+ parser.add_argument("--precision",
+ type=str,
+ choices=["fp32", "fp16", "int8"],
+ required=True,
+ help="model inference precision.")
+
+ args = parser.parse_args()
+
+ return args
+
+def main():
+ args = parse_args()
+
+ # get input valueinfo
+ input_name, input_shape = args.input.split(":")
+ shape = tuple([int(s) for s in input_shape.split(",")])
+ input_dict = {input_name: shape}
+
+ target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer")
+
+ mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie")
+
+ # build engine
+ lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision)
+
+ # export engine
+ lib.export_library(args.engine_path)
+
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/models/cv/detection/fcos/igie/deploy_default.py b/models/cv/detection/fcos/igie/deploy_default.py
new file mode 100644
index 0000000000000000000000000000000000000000..b8d8e43dc829456f0c2e46a7acfc3128757f945d
--- /dev/null
+++ b/models/cv/detection/fcos/igie/deploy_default.py
@@ -0,0 +1,41 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+onnx_config = dict(
+ type='onnx',
+ export_params=True,
+ keep_initializers_as_inputs=False,
+ opset_version=11,
+ save_file='end2end.onnx',
+ input_names=['input'],
+ output_names=['output'],
+ input_shape=None,
+ optimize=True)
+
+codebase_config = dict(
+ type='mmdet',
+ task='ObjectDetection',
+ model_type='end2end',
+ post_processing=dict(
+ score_threshold=0.05,
+ confidence_threshold=0.005,
+ iou_threshold=0.5,
+ max_output_boxes_per_class=200,
+ pre_top_k=5000,
+ keep_top_k=100,
+ background_label_id=-1,
+ ))
+
+backend_config = dict(type='onnxruntime')
\ No newline at end of file
diff --git a/models/cv/detection/fcos/igie/export.py b/models/cv/detection/fcos/igie/export.py
new file mode 100644
index 0000000000000000000000000000000000000000..13573c9dff3d96be4ba59eaa8698d67fb1d50f13
--- /dev/null
+++ b/models/cv/detection/fcos/igie/export.py
@@ -0,0 +1,72 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import argparse
+
+import torch
+from mmdeploy.utils import load_config
+from mmdeploy.apis import build_task_processor
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--weight",
+ type=str,
+ required=True,
+ help="pytorch model weight.")
+
+ parser.add_argument("--cfg",
+ type=str,
+ required=True,
+ help="model config file.")
+
+ parser.add_argument("--output",
+ type=str,
+ required=True,
+ help="export onnx model path.")
+
+ args = parser.parse_args()
+ return args
+
+def main():
+ args = parse_args()
+
+ deploy_cfg = 'deploy_default.py'
+ model_cfg = args.cfg
+ model_checkpoint = args.weight
+
+ deploy_cfg, model_cfg = load_config(deploy_cfg, model_cfg)
+
+ task_processor = build_task_processor(model_cfg, deploy_cfg, device='cpu')
+
+ model = task_processor.build_pytorch_model(model_checkpoint)
+
+ input_names = ['input']
+ dynamic_axes = {'input': {0: '-1'}}
+ dummy_input = torch.randn(1, 3, 800, 800)
+
+ torch.onnx.export(
+ model,
+ dummy_input,
+ args.output,
+ input_names = input_names,
+ dynamic_axes = dynamic_axes,
+ opset_version=13
+ )
+
+ print("Export onnx model successfully! ")
+
+if __name__ == '__main__':
+ main()
\ No newline at end of file
diff --git a/models/cv/detection/fcos/igie/fcos_r50_caffe_fpn_gn-head_1x_coco.py b/models/cv/detection/fcos/igie/fcos_r50_caffe_fpn_gn-head_1x_coco.py
new file mode 100755
index 0000000000000000000000000000000000000000..04941d978d5a49cf71df16acf4e0b7486c0ea56d
--- /dev/null
+++ b/models/cv/detection/fcos/igie/fcos_r50_caffe_fpn_gn-head_1x_coco.py
@@ -0,0 +1,263 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+auto_scale_lr = dict(base_batch_size=16, enable=False)
+backend_args = None
+data_root = 'data/coco/'
+dataset_type = 'CocoDataset'
+default_hooks = dict(
+ checkpoint=dict(interval=1, type='CheckpointHook'),
+ logger=dict(interval=50, type='LoggerHook'),
+ param_scheduler=dict(type='ParamSchedulerHook'),
+ sampler_seed=dict(type='DistSamplerSeedHook'),
+ timer=dict(type='IterTimerHook'),
+ visualization=dict(type='DetVisualizationHook'))
+default_scope = 'mmdet'
+env_cfg = dict(
+ cudnn_benchmark=False,
+ dist_cfg=dict(backend='nccl'),
+ mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
+evaluation = dict(interval=1, metric='bbox')
+load_from = None
+log_level = 'ERROR'
+log_processor = dict(by_epoch=True, type='LogProcessor', window_size=50)
+model = dict(
+ backbone=dict(
+ depth=50,
+ frozen_stages=1,
+ init_cfg=dict(
+ checkpoint='open-mmlab://detectron/resnet50_caffe',
+ type='Pretrained'),
+ norm_cfg=dict(requires_grad=False, type='BN'),
+ norm_eval=True,
+ num_stages=4,
+ out_indices=(
+ 0,
+ 1,
+ 2,
+ 3,
+ ),
+ style='caffe',
+ type='ResNet'),
+ bbox_head=dict(
+ feat_channels=256,
+ in_channels=256,
+ loss_bbox=dict(loss_weight=1.0, type='IoULoss'),
+ loss_centerness=dict(
+ loss_weight=1.0, type='CrossEntropyLoss', use_sigmoid=True),
+ loss_cls=dict(
+ alpha=0.25,
+ gamma=2.0,
+ loss_weight=1.0,
+ type='FocalLoss',
+ use_sigmoid=True),
+ num_classes=80,
+ stacked_convs=4,
+ strides=[
+ 8,
+ 16,
+ 32,
+ 64,
+ 128,
+ ],
+ type='FCOSHead'),
+ data_preprocessor=dict(
+ bgr_to_rgb=False,
+ mean=[
+ 102.9801,
+ 115.9465,
+ 122.7717,
+ ],
+ pad_size_divisor=32,
+ std=[
+ 1.0,
+ 1.0,
+ 1.0,
+ ],
+ type='DetDataPreprocessor'),
+ neck=dict(
+ add_extra_convs='on_output',
+ in_channels=[
+ 256,
+ 512,
+ 1024,
+ 2048,
+ ],
+ num_outs=5,
+ out_channels=256,
+ relu_before_extra_convs=True,
+ start_level=1,
+ type='FPN'),
+ test_cfg=dict(
+ max_per_img=100,
+ min_bbox_size=0,
+ nms=dict(iou_threshold=0.5, type='nms'),
+ nms_pre=1000,
+ score_thr=0.05),
+ type='FCOS')
+optim_wrapper = dict(
+ optimizer=dict(lr=0.02, momentum=0.9, type='SGD', weight_decay=0.0001),
+ type='OptimWrapper')
+param_scheduler = [
+ dict(
+ begin=0, by_epoch=False, end=500, start_factor=0.001, type='LinearLR'),
+ dict(
+ begin=0,
+ by_epoch=True,
+ end=12,
+ gamma=0.1,
+ milestones=[
+ 8,
+ 11,
+ ],
+ type='MultiStepLR'),
+]
+resume = False
+test_cfg = dict(type='TestLoop')
+test_dataloader = dict(
+ batch_size=32,
+ dataset=dict(
+ ann_file='annotations/instances_val2017.json',
+ backend_args=None,
+ data_prefix=dict(img='images/val2017/'),
+ data_root='/root/.igie_cache/modelzoo_data/datasets/coco/',
+ pipeline=[
+ dict(backend_args=None, type='LoadImageFromFile'),
+ dict(keep_ratio=True, scale=(
+ 800,
+ 800,
+ ), type='Resize'),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(
+ meta_keys=(
+ 'img_id',
+ 'img_path',
+ 'ori_shape',
+ 'img_shape',
+ 'scale_factor',
+ ),
+ type='PackDetInputs'),
+ ],
+ test_mode=True,
+ type='CocoDataset'),
+ drop_last=False,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(shuffle=False, type='DefaultSampler'))
+test_evaluator = dict(
+ ann_file=
+ '/root/.igie_cache/modelzoo_data/datasets/coco/annotations/instances_val2017.json',
+ backend_args=None,
+ format_only=False,
+ metric='bbox',
+ type='CocoMetric')
+test_pipeline = [
+ dict(backend_args=None, type='LoadImageFromFile'),
+ dict(keep_ratio=True, scale=(
+ 800,
+ 800,
+ ), type='Resize'),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(
+ meta_keys=(
+ 'img_id',
+ 'img_path',
+ 'ori_shape',
+ 'img_shape',
+ 'scale_factor',
+ ),
+ type='PackDetInputs'),
+]
+train_cfg = dict(max_epochs=12, type='EpochBasedTrainLoop', val_interval=1)
+train_dataloader = dict(
+ batch_sampler=dict(type='AspectRatioBatchSampler'),
+ batch_size=2,
+ dataset=dict(
+ ann_file='annotations/instances_train2017.json',
+ backend_args=None,
+ data_prefix=dict(img='train2017/'),
+ data_root='data/coco/',
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
+ pipeline=[
+ dict(backend_args=None, type='LoadImageFromFile'),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(keep_ratio=True, scale=(
+ 1333,
+ 800,
+ ), type='Resize'),
+ dict(prob=0.5, type='RandomFlip'),
+ dict(type='PackDetInputs'),
+ ],
+ type='CocoDataset'),
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(shuffle=True, type='DefaultSampler'))
+train_pipeline = [
+ dict(backend_args=None, type='LoadImageFromFile'),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(keep_ratio=True, scale=(
+ 1333,
+ 800,
+ ), type='Resize'),
+ dict(prob=0.5, type='RandomFlip'),
+ dict(type='PackDetInputs'),
+]
+val_cfg = dict(type='ValLoop')
+val_dataloader = dict(
+ batch_size=1,
+ dataset=dict(
+ ann_file='annotations/instances_val2017.json',
+ backend_args=None,
+ data_prefix=dict(img='val2017/'),
+ data_root='data/coco/',
+ pipeline=[
+ dict(backend_args=None, type='LoadImageFromFile'),
+ dict(keep_ratio=True, scale=(
+ 800,
+ 800,
+ ), type='Resize'),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(
+ meta_keys=(
+ 'img_id',
+ 'img_path',
+ 'ori_shape',
+ 'img_shape',
+ 'scale_factor',
+ ),
+ type='PackDetInputs'),
+ ],
+ test_mode=True,
+ type='CocoDataset'),
+ drop_last=False,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(shuffle=False, type='DefaultSampler'))
+val_evaluator = dict(
+ ann_file='data/coco/annotations/instances_val2017.json',
+ backend_args=None,
+ format_only=False,
+ metric='bbox',
+ type='CocoMetric')
+vis_backends = [
+ dict(type='LocalVisBackend'),
+]
+visualizer = dict(
+ name='visualizer',
+ type='DetLocalVisualizer',
+ vis_backends=[
+ dict(type='LocalVisBackend'),
+ ])
+work_dir = './'
diff --git a/models/cv/detection/fcos/igie/inference.py b/models/cv/detection/fcos/igie/inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..e0517124fcb3e4cf136adfff62f6dda9c2765064
--- /dev/null
+++ b/models/cv/detection/fcos/igie/inference.py
@@ -0,0 +1,160 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import os
+import argparse
+import tvm
+import torch
+import numpy as np
+from tvm import relay
+from tqdm import tqdm
+from mmdet.registry import RUNNERS
+from mmengine.config import Config
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--engine",
+ type=str,
+ required=True,
+ help="igie engine path.")
+
+ parser.add_argument("--batchsize",
+ type=int,
+ required=True,
+ help="inference batch size.")
+
+ parser.add_argument("--datasets",
+ type=str,
+ required=True,
+ help="datasets path.")
+
+ parser.add_argument("--input_name",
+ type=str,
+ required=True,
+ help="input name of the model.")
+
+ parser.add_argument("--warmup",
+ type=int,
+ default=3,
+ help="number of warmup before test.")
+
+ parser.add_argument("--acc_target",
+ type=float,
+ default=None,
+ help="Model inference Accuracy target.")
+
+ parser.add_argument("--fps_target",
+ type=float,
+ default=None,
+ help="Model inference FPS target.")
+
+ parser.add_argument("--perf_only",
+ type=bool,
+ default=False,
+ help="Run performance test only")
+
+ args = parser.parse_args()
+
+ return args
+
+def main():
+ args = parse_args()
+
+ batch_size = args.batchsize
+
+ # create iluvatar target & device
+ target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer")
+ device = tvm.device(target.kind.name, 0)
+
+ # load engine
+ lib = tvm.runtime.load_module(args.engine)
+
+ # create runtime from engine
+ module = tvm.contrib.graph_executor.GraphModule(lib["default"](device))
+
+ # just run perf test
+ if args.perf_only:
+ ftimer = module.module.time_evaluator("run", device, number=100, repeat=1)
+ prof_res = np.array(ftimer().results) * 1000
+ fps = batch_size * 1000 / np.mean(prof_res)
+ print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}")
+ else:
+ # warm up
+ for _ in range(args.warmup):
+ module.run()
+
+ # runner config
+ cfg = Config.fromfile("fcos_r50_caffe_fpn_gn-head_1x_coco.py")
+
+ cfg.work_dir = "./"
+ cfg['test_dataloader']['batch_size'] = batch_size
+ cfg['test_dataloader']['dataset']['data_root'] = args.datasets
+ cfg['test_dataloader']['dataset']['data_prefix']['img'] = 'images/val2017/'
+ cfg['test_evaluator']['ann_file'] = os.path.join(args.datasets, 'annotations/instances_val2017.json')
+ cfg['log_level'] = 'ERROR'
+
+ # build runner
+ runner = RUNNERS.build(cfg)
+
+ for data in tqdm(runner.test_dataloader):
+ cls_score = []
+ box_reg = []
+ score_factors = []
+
+ input_data = runner.model.data_preprocessor(data, False)
+ image = input_data['inputs'].cpu()
+ pad_batch = len(image) != batch_size
+
+ if pad_batch:
+ origin_size = len(image)
+ image = np.resize(image, (batch_size, *image.shape[1:]))
+
+ module.set_input("input", tvm.nd.array(image, device))
+
+ module.run()
+
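+            # Route each engine output by its channel dimension: 80 channels are
+            # class scores, 4 are box regression values, and the remaining
+            # (1-channel) outputs are FCOS centerness maps used as score_factors.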
+ for i in range(module.get_num_outputs()):
+ output = module.get_output(i).asnumpy()
+
+ if pad_batch:
+ output = output[:origin_size]
+
+ output = torch.from_numpy(output)
+
+ if output.shape[1] == 80:
+ cls_score.append(output)
+ elif output.shape[1] == 4:
+ box_reg.append(output)
+ else:
+ score_factors.append(output)
+
+ batch_img_metas = [
+ data_samples.metainfo for data_samples in data['data_samples']
+ ]
+
+ preds = runner.model.bbox_head.predict_by_feat(
+ cls_score, box_reg, score_factors=score_factors, batch_img_metas=batch_img_metas, rescale=True
+ )
+
+ batch_data_samples = runner.model.add_pred_to_datasample(input_data['data_samples'], preds)
+
+ runner.test_evaluator.process(data_samples=batch_data_samples, data_batch=data)
+
+ metrics = runner.test_evaluator.evaluate(len(runner.test_dataloader.dataset))
+
+
+if __name__ == "__main__":
+ main()
diff --git a/models/cv/detection/fcos/igie/scripts/infer_fcos_fp16_accuracy.sh b/models/cv/detection/fcos/igie/scripts/infer_fcos_fp16_accuracy.sh
new file mode 100644
index 0000000000000000000000000000000000000000..c09f2c330d0b1880ae3453c2880cbd63bd07350b
--- /dev/null
+++ b/models/cv/detection/fcos/igie/scripts/infer_fcos_fp16_accuracy.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+batchsize=32
+model_path="fcos_opt.onnx"
+datasets_path=${DATASETS_DIR}
+
+# build engine
+python3 build_engine.py \
+ --model_path ${model_path} \
+ --input input:${batchsize},3,800,800 \
+ --precision fp16 \
+ --engine_path fcos_bs_${batchsize}_fp16.so
+
+
+# inference
+python3 inference.py \
+ --engine fcos_bs_${batchsize}_fp16.so \
+ --batchsize ${batchsize} \
+ --input_name input \
+ --datasets ${datasets_path}
\ No newline at end of file
diff --git a/models/cv/detection/fcos/igie/scripts/infer_fcos_fp16_performance.sh b/models/cv/detection/fcos/igie/scripts/infer_fcos_fp16_performance.sh
new file mode 100644
index 0000000000000000000000000000000000000000..8f358c38f286b2725a8cc1738451a85ecad11d3d
--- /dev/null
+++ b/models/cv/detection/fcos/igie/scripts/infer_fcos_fp16_performance.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+batchsize=32
+model_path="fcos_opt.onnx"
+datasets_path=${DATASETS_DIR}
+
+# build engine
+python3 build_engine.py \
+ --model_path ${model_path} \
+ --input input:${batchsize},3,800,800 \
+ --precision fp16 \
+ --engine_path fcos_bs_${batchsize}_fp16.so
+
+
+# inference
+python3 inference.py \
+ --engine fcos_bs_${batchsize}_fp16.so \
+ --batchsize ${batchsize} \
+ --input_name input \
+ --datasets ${datasets_path} \
+ --perf_only True
\ No newline at end of file
diff --git a/models/cv/detection/fsaf/igie/README.md b/models/cv/detection/fsaf/igie/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..7a587dc0e10d1c382019a0b18c7bbe0574d26994
--- /dev/null
+++ b/models/cv/detection/fsaf/igie/README.md
@@ -0,0 +1,65 @@
+# FSAF
+
+## Description
+
+The FSAF (Feature Selective Anchor-Free) module is an innovative component for single-shot object detection that enhances performance through online feature selection and anchor-free branches. The FSAF module dynamically selects the most suitable feature level for each object instance, rather than relying on traditional anchor-based heuristic methods. This improvement significantly boosts the accuracy of object detection, especially for small targets and in complex scenes. Moreover, compared to existing anchor-based detectors, the FSAF module maintains high efficiency while adding negligible additional inference overhead.
+
+## Setup
+
+### Install
+
+```bash
+# Install libGL
+## CentOS
+yum install -y mesa-libGL
+## Ubuntu
+apt install -y libgl1-mesa-dev
+
+pip3 install onnx
+pip3 install tqdm
+pip3 install onnxsim
+pip3 install mmdet==3.3.0
+pip3 install mmdeploy==1.3.1
+pip3 install mmengine==0.10.4
+```
+
+### Download
+
+Pretrained model:
+
+Dataset: download the COCO val2017 validation dataset.
+
+```bash
+wget https://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_r50_fpn_1x_coco/fsaf_r50_fpn_1x_coco-94ccc51f.pth
+```
+
+### Model Conversion
+
+```bash
+# export onnx model
+python3 export.py --weight fsaf_r50_fpn_1x_coco-94ccc51f.pth --cfg fsaf_r50_fpn_1x_coco.py --output fsaf.onnx
+
+# use onnxsim to optimize the onnx model
+onnxsim fsaf.onnx fsaf_opt.onnx
+```
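+
+As an optional sanity check (an extra step, not part of the official conversion flow), the simplified graph can be verified with the `onnx` package installed above:
+
+```bash
+python3 -c "import onnx; m = onnx.load('fsaf_opt.onnx'); onnx.checker.check_model(m); print([i.name for i in m.graph.input], '->', [o.name for o in m.graph.output])"
+```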
+
+## Inference
+
+```bash
+export DATASETS_DIR=/Path/to/coco/
+```
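+
+The layout below is inferred from the paths referenced in `inference.py` (annotation file and image prefix), so treat it as an assumption rather than an official requirement:
+
+```bash
+# /Path/to/coco/
+# ├── annotations/instances_val2017.json
+# └── images/val2017/          # val2017 images
+```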
+
+### FP16
+
+```bash
+# Accuracy
+bash scripts/infer_fsaf_fp16_accuracy.sh
+# Performance
+bash scripts/infer_fsaf_fp16_performance.sh
+```
+
+## Results
+
+| Model | BatchSize | Input Shape | Precision | FPS | mAP@0.5 |
+| :---: | :-------: | :---------: | :-------: | :-----: | :--------: |
+| FSAF | 32 | 800x800 | FP16 | 178.748 | 0.530 |
diff --git a/models/cv/detection/fsaf/igie/base/retinanet_r50_fpn_1x_coco.py b/models/cv/detection/fsaf/igie/base/retinanet_r50_fpn_1x_coco.py
new file mode 100644
index 0000000000000000000000000000000000000000..425d3d32efbaede948e1ac55e27d65be4ab26283
--- /dev/null
+++ b/models/cv/detection/fsaf/igie/base/retinanet_r50_fpn_1x_coco.py
@@ -0,0 +1,83 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+# model settings
+model = dict(
+ type='RetinaNet',
+ data_preprocessor=dict(
+ type='DetDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True,
+ pad_size_divisor=32),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=True,
+ style='pytorch',
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ start_level=1,
+ add_extra_convs='on_input',
+ num_outs=5),
+ bbox_head=dict(
+ type='RetinaHead',
+ num_classes=80,
+ in_channels=256,
+ stacked_convs=4,
+ feat_channels=256,
+ anchor_generator=dict(
+ type='AnchorGenerator',
+ octave_base_scale=4,
+ scales_per_octave=3,
+ ratios=[0.5, 1.0, 2.0],
+ strides=[8, 16, 32, 64, 128]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[.0, .0, .0, .0],
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
+ loss_cls=dict(
+ type='FocalLoss',
+ use_sigmoid=True,
+ gamma=2.0,
+ alpha=0.25,
+ loss_weight=1.0),
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
+ # model training and testing settings
+ train_cfg=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.4,
+ min_pos_iou=0,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='PseudoSampler'), # Focal loss should use PseudoSampler
+ allowed_border=-1,
+ pos_weight=-1,
+ debug=False),
+ test_cfg=dict(
+ nms_pre=1000,
+ min_bbox_size=0,
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.5),
+ max_per_img=100))
\ No newline at end of file
diff --git a/models/cv/detection/fsaf/igie/build_engine.py b/models/cv/detection/fsaf/igie/build_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..d3626ae76cc9781d9a01ec3d3e2afbdbca409ff5
--- /dev/null
+++ b/models/cv/detection/fsaf/igie/build_engine.py
@@ -0,0 +1,73 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import tvm
+import argparse
+from tvm import relay
+from tvm.relay.import_model import import_model_to_igie
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--model_path",
+ type=str,
+ required=True,
+ help="original model path.")
+
+ parser.add_argument("--engine_path",
+ type=str,
+ required=True,
+ help="igie export engine path.")
+
+ parser.add_argument("--input",
+ type=str,
+ required=True,
+ help="""
+ input info of the model, format should be:
+ input_name:input_shape
+ eg: --input input:1,3,224,224.
+ """)
+
+ parser.add_argument("--precision",
+ type=str,
+ choices=["fp32", "fp16", "int8"],
+ required=True,
+ help="model inference precision.")
+
+ args = parser.parse_args()
+
+ return args
+
+def main():
+ args = parse_args()
+
+ # get input valueinfo
+ input_name, input_shape = args.input.split(":")
+ shape = tuple([int(s) for s in input_shape.split(",")])
+ input_dict = {input_name: shape}
+
+ target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer")
+
+ mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie")
+
+ # build engine
+ lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision)
+
+ # export engine
+ lib.export_library(args.engine_path)
+
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/models/cv/detection/fsaf/igie/deploy_default.py b/models/cv/detection/fsaf/igie/deploy_default.py
new file mode 100644
index 0000000000000000000000000000000000000000..b8d8e43dc829456f0c2e46a7acfc3128757f945d
--- /dev/null
+++ b/models/cv/detection/fsaf/igie/deploy_default.py
@@ -0,0 +1,41 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+onnx_config = dict(
+ type='onnx',
+ export_params=True,
+ keep_initializers_as_inputs=False,
+ opset_version=11,
+ save_file='end2end.onnx',
+ input_names=['input'],
+ output_names=['output'],
+ input_shape=None,
+ optimize=True)
+
+codebase_config = dict(
+ type='mmdet',
+ task='ObjectDetection',
+ model_type='end2end',
+ post_processing=dict(
+ score_threshold=0.05,
+ confidence_threshold=0.005,
+ iou_threshold=0.5,
+ max_output_boxes_per_class=200,
+ pre_top_k=5000,
+ keep_top_k=100,
+ background_label_id=-1,
+ ))
+
+backend_config = dict(type='onnxruntime')
\ No newline at end of file
diff --git a/models/cv/detection/fsaf/igie/export.py b/models/cv/detection/fsaf/igie/export.py
new file mode 100644
index 0000000000000000000000000000000000000000..13573c9dff3d96be4ba59eaa8698d67fb1d50f13
--- /dev/null
+++ b/models/cv/detection/fsaf/igie/export.py
@@ -0,0 +1,72 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import argparse
+
+import torch
+from mmdeploy.utils import load_config
+from mmdeploy.apis import build_task_processor
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--weight",
+ type=str,
+ required=True,
+ help="pytorch model weight.")
+
+ parser.add_argument("--cfg",
+ type=str,
+ required=True,
+ help="model config file.")
+
+ parser.add_argument("--output",
+ type=str,
+ required=True,
+ help="export onnx model path.")
+
+ args = parser.parse_args()
+ return args
+
+def main():
+ args = parse_args()
+
+ deploy_cfg = 'deploy_default.py'
+ model_cfg = args.cfg
+ model_checkpoint = args.weight
+
+ deploy_cfg, model_cfg = load_config(deploy_cfg, model_cfg)
+
+ task_processor = build_task_processor(model_cfg, deploy_cfg, device='cpu')
+
+ model = task_processor.build_pytorch_model(model_checkpoint)
+
+ input_names = ['input']
+ dynamic_axes = {'input': {0: '-1'}}
+ dummy_input = torch.randn(1, 3, 800, 800)
+
+ torch.onnx.export(
+ model,
+ dummy_input,
+ args.output,
+ input_names = input_names,
+ dynamic_axes = dynamic_axes,
+ opset_version=13
+ )
+
+ print("Export onnx model successfully! ")
+
+if __name__ == '__main__':
+ main()
\ No newline at end of file
diff --git a/models/cv/detection/fsaf/igie/fsaf_r50_fpn_1x_coco.py b/models/cv/detection/fsaf/igie/fsaf_r50_fpn_1x_coco.py
new file mode 100755
index 0000000000000000000000000000000000000000..33c2df60e5472e22e57616614cad5ff4fb21e984
--- /dev/null
+++ b/models/cv/detection/fsaf/igie/fsaf_r50_fpn_1x_coco.py
@@ -0,0 +1,278 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+auto_scale_lr = dict(base_batch_size=16, enable=False)
+backend_args = None
+data_root = 'data/coco/'
+dataset_type = 'CocoDataset'
+default_hooks = dict(
+ checkpoint=dict(interval=1, type='CheckpointHook'),
+ logger=dict(interval=50, type='LoggerHook'),
+ param_scheduler=dict(type='ParamSchedulerHook'),
+ sampler_seed=dict(type='DistSamplerSeedHook'),
+ timer=dict(type='IterTimerHook'),
+ visualization=dict(type='DetVisualizationHook'))
+default_scope = 'mmdet'
+env_cfg = dict(
+ cudnn_benchmark=False,
+ dist_cfg=dict(backend='nccl'),
+ mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
+load_from = None
+log_level = 'ERROR'
+log_processor = dict(by_epoch=True, type='LogProcessor', window_size=50)
+model = dict(
+ backbone=dict(
+ depth=50,
+ frozen_stages=1,
+ init_cfg=dict(checkpoint='torchvision://resnet50', type='Pretrained'),
+ norm_cfg=dict(requires_grad=True, type='BN'),
+ norm_eval=True,
+ num_stages=4,
+ out_indices=(
+ 0,
+ 1,
+ 2,
+ 3,
+ ),
+ style='pytorch',
+ type='ResNet'),
+ bbox_head=dict(
+ anchor_generator=dict(
+ octave_base_scale=1,
+ ratios=[
+ 1.0,
+ ],
+ scales_per_octave=1,
+ strides=[
+ 8,
+ 16,
+ 32,
+ 64,
+ 128,
+ ],
+ type='AnchorGenerator'),
+ bbox_coder=dict(normalizer=4.0, type='TBLRBBoxCoder'),
+ feat_channels=256,
+ in_channels=256,
+ loss_bbox=dict(
+ eps=1e-06, loss_weight=1.0, reduction='none', type='IoULoss'),
+ loss_cls=dict(
+ alpha=0.25,
+ gamma=2.0,
+ loss_weight=1.0,
+ reduction='none',
+ type='FocalLoss',
+ use_sigmoid=True),
+ num_classes=80,
+ reg_decoded_bbox=True,
+ stacked_convs=4,
+ type='FSAFHead'),
+ data_preprocessor=dict(
+ bgr_to_rgb=True,
+ mean=[
+ 123.675,
+ 116.28,
+ 103.53,
+ ],
+ pad_size_divisor=32,
+ std=[
+ 58.395,
+ 57.12,
+ 57.375,
+ ],
+ type='DetDataPreprocessor'),
+ neck=dict(
+ add_extra_convs='on_input',
+ in_channels=[
+ 256,
+ 512,
+ 1024,
+ 2048,
+ ],
+ num_outs=5,
+ out_channels=256,
+ start_level=1,
+ type='FPN'),
+ test_cfg=dict(
+ max_per_img=100,
+ min_bbox_size=0,
+ nms=dict(iou_threshold=0.5, type='nms'),
+ nms_pre=1000,
+ score_thr=0.05),
+ train_cfg=dict(
+ allowed_border=-1,
+ assigner=dict(
+ min_pos_iof=0.01,
+ neg_scale=0.2,
+ pos_scale=0.2,
+ type='CenterRegionAssigner'),
+ debug=False,
+ pos_weight=-1,
+ sampler=dict(type='PseudoSampler')),
+ type='FSAF')
+optim_wrapper = dict(
+ optimizer=dict(lr=0.02, momentum=0.9, type='SGD', weight_decay=0.0001),
+ type='OptimWrapper')
+param_scheduler = [
+ dict(
+ begin=0, by_epoch=False, end=500, start_factor=0.001, type='LinearLR'),
+ dict(
+ begin=0,
+ by_epoch=True,
+ end=12,
+ gamma=0.1,
+ milestones=[
+ 8,
+ 11,
+ ],
+ type='MultiStepLR'),
+]
+resume = False
+test_cfg = dict(type='TestLoop')
+test_dataloader = dict(
+ batch_size=32,
+ dataset=dict(
+ ann_file='annotations/instances_val2017.json',
+ backend_args=None,
+ data_prefix=dict(img='images/val2017/'),
+ data_root='/root/.igie_cache/modelzoo_data/datasets/coco/',
+ pipeline=[
+ dict(backend_args=None, type='LoadImageFromFile'),
+ dict(keep_ratio=False, scale=(
+ 800,
+ 800,
+ ), type='Resize'),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(
+ meta_keys=(
+ 'img_id',
+ 'img_path',
+ 'ori_shape',
+ 'img_shape',
+ 'scale_factor',
+ ),
+ type='PackDetInputs'),
+ ],
+ test_mode=True,
+ type='CocoDataset'),
+ drop_last=False,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(shuffle=False, type='DefaultSampler'))
+test_evaluator = dict(
+ ann_file=
+ '/root/.igie_cache/modelzoo_data/datasets/coco/annotations/instances_val2017.json',
+ backend_args=None,
+ format_only=False,
+ metric='bbox',
+ type='CocoMetric')
+test_pipeline = [
+ dict(backend_args=None, type='LoadImageFromFile'),
+ dict(keep_ratio=True, scale=(
+ 1333,
+ 800,
+ ), type='Resize'),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(
+ meta_keys=(
+ 'img_id',
+ 'img_path',
+ 'ori_shape',
+ 'img_shape',
+ 'scale_factor',
+ ),
+ type='PackDetInputs'),
+]
+train_cfg = dict(max_epochs=12, type='EpochBasedTrainLoop', val_interval=1)
+train_dataloader = dict(
+ batch_sampler=dict(type='AspectRatioBatchSampler'),
+ batch_size=2,
+ dataset=dict(
+ ann_file='annotations/instances_train2017.json',
+ backend_args=None,
+ data_prefix=dict(img='train2017/'),
+ data_root='data/coco/',
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
+ pipeline=[
+ dict(backend_args=None, type='LoadImageFromFile'),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(keep_ratio=True, scale=(
+ 1333,
+ 800,
+ ), type='Resize'),
+ dict(prob=0.5, type='RandomFlip'),
+ dict(type='PackDetInputs'),
+ ],
+ type='CocoDataset'),
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(shuffle=True, type='DefaultSampler'))
+train_pipeline = [
+ dict(backend_args=None, type='LoadImageFromFile'),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(keep_ratio=True, scale=(
+ 1333,
+ 800,
+ ), type='Resize'),
+ dict(prob=0.5, type='RandomFlip'),
+ dict(type='PackDetInputs'),
+]
+val_cfg = dict(type='ValLoop')
+val_dataloader = dict(
+ batch_size=1,
+ dataset=dict(
+ ann_file='annotations/instances_val2017.json',
+ backend_args=None,
+ data_prefix=dict(img='val2017/'),
+ data_root='data/coco/',
+ pipeline=[
+ dict(backend_args=None, type='LoadImageFromFile'),
+ dict(keep_ratio=True, scale=(
+ 1333,
+ 800,
+ ), type='Resize'),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(
+ meta_keys=(
+ 'img_id',
+ 'img_path',
+ 'ori_shape',
+ 'img_shape',
+ 'scale_factor',
+ ),
+ type='PackDetInputs'),
+ ],
+ test_mode=True,
+ type='CocoDataset'),
+ drop_last=False,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(shuffle=False, type='DefaultSampler'))
+val_evaluator = dict(
+ ann_file='data/coco/annotations/instances_val2017.json',
+ backend_args=None,
+ format_only=False,
+ metric='bbox',
+ type='CocoMetric')
+vis_backends = [
+ dict(type='LocalVisBackend'),
+]
+visualizer = dict(
+ name='visualizer',
+ type='DetLocalVisualizer',
+ vis_backends=[
+ dict(type='LocalVisBackend'),
+ ])
+work_dir = './'
diff --git a/models/cv/detection/fsaf/igie/inference.py b/models/cv/detection/fsaf/igie/inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..058a53433e2ac0e02f3650846980c2d5460d5e90
--- /dev/null
+++ b/models/cv/detection/fsaf/igie/inference.py
@@ -0,0 +1,157 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import os
+import argparse
+import tvm
+import torch
+import numpy as np
+from tvm import relay
+from tqdm import tqdm
+from mmdet.registry import RUNNERS
+from mmengine.config import Config
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--engine",
+ type=str,
+ required=True,
+ help="igie engine path.")
+
+ parser.add_argument("--batchsize",
+ type=int,
+ required=True,
+ help="inference batch size.")
+
+ parser.add_argument("--datasets",
+ type=str,
+ required=True,
+ help="datasets path.")
+
+ parser.add_argument("--input_name",
+ type=str,
+ required=True,
+ help="input name of the model.")
+
+ parser.add_argument("--warmup",
+ type=int,
+ default=3,
+ help="number of warmup before test.")
+
+ parser.add_argument("--acc_target",
+ type=float,
+ default=None,
+ help="Model inference Accuracy target.")
+
+ parser.add_argument("--fps_target",
+ type=float,
+ default=None,
+ help="Model inference FPS target.")
+
+ parser.add_argument("--perf_only",
+ type=bool,
+ default=False,
+ help="Run performance test only")
+
+ args = parser.parse_args()
+
+ return args
+
+def main():
+ args = parse_args()
+
+ batch_size = args.batchsize
+
+ # create iluvatar target & device
+ target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer")
+ device = tvm.device(target.kind.name, 0)
+
+ # load engine
+ lib = tvm.runtime.load_module(args.engine)
+
+ # create runtime from engine
+ module = tvm.contrib.graph_executor.GraphModule(lib["default"](device))
+
+ # just run perf test
+ if args.perf_only:
+ ftimer = module.module.time_evaluator("run", device, number=100, repeat=1)
+ prof_res = np.array(ftimer().results) * 1000
+ fps = batch_size * 1000 / np.mean(prof_res)
+ print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}")
+ else:
+ # warm up
+ for _ in range(args.warmup):
+ module.run()
+
+ # runner config
+ cfg = Config.fromfile("fsaf_r50_fpn_1x_coco.py")
+
+ cfg.work_dir = "./"
+ cfg['test_dataloader']['batch_size'] = batch_size
+ cfg['test_dataloader']['dataset']['data_root'] = args.datasets
+ cfg['test_dataloader']['dataset']['data_prefix']['img'] = 'images/val2017/'
+ cfg['test_evaluator']['ann_file'] = os.path.join(args.datasets, 'annotations/instances_val2017.json')
+ cfg['log_level'] = 'ERROR'
+
+ # build runner
+ runner = RUNNERS.build(cfg)
+
+ for data in tqdm(runner.test_dataloader):
+ cls_score = []
+ box_reg = []
+
+ input_data = runner.model.data_preprocessor(data, False)
+ image = input_data['inputs'].cpu()
+ pad_batch = len(image) != batch_size
+
+ if pad_batch:
+ origin_size = len(image)
+ image = np.resize(image, (batch_size, *image.shape[1:]))
+
+ module.set_input("input", tvm.nd.array(image, device))
+
+ module.run()
+
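+            # Route each engine output by its channel dimension: 80 channels are
+            # class scores and 4 are box regression deltas for predict_by_feat.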
+ for i in range(module.get_num_outputs()):
+ output = module.get_output(i).asnumpy()
+
+ if pad_batch:
+ output = output[:origin_size]
+
+ output = torch.from_numpy(output)
+
+ if output.shape[1] == 80:
+ cls_score.append(output)
+ elif output.shape[1] == 4:
+ box_reg.append(output)
+
+ batch_img_metas = [
+ data_samples.metainfo for data_samples in data['data_samples']
+ ]
+
+ preds = runner.model.bbox_head.predict_by_feat(
+ cls_score, box_reg, batch_img_metas=batch_img_metas, rescale=True
+ )
+
+ batch_data_samples = runner.model.add_pred_to_datasample(input_data['data_samples'], preds)
+
+ runner.test_evaluator.process(data_samples=batch_data_samples, data_batch=data)
+
+ metrics = runner.test_evaluator.evaluate(len(runner.test_dataloader.dataset))
+
+
+if __name__ == "__main__":
+ main()
diff --git a/models/cv/detection/fsaf/igie/scripts/infer_fsaf_fp16_accuracy.sh b/models/cv/detection/fsaf/igie/scripts/infer_fsaf_fp16_accuracy.sh
new file mode 100644
index 0000000000000000000000000000000000000000..94bd0b406f0fe03ab33a040e544771da5322bb89
--- /dev/null
+++ b/models/cv/detection/fsaf/igie/scripts/infer_fsaf_fp16_accuracy.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+batchsize=32
+model_path="fsaf_opt.onnx"
+datasets_path=${DATASETS_DIR}
+
+# build engine
+python3 build_engine.py \
+ --model_path ${model_path} \
+ --input input:${batchsize},3,800,800 \
+ --precision fp16 \
+ --engine_path fsaf_bs_${batchsize}_fp16.so
+
+
+# inference
+python3 inference.py \
+ --engine fsaf_bs_${batchsize}_fp16.so \
+ --batchsize ${batchsize} \
+ --input_name input \
+ --datasets ${datasets_path}
\ No newline at end of file
diff --git a/models/cv/detection/fsaf/igie/scripts/infer_fsaf_fp16_performance.sh b/models/cv/detection/fsaf/igie/scripts/infer_fsaf_fp16_performance.sh
new file mode 100644
index 0000000000000000000000000000000000000000..cb8f93aa049143ebdef57db7a8b17a310e8b91d4
--- /dev/null
+++ b/models/cv/detection/fsaf/igie/scripts/infer_fsaf_fp16_performance.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+batchsize=32
+model_path="fsaf_opt.onnx"
+datasets_path=${DATASETS_DIR}
+
+# build engine
+python3 build_engine.py \
+ --model_path ${model_path} \
+ --input input:${batchsize},3,800,800 \
+ --precision fp16 \
+ --engine_path fsaf_bs_${batchsize}_fp16.so
+
+
+# inference
+python3 inference.py \
+ --engine fsaf_bs_${batchsize}_fp16.so \
+ --batchsize ${batchsize} \
+ --input_name input \
+ --datasets ${datasets_path} \
+ --perf_only True
\ No newline at end of file
diff --git a/models/cv/detection/retinaface/igie/README.md b/models/cv/detection/retinaface/igie/README.md
new file mode 100755
index 0000000000000000000000000000000000000000..a4eb4e4d406b3b225c4db9f4980a9f35b757aa3d
--- /dev/null
+++ b/models/cv/detection/retinaface/igie/README.md
@@ -0,0 +1,67 @@
+# RetinaFace
+
+## Description
+
+RetinaFace is an efficient single-stage face detection model that employs a multi-task learning strategy to simultaneously predict facial locations, landmarks, and 3D facial shapes. It utilizes feature pyramids and context modules to extract multi-scale features and employs a self-supervised mesh decoder to enhance detection accuracy. RetinaFace demonstrates excellent performance on datasets like WIDER FACE, supports real-time processing, and its code and datasets are publicly available for researchers.
+
+## Setup
+
+### Install
+
+```bash
+# Install libGL
+## CentOS
+yum install -y mesa-libGL
+## Ubuntu
+apt install -y libgl1-mesa-dev
+
+pip3 install onnx
+pip3 install tqdm
+pip3 install onnxsim
+pip3 install opencv-python==4.6.0.66
+```
+
+### Download
+
+Pretrained model:
+
+Dataset: download the WIDER FACE validation dataset.
+
+```bash
+wget https://github.com/biubug6/Face-Detector-1MB-with-landmark/raw/master/weights/mobilenet0.25_Final.pth
+```
+
+### Model Conversion
+
+```bash
+# export onnx model
+python3 export.py --weight mobilenet0.25_Final.pth --output retinaface.onnx
+
+# use onnxsim to optimize the onnx model
+onnxsim retinaface.onnx retinaface_opt.onnx
+```
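+
+As an optional sanity check (not part of the official flow), the simplified graph can be validated and its outputs listed; `inference.py` reads three outputs by position (box locations, class confidences, and landmarks):
+
+```bash
+python3 -c "import onnx; m = onnx.load('retinaface_opt.onnx'); onnx.checker.check_model(m); print(len(m.graph.output), 'outputs:', [o.name for o in m.graph.output])"
+```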
+
+## Inference
+
+```bash
+export DATASETS_DIR=/Path/to/widerface/
+```
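+
+The layout below is inferred from the paths used in `inference.py` (`val/wider_val.txt` plus per-image paths resolved relative to `val/images`), so treat it as an assumption rather than an official requirement:
+
+```bash
+# /Path/to/widerface/
+# └── val/
+#     ├── wider_val.txt        # list of relative image paths
+#     └── images/              # WIDER FACE validation images
+```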
+
+### FP16
+
+```bash
+# Accuracy
+bash scripts/infer_retinaface_fp16_accuracy.sh
+# Performance
+bash scripts/infer_retinaface_fp16_performance.sh
+```
+
+## Results
+
+| Model | BatchSize | Precision | FPS | Easy AP(%) | Medium AP (%) | Hard AP(%) |
+| :--------: | :-------: | :-------: | :------: | :--------: | :-----------: | :--------: |
+| RetinaFace | 32 | FP16 | 8304.626 | 80.13 | 68.52 | 36.59 |
+
+## Reference
+
+Face-Detector-1MB-with-landmark:
diff --git a/models/cv/detection/retinaface/igie/build_engine.py b/models/cv/detection/retinaface/igie/build_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..d3626ae76cc9781d9a01ec3d3e2afbdbca409ff5
--- /dev/null
+++ b/models/cv/detection/retinaface/igie/build_engine.py
@@ -0,0 +1,73 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import tvm
+import argparse
+from tvm import relay
+from tvm.relay.import_model import import_model_to_igie
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--model_path",
+ type=str,
+ required=True,
+ help="original model path.")
+
+ parser.add_argument("--engine_path",
+ type=str,
+ required=True,
+ help="igie export engine path.")
+
+ parser.add_argument("--input",
+ type=str,
+ required=True,
+ help="""
+ input info of the model, format should be:
+ input_name:input_shape
+ eg: --input input:1,3,224,224.
+ """)
+
+ parser.add_argument("--precision",
+ type=str,
+ choices=["fp32", "fp16", "int8"],
+ required=True,
+ help="model inference precision.")
+
+ args = parser.parse_args()
+
+ return args
+
+def main():
+ args = parse_args()
+
+ # get input valueinfo
+ input_name, input_shape = args.input.split(":")
+ shape = tuple([int(s) for s in input_shape.split(",")])
+ input_dict = {input_name: shape}
+
+ target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer")
+
+ mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie")
+
+ # build engine
+ lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision)
+
+ # export engine
+ lib.export_library(args.engine_path)
+
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/models/cv/detection/retinaface/igie/export.py b/models/cv/detection/retinaface/igie/export.py
new file mode 100755
index 0000000000000000000000000000000000000000..a1816645c2cf6009d2f4529bdd340832db64a044
--- /dev/null
+++ b/models/cv/detection/retinaface/igie/export.py
@@ -0,0 +1,116 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import argparse
+import torch
+from models.retinaface import RetinaFace
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--weight",
+ type=str,
+ required=True,
+ help="pytorch model weight.")
+
+ parser.add_argument("--output",
+ type=str,
+ required=True,
+ help="export onnx model path.")
+
+ args = parser.parse_args()
+ return args
+
+def check_keys(model, pretrained_state_dict):
+ ckpt_keys = set(pretrained_state_dict.keys())
+ model_keys = set(model.state_dict().keys())
+ used_pretrained_keys = model_keys & ckpt_keys
+ unused_pretrained_keys = ckpt_keys - model_keys
+ missing_keys = model_keys - ckpt_keys
+ print('Missing keys:{}'.format(len(missing_keys)))
+ print('Unused checkpoint keys:{}'.format(len(unused_pretrained_keys)))
+ print('Used keys:{}'.format(len(used_pretrained_keys)))
+ assert len(used_pretrained_keys) > 0, 'load NONE from pretrained checkpoint'
+ return True
+
+
+def remove_prefix(state_dict, prefix):
+ ''' Old style model is stored with all names of parameters sharing common prefix 'module.' '''
+ print('remove prefix \'{}\''.format(prefix))
+ f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x
+ return {f(key): value for key, value in state_dict.items()}
+
+
+def load_model(model, pretrained_path):
+ print('Loading pretrained model from {}'.format(pretrained_path))
+ pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage)
+
+ if "state_dict" in pretrained_dict.keys():
+ pretrained_dict = remove_prefix(pretrained_dict['state_dict'], 'module.')
+ else:
+ pretrained_dict = remove_prefix(pretrained_dict, 'module.')
+ check_keys(model, pretrained_dict)
+ model.load_state_dict(pretrained_dict, strict=False)
+ return model
+
+def main():
+ args = parse_args()
+
+ cfg_mnet = {
+ 'name': 'mobilenet0.25',
+ 'min_sizes': [[10, 20], [32, 64], [128, 256]],
+ 'steps': [8, 16, 32],
+ 'variance': [0.1, 0.2],
+ 'clip': False,
+ 'loc_weight': 2.0,
+ 'gpu_train': True,
+ 'batch_size': 32,
+ 'ngpu': 1,
+ 'epoch': 250,
+ 'decay1': 190,
+ 'decay2': 220,
+ 'image_size': 300,
+ 'pretrain': False,
+ 'return_layers': {'stage1': 1, 'stage2': 2, 'stage3': 3},
+ 'in_channel': 32,
+ 'out_channel': 64
+ }
+ model = RetinaFace(cfg = cfg_mnet, phase = 'test')
+
+ # load weight
+ model = load_model(model, args.weight)
+ model.eval()
+
+ input_names = ["input"]
+ output_names = ["output"]
+ dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}}
+ dummy_input = torch.randn(1, 3, 320, 320)
+
+ torch.onnx.export(
+ model,
+ dummy_input,
+ args.output,
+ input_names = input_names,
+ dynamic_axes = dynamic_axes,
+ output_names = output_names,
+ opset_version=13
+ )
+ print("Export onnx model successfully! ")
+
+
+if __name__ == '__main__':
+ main()
+
+
diff --git a/models/cv/detection/retinaface/igie/inference.py b/models/cv/detection/retinaface/igie/inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..44d29d1b9e1c8075c1212cecbe13ed683ebf1d7f
--- /dev/null
+++ b/models/cv/detection/retinaface/igie/inference.py
@@ -0,0 +1,205 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import os
+import cv2
+import tvm
+import torch
+import argparse
+import numpy as np
+from tvm import relay
+from tqdm import tqdm
+from torch.utils.data import Dataset
+from utils.post_process import post_process
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--engine",
+ type=str,
+ required=True,
+ help="igie engine path.")
+
+ parser.add_argument("--batchsize",
+ type=int,
+ required=True,
+ help="inference batch size.")
+
+ parser.add_argument("--datasets",
+ type=str,
+ required=True,
+ help="datasets path.")
+
+ parser.add_argument("--input_name",
+ type=str,
+ required=True,
+ help="input name of the model.")
+
+ parser.add_argument("--warmup",
+ type=int,
+ default=3,
+ help="number of warmup before test.")
+
+ parser.add_argument("--acc_target",
+ type=float,
+ default=None,
+ help="Model inference Accuracy target.")
+
+ parser.add_argument("--fps_target",
+ type=float,
+ default=None,
+ help="Model inference FPS target.")
+
+ parser.add_argument("--perf_only",
+ type=bool,
+ default=False,
+ help="Run performance test only")
+
+ args = parser.parse_args()
+
+ return args
+
+def letterbox(im, new_shape=(640, 640), color=(114, 114, 114)):
+ shape = im.shape[:2]
+ if isinstance(new_shape, int):
+ new_shape = (new_shape, new_shape)
+ r = 1
+
+ # Compute padding
+ ratio = r, r
+ new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
+ dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]
+ dw /= 2
+ dh /= 2
+
+ if shape[::-1] != new_unpad:
+ im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
+ top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
+ left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
+ im = cv2.copyMakeBorder(im,
+ top,
+ bottom,
+ left,
+ right,
+ cv2.BORDER_CONSTANT,
+ value=color)
+ return im, ratio, (dw, dh)
+
+
+class FaceDataset(Dataset):
+ def __init__(self, img_path,image_size=320, layout="NCHW"):
+
+ self.imgs_path = []
+ self.imgs_path_ori=[]
+ self.image_size=image_size
+ self.layout = layout
+ self.img_dir=os.path.dirname(img_path)
+ with open(img_path, 'r') as fr:
+ self.imgs_path = fr.read().split()
+ self.imgs_path_ori=self.imgs_path
+
+ def __len__(self):
+ return len(self.imgs_path)
+
+ def __getitem__(self, idx):
+ img, (h0, w0), (h, w) = self._load_image(idx)
+ img, ratio, pad = letterbox(img,
+ self.image_size,
+ color=(114,114,114))
+ shapes = (h0, w0), ((h / h0, w / w0), pad),(h, w)
+ img = img.astype(np.float32)
+ img -= (104, 117, 123)
+ img = img.transpose(2, 0, 1)
+
+ return img, self.imgs_path[idx], shapes, self.imgs_path_ori[idx]
+
+
+ @staticmethod
+ def collate_fn(batch):
+ im, path, shapes, path_ori = zip(*batch)
+ return np.concatenate([i[None] for i in im], axis=0), path, shapes, path_ori
+
+ def _load_image(self, i):
+ im = cv2.imread(self.img_dir+'/images'+self.imgs_path[i], cv2.IMREAD_COLOR)
+ h0, w0 = im.shape[:2]
+ r = self.image_size / max(h0, w0)
+ if r != 1:
+ im = cv2.resize(im, (int(w0 * r), int(h0 * r)),
+ interpolation=cv2.INTER_LINEAR)
+ return im.astype("float32"), (h0, w0), im.shape[:2]
+
+def get_dataloader(args):
+ image_size = 320
+ batchsize = args.batchsize
+ data_path = os.path.join(args.datasets, 'val/wider_val.txt')
+ datasets =FaceDataset(data_path, image_size)
+ dataLoader = torch.utils.data.DataLoader(datasets, batchsize, drop_last=False, collate_fn=datasets.collate_fn)
+
+ return dataLoader
+
+def main():
+ args = parse_args()
+
+ batch_size = args.batchsize
+
+ # create iluvatar target & device
+ target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer")
+ device = tvm.device(target.kind.name, 0)
+
+ # load engine
+ lib = tvm.runtime.load_module(args.engine)
+
+ # create runtime from engine
+ module = tvm.contrib.graph_executor.GraphModule(lib["default"](device))
+
+ # just run perf test
+ if args.perf_only:
+ ftimer = module.module.time_evaluator("run", device, number=100, repeat=1)
+ prof_res = np.array(ftimer().results) * 1000
+ fps = batch_size * 1000 / np.mean(prof_res)
+ print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}")
+ else:
+ # warmup
+ for _ in range(args.warmup):
+ module.run()
+
+ dataloader = get_dataloader(args)
+
+ for batch in tqdm(dataloader):
+ image = batch[0]
+ shapes = batch[2]
+ img_names = batch[3]
+
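+            # The final batch can be smaller than batch_size: pad it up to size
+            # (np.resize repeats samples) and drop the padded outputs afterwards.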
+ pad_batch = len(image) != batch_size
+ if pad_batch:
+ origin_size = len(image)
+ image = np.resize(image, (batch_size, *image.shape[1:]))
+
+ module.set_input("input", tvm.nd.array(image, device))
+
+ module.run()
+
+ loc_bs, conf_bs, landms_bs = module.get_output(0).asnumpy(), module.get_output(1).asnumpy(), module.get_output(2).asnumpy()
+
+ if pad_batch:
+ loc_bs = loc_bs[:origin_size]
+ conf_bs = conf_bs[:origin_size]
+ landms_bs = landms_bs[:origin_size]
+
+ ## batch accuracy
+ post_process(shapes, img_names, loc_bs, conf_bs, landms_bs, save_folder='./widerface_evaluate/widerface_txt/')
+
+if __name__ == "__main__":
+ main()
diff --git a/models/cv/detection/retinaface/igie/models/net.py b/models/cv/detection/retinaface/igie/models/net.py
new file mode 100644
index 0000000000000000000000000000000000000000..981be825f619ab3d655553d013f987f0e129a4f7
--- /dev/null
+++ b/models/cv/detection/retinaface/igie/models/net.py
@@ -0,0 +1,148 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+def conv_bn(inp, oup, stride = 1):
+ return nn.Sequential(
+ nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
+ nn.BatchNorm2d(oup),
+ nn.ReLU(inplace=True)
+ )
+
+def conv_bn_no_relu(inp, oup, stride):
+ return nn.Sequential(
+ nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
+ nn.BatchNorm2d(oup),
+ )
+
+def conv_bn1X1(inp, oup, stride):
+ return nn.Sequential(
+ nn.Conv2d(inp, oup, 1, stride, padding=0, bias=False),
+ nn.BatchNorm2d(oup),
+ nn.ReLU(inplace=True)
+ )
+
+def conv_dw(inp, oup, stride):
+ return nn.Sequential(
+ nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
+ nn.BatchNorm2d(inp),
+ nn.ReLU(inplace=True),
+
+ nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
+ nn.BatchNorm2d(oup),
+ nn.ReLU(inplace=True)
+ )
+
+class SSH(nn.Module):
+ def __init__(self, in_channel, out_channel):
+ super(SSH, self).__init__()
+ assert out_channel % 4 == 0
+ leaky = 0
+ if (out_channel <= 64):
+ leaky = 0.1
+ self.conv3X3 = conv_bn_no_relu(in_channel, out_channel//2, stride=1)
+
+ self.conv5X5_1 = conv_bn(in_channel, out_channel//4, stride=1)
+ self.conv5X5_2 = conv_bn_no_relu(out_channel//4, out_channel//4, stride=1)
+
+ self.conv7X7_2 = conv_bn(out_channel//4, out_channel//4, stride=1)
+ self.conv7x7_3 = conv_bn_no_relu(out_channel//4, out_channel//4, stride=1)
+
+ def forward(self, input):
+ conv3X3 = self.conv3X3(input)
+
+ conv5X5_1 = self.conv5X5_1(input)
+ conv5X5 = self.conv5X5_2(conv5X5_1)
+
+ conv7X7_2 = self.conv7X7_2(conv5X5_1)
+ conv7X7 = self.conv7x7_3(conv7X7_2)
+
+ out = torch.cat([conv3X3, conv5X5, conv7X7], dim=1)
+ out = F.relu(out)
+ return out
+
+class FPN(nn.Module):
+ def __init__(self,in_channels_list,out_channels):
+ super(FPN,self).__init__()
+ leaky = 0
+ if (out_channels <= 64):
+ leaky = 0.1
+ self.output1 = conv_bn1X1(in_channels_list[0], out_channels, stride = 1)
+ self.output2 = conv_bn1X1(in_channels_list[1], out_channels, stride = 1)
+ self.output3 = conv_bn1X1(in_channels_list[2], out_channels, stride = 1)
+
+ self.merge1 = conv_bn(out_channels, out_channels)
+ self.merge2 = conv_bn(out_channels, out_channels)
+
+ def forward(self, input):
+ # names = list(input.keys())
+ input = list(input.values())
+
+ output1 = self.output1(input[0])
+ output2 = self.output2(input[1])
+ output3 = self.output3(input[2])
+
+ up3 = F.interpolate(output3, size=[output2.size(2), output2.size(3)], mode="nearest")
+ output2 = output2 + up3
+ output2 = self.merge2(output2)
+
+ up2 = F.interpolate(output2, size=[output1.size(2), output1.size(3)], mode="nearest")
+ output1 = output1 + up2
+ output1 = self.merge1(output1)
+
+ out = [output1, output2, output3]
+ return out
+
+
+
+class MobileNetV1(nn.Module):
+ def __init__(self):
+ super(MobileNetV1, self).__init__()
+ self.stage1 = nn.Sequential(
+ conv_bn(3, 8, 2), # 3
+ conv_dw(8, 16, 1), # 7
+ conv_dw(16, 32, 2), # 11
+ conv_dw(32, 32, 1), # 19
+ conv_dw(32, 64, 2), # 27
+ conv_dw(64, 64, 1), # 43
+ )
+ self.stage2 = nn.Sequential(
+ conv_dw(64, 128, 2), # 43 + 16 = 59
+ conv_dw(128, 128, 1), # 59 + 32 = 91
+ conv_dw(128, 128, 1), # 91 + 32 = 123
+ conv_dw(128, 128, 1), # 123 + 32 = 155
+ conv_dw(128, 128, 1), # 155 + 32 = 187
+ conv_dw(128, 128, 1), # 187 + 32 = 219
+ )
+ self.stage3 = nn.Sequential(
+ conv_dw(128, 256, 2), # 219 +3 2 = 241
+ conv_dw(256, 256, 1), # 241 + 64 = 301
+ )
+ self.avg = nn.AdaptiveAvgPool2d((1,1))
+ self.fc = nn.Linear(256, 1000)
+
+ def forward(self, x):
+ x = self.stage1(x)
+ x = self.stage2(x)
+ x = self.stage3(x)
+ x = self.avg(x)
+ # x = self.model(x)
+ x = x.view(-1, 256)
+ x = self.fc(x)
+ return x
+
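+# Note: stage1/stage2/stage3 end with 64, 128 and 256 output channels, which
+# matches the in_channels_list of [in_channel * 2, in_channel * 4,
+# in_channel * 8] built in retinaface.py when cfg['in_channel'] is 32.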
diff --git a/models/cv/detection/retinaface/igie/models/retinaface.py b/models/cv/detection/retinaface/igie/models/retinaface.py
new file mode 100644
index 0000000000000000000000000000000000000000..873506e35960e87ecc601eae8a610dee75e02213
--- /dev/null
+++ b/models/cv/detection/retinaface/igie/models/retinaface.py
@@ -0,0 +1,138 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import torch
+import torch.nn as nn
+import torchvision.models._utils as _utils
+import torch.nn.functional as F
+
+from models.net import MobileNetV1 as MobileNetV1
+from models.net import FPN as FPN
+from models.net import SSH as SSH
+
+class ClassHead(nn.Module):
+ def __init__(self,inchannels=512,num_anchors=3):
+ super(ClassHead,self).__init__()
+ self.num_anchors = num_anchors
+ self.conv1x1 = nn.Conv2d(inchannels,self.num_anchors*2,kernel_size=(1,1),stride=1,padding=0)
+
+ def forward(self,x):
+ out = self.conv1x1(x)
+ out = out.permute(0,2,3,1).contiguous()
+
+ return out.view(out.shape[0], -1, 2)
+
+class BboxHead(nn.Module):
+ def __init__(self,inchannels=512,num_anchors=3):
+ super(BboxHead,self).__init__()
+ self.conv1x1 = nn.Conv2d(inchannels,num_anchors*4,kernel_size=(1,1),stride=1,padding=0)
+
+ def forward(self,x):
+ out = self.conv1x1(x)
+ out = out.permute(0,2,3,1).contiguous()
+
+ return out.view(out.shape[0], -1, 4)
+
+class LandmarkHead(nn.Module):
+ def __init__(self,inchannels=512,num_anchors=3):
+ super(LandmarkHead,self).__init__()
+ self.conv1x1 = nn.Conv2d(inchannels,num_anchors*10,kernel_size=(1,1),stride=1,padding=0)
+
+ def forward(self,x):
+ out = self.conv1x1(x)
+ out = out.permute(0,2,3,1).contiguous()
+
+ return out.view(out.shape[0], -1, 10)
+
+class RetinaFace(nn.Module):
+ def __init__(self, cfg = None, phase = 'train'):
+ """
+ :param cfg: Network related settings.
+ :param phase: train or test.
+ """
+ super(RetinaFace,self).__init__()
+ self.phase = phase
+ backbone = None
+ if cfg['name'] == 'mobilenet0.25':
+ backbone = MobileNetV1()
+ if cfg['pretrain']:
+ checkpoint = torch.load("./weights/mobilenetV1X0.25_pretrain.tar", map_location=torch.device('cpu'))
+ from collections import OrderedDict
+ new_state_dict = OrderedDict()
+ for k, v in checkpoint['state_dict'].items():
+ name = k[7:] # remove module.
+ new_state_dict[name] = v
+ # load params
+ backbone.load_state_dict(new_state_dict)
+ elif cfg['name'] == 'Resnet50':
+ import torchvision.models as models
+ backbone = models.resnet50(pretrained=cfg['pretrain'])
+
+ self.body = _utils.IntermediateLayerGetter(backbone, cfg['return_layers'])
+ in_channels_stage2 = cfg['in_channel']
+ in_channels_list = [
+ in_channels_stage2 * 2,
+ in_channels_stage2 * 4,
+ in_channels_stage2 * 8,
+ ]
+ out_channels = cfg['out_channel']
+ self.fpn = FPN(in_channels_list,out_channels)
+ self.ssh1 = SSH(out_channels, out_channels)
+ self.ssh2 = SSH(out_channels, out_channels)
+ self.ssh3 = SSH(out_channels, out_channels)
+
+ self.ClassHead = self._make_class_head(fpn_num=3, inchannels=cfg['out_channel'])
+ self.BboxHead = self._make_bbox_head(fpn_num=3, inchannels=cfg['out_channel'])
+ self.LandmarkHead = self._make_landmark_head(fpn_num=3, inchannels=cfg['out_channel'])
+
+ def _make_class_head(self,fpn_num=3,inchannels=64,anchor_num=2):
+ classhead = nn.ModuleList()
+ for i in range(fpn_num):
+ classhead.append(ClassHead(inchannels,anchor_num))
+ return classhead
+
+ def _make_bbox_head(self,fpn_num=3,inchannels=64,anchor_num=2):
+ bboxhead = nn.ModuleList()
+ for i in range(fpn_num):
+ bboxhead.append(BboxHead(inchannels,anchor_num))
+ return bboxhead
+
+ def _make_landmark_head(self,fpn_num=3,inchannels=64,anchor_num=2):
+ landmarkhead = nn.ModuleList()
+ for i in range(fpn_num):
+ landmarkhead.append(LandmarkHead(inchannels,anchor_num))
+ return landmarkhead
+
+ def forward(self,inputs):
+ out = self.body(inputs)
+
+ # FPN
+ fpn = self.fpn(out)
+
+ # SSH
+ feature1 = self.ssh1(fpn[0])
+ feature2 = self.ssh2(fpn[1])
+ feature3 = self.ssh3(fpn[2])
+ features = [feature1, feature2, feature3]
+
+ bbox_regressions = torch.cat([self.BboxHead[i](feature) for i, feature in enumerate(features)], dim=1)
+ classifications = torch.cat([self.ClassHead[i](feature) for i, feature in enumerate(features)],dim=1)
+ ldm_regressions = torch.cat([self.LandmarkHead[i](feature) for i, feature in enumerate(features)], dim=1)
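+ # Each head flattens its level to [B, H*W*num_anchors, C]; after the cats
+ # above, bbox_regressions is [B, num_priors, 4], classifications is
+ # [B, num_priors, 2] and ldm_regressions is [B, num_priors, 10], where
+ # num_priors is the total anchor count over the three FPN levels.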
+
+ if self.phase == 'train':
+ output = (bbox_regressions, classifications, ldm_regressions)
+ else:
+ output = (bbox_regressions, F.softmax(classifications, dim=-1), ldm_regressions)
+ return output
\ No newline at end of file
diff --git a/models/cv/detection/retinaface/igie/scripts/infer_retinaface_fp16_accuracy.sh b/models/cv/detection/retinaface/igie/scripts/infer_retinaface_fp16_accuracy.sh
new file mode 100644
index 0000000000000000000000000000000000000000..ea43f251a9f1214dbda82062ffd567f382ce2167
--- /dev/null
+++ b/models/cv/detection/retinaface/igie/scripts/infer_retinaface_fp16_accuracy.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+batchsize=32
+model_path="retinaface_opt.onnx"
+datasets_path=${DATASETS_DIR}
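+# NOTE: DATASETS_DIR is assumed to point at the WIDER FACE validation data;
+# the widerface_evaluate step below scores predictions against its ground truth.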
+
+# build engine
+python3 build_engine.py \
+ --model_path ${model_path} \
+ --input input:${batchsize},3,320,320 \
+ --precision fp16 \
+ --engine_path retinaface_bs_${batchsize}_fp16.so
+
+
+# inference
+python3 inference.py \
+ --engine retinaface_bs_${batchsize}_fp16.so \
+ --batchsize ${batchsize} \
+ --input_name input \
+ --datasets ${datasets_path}
+
+# compute accuracy
+cd widerface_evaluate
+python3 setup.py build_ext --inplace
+python3 evaluation.py
+cd ..
\ No newline at end of file
diff --git a/models/cv/detection/retinaface/igie/scripts/infer_retinaface_fp16_performance.sh b/models/cv/detection/retinaface/igie/scripts/infer_retinaface_fp16_performance.sh
new file mode 100644
index 0000000000000000000000000000000000000000..e92b1f56279f4607edb95ff8ae5e84072fd8bc62
--- /dev/null
+++ b/models/cv/detection/retinaface/igie/scripts/infer_retinaface_fp16_performance.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+batchsize=32
+model_path="retinaface_opt.onnx"
+datasets_path=${DATASETS_DIR}
+
+# build engine
+python3 build_engine.py \
+ --model_path ${model_path} \
+ --input input:${batchsize},3,320,320 \
+ --precision fp16 \
+ --engine_path retinaface_bs_${batchsize}_fp16.so
+
+
+# inference
+python3 inference.py \
+ --engine retinaface_bs_${batchsize}_fp16.so \
+ --batchsize ${batchsize} \
+ --input_name input \
+ --datasets ${datasets_path} \
+ --perf_only True
\ No newline at end of file
diff --git a/models/cv/detection/retinaface/igie/utils/box_utils.py b/models/cv/detection/retinaface/igie/utils/box_utils.py
new file mode 100755
index 0000000000000000000000000000000000000000..0bf174cb44da423218d7ce548946d3af2e5cc729
--- /dev/null
+++ b/models/cv/detection/retinaface/igie/utils/box_utils.py
@@ -0,0 +1,344 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import torch
+import numpy as np
+
+def point_form(boxes):
+ """ Convert prior_boxes to (xmin, ymin, xmax, ymax)
+ representation for comparison to point form ground truth data.
+ Args:
+ boxes: (tensor) center-size default boxes from priorbox layers.
+ Return:
+ boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes.
+ """
+ return torch.cat((boxes[:, :2] - boxes[:, 2:]/2, # xmin, ymin
+ boxes[:, :2] + boxes[:, 2:]/2), 1) # xmax, ymax
+
+
+def center_size(boxes):
+ """ Convert prior_boxes to (cx, cy, w, h)
+ representation for comparison to center-size form ground truth data.
+ Args:
+ boxes: (tensor) point_form boxes
+ Return:
+ boxes: (tensor) Converted (cx, cy, w, h) form of boxes.
+ """
+ return torch.cat(((boxes[:, 2:] + boxes[:, :2])/2, # cx, cy
+ boxes[:, 2:] - boxes[:, :2]), 1) # w, h
+
+
+def intersect(box_a, box_b):
+ """ We resize both tensors to [A,B,2] without new malloc:
+ [A,2] -> [A,1,2] -> [A,B,2]
+ [B,2] -> [1,B,2] -> [A,B,2]
+ Then we compute the area of intersect between box_a and box_b.
+ Args:
+ box_a: (tensor) bounding boxes, Shape: [A,4].
+ box_b: (tensor) bounding boxes, Shape: [B,4].
+ Return:
+ (tensor) intersection area, Shape: [A,B].
+ """
+ A = box_a.size(0)
+ B = box_b.size(0)
+ max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2),
+ box_b[:, 2:].unsqueeze(0).expand(A, B, 2))
+ min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2),
+ box_b[:, :2].unsqueeze(0).expand(A, B, 2))
+ inter = torch.clamp((max_xy - min_xy), min=0)
+ return inter[:, :, 0] * inter[:, :, 1]
+
+
+def jaccard(box_a, box_b):
+ """Compute the jaccard overlap of two sets of boxes. The jaccard overlap
+ is simply the intersection over union of two boxes. Here we operate on
+ ground truth boxes and default boxes.
+ E.g.:
+ A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)
+ Args:
+ box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4]
+ box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4]
+ Return:
+ jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)]
+ """
+ inter = intersect(box_a, box_b)
+ area_a = ((box_a[:, 2]-box_a[:, 0]) *
+ (box_a[:, 3]-box_a[:, 1])).unsqueeze(1).expand_as(inter) # [A,B]
+ area_b = ((box_b[:, 2]-box_b[:, 0]) *
+ (box_b[:, 3]-box_b[:, 1])).unsqueeze(0).expand_as(inter) # [A,B]
+ union = area_a + area_b - inter
+ return inter / union # [A,B]
+
+
+def matrix_iou(a, b):
+ """
+ return iou of a and b, numpy version for data augmentation
+ """
+ lt = np.maximum(a[:, np.newaxis, :2], b[:, :2])
+ rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])
+
+ area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2)
+ area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
+ area_b = np.prod(b[:, 2:] - b[:, :2], axis=1)
+ return area_i / (area_a[:, np.newaxis] + area_b - area_i)
+
+
+def matrix_iof(a, b):
+ """
+ return iof of a and b, numpy version for data augmentation
+ """
+ lt = np.maximum(a[:, np.newaxis, :2], b[:, :2])
+ rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])
+
+ area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2)
+ area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
+ return area_i / np.maximum(area_a[:, np.newaxis], 1)
+
+
+def match(threshold, truths, priors, variances, labels, landms, loc_t, conf_t, landm_t, idx):
+ """Match each prior box with the ground truth box of the highest jaccard
+ overlap, encode the bounding boxes, then return the matched indices
+ corresponding to both confidence and location preds.
+ Args:
+ threshold: (float) The overlap threshold used when matching boxes.
+ truths: (tensor) Ground truth boxes, Shape: [num_obj, 4].
+ priors: (tensor) Prior boxes from priorbox layers, Shape: [n_priors,4].
+ variances: (tensor) Variances corresponding to each prior coord,
+ Shape: [num_priors, 4].
+ labels: (tensor) All the class labels for the image, Shape: [num_obj].
+ landms: (tensor) Ground truth landms, Shape [num_obj, 10].
+ loc_t: (tensor) Tensor to be filled w/ encoded location targets.
+ conf_t: (tensor) Tensor to be filled w/ matched indices for conf preds.
+ landm_t: (tensor) Tensor to be filled w/ encoded landm targets.
+ idx: (int) current batch index
+ Return:
+ The matched indices corresponding to 1)location 2)confidence 3)landm preds.
+ """
+ # jaccard index
+ overlaps = jaccard(
+ truths,
+ point_form(priors)
+ )
+ # (Bipartite Matching)
+ # [1,num_objects] best prior for each ground truth
+ best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=True)
+
+ # ignore hard gt
+ valid_gt_idx = best_prior_overlap[:, 0] >= 0.2
+ best_prior_idx_filter = best_prior_idx[valid_gt_idx, :]
+ if best_prior_idx_filter.shape[0] <= 0:
+ loc_t[idx] = 0
+ conf_t[idx] = 0
+ return
+
+ # [1,num_priors] best ground truth for each prior
+ best_truth_overlap, best_truth_idx = overlaps.max(0, keepdim=True)
+ best_truth_idx.squeeze_(0)
+ best_truth_overlap.squeeze_(0)
+ best_prior_idx.squeeze_(1)
+ best_prior_idx_filter.squeeze_(1)
+ best_prior_overlap.squeeze_(1)
+ best_truth_overlap.index_fill_(0, best_prior_idx_filter, 2) # ensure best prior
+ # TODO refactor: index best_prior_idx with long tensor
+ # ensure every gt matches with its prior of max overlap
+ for j in range(best_prior_idx.size(0)): # force the best prior of each gt box to predict that gt
+ best_truth_idx[best_prior_idx[j]] = j
+ matches = truths[best_truth_idx] # Shape: [num_priors,4] gather the matched gt box for every anchor
+ conf = labels[best_truth_idx] # Shape: [num_priors] gather the matched gt label for every anchor
+ conf[best_truth_overlap < threshold] = 0 # label as background: anchors with overlap < 0.35 all become negatives
+ loc = encode(matches, priors, variances)
+
+ matches_landm = landms[best_truth_idx]
+ landm = encode_landm(matches_landm, priors, variances)
+ loc_t[idx] = loc # [num_priors,4] encoded offsets to learn
+ conf_t[idx] = conf # [num_priors] top class label for each prior
+ landm_t[idx] = landm
+
+
+def encode(matched, priors, variances):
+ """Encode the variances from the priorbox layers into the ground truth boxes
+ we have matched (based on jaccard overlap) with the prior boxes.
+ Args:
+ matched: (tensor) Coords of ground truth for each prior in point-form
+ Shape: [num_priors, 4].
+ priors: (tensor) Prior boxes in center-offset form
+ Shape: [num_priors,4].
+ variances: (list[float]) Variances of priorboxes
+ Return:
+ encoded boxes (tensor), Shape: [num_priors, 4]
+ """
+
+ # dist b/t match center and prior's center
+ g_cxcy = (matched[:, :2] + matched[:, 2:])/2 - priors[:, :2]
+ # encode variance
+ g_cxcy /= (variances[0] * priors[:, 2:])
+ # match wh / prior wh
+ g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:]
+ g_wh = torch.log(g_wh) / variances[1]
+ # return target for smooth_l1_loss
+ return torch.cat([g_cxcy, g_wh], 1) # [num_priors,4]
+
+def encode_landm(matched, priors, variances):
+ """Encode the variances from the priorbox layers into the ground truth boxes
+ we have matched (based on jaccard overlap) with the prior boxes.
+ Args:
+ matched: (tensor) Coords of ground truth for each prior in point-form
+ Shape: [num_priors, 10].
+ priors: (tensor) Prior boxes in center-offset form
+ Shape: [num_priors,4].
+ variances: (list[float]) Variances of priorboxes
+ Return:
+ encoded landm (tensor), Shape: [num_priors, 10]
+ """
+
+ # dist b/t match center and prior's center
+ matched = torch.reshape(matched, (matched.size(0), 5, 2))
+ priors_cx = priors[:, 0].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2)
+ priors_cy = priors[:, 1].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2)
+ priors_w = priors[:, 2].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2)
+ priors_h = priors[:, 3].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2)
+ priors = torch.cat([priors_cx, priors_cy, priors_w, priors_h], dim=2)
+ g_cxcy = matched[:, :, :2] - priors[:, :, :2]
+ # encode variance
+ g_cxcy /= (variances[0] * priors[:, :, 2:])
+ # g_cxcy /= priors[:, :, 2:]
+ g_cxcy = g_cxcy.reshape(g_cxcy.size(0), -1)
+ # return target for smooth_l1_loss
+ return g_cxcy
+
+
+# Adapted from https://github.com/Hakuyume/chainer-ssd
+def decode(loc, priors, variances):
+ """Decode locations from predictions using priors to undo
+ the encoding we did for offset regression at train time.
+ Args:
+ loc (tensor): location predictions for loc layers,
+ Shape: [num_priors,4]
+ priors (tensor): Prior boxes in center-offset form.
+ Shape: [num_priors,4].
+ variances: (list[float]) Variances of priorboxes
+ Return:
+ decoded bounding box predictions
+ """
+
+ boxes = torch.cat((
+ priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],
+ priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1)
+ boxes[:, :2] -= boxes[:, 2:] / 2
+ boxes[:, 2:] += boxes[:, :2]
+ return boxes
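+# Worked example (assuming variances [0.1, 0.2]): a prior (cx, cy, w, h) =
+# (0.5, 0.5, 0.2, 0.2) with a zero offset loc = (0, 0, 0, 0) decodes to the
+# prior itself in corner form, i.e. (0.4, 0.4, 0.6, 0.6).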
+
+def decode_landm(pre, priors, variances):
+ """Decode landm from predictions using priors to undo
+ the encoding we did for offset regression at train time.
+ Args:
+ pre (tensor): landm predictions for loc layers,
+ Shape: [num_priors,10]
+ priors (tensor): Prior boxes in center-offset form.
+ Shape: [num_priors,4].
+ variances: (list[float]) Variances of priorboxes
+ Return:
+ decoded landm predictions
+ """
+ landms = torch.cat((priors[:, :2] + pre[:, :2] * variances[0] * priors[:, 2:],
+ priors[:, :2] + pre[:, 2:4] * variances[0] * priors[:, 2:],
+ priors[:, :2] + pre[:, 4:6] * variances[0] * priors[:, 2:],
+ priors[:, :2] + pre[:, 6:8] * variances[0] * priors[:, 2:],
+ priors[:, :2] + pre[:, 8:10] * variances[0] * priors[:, 2:],
+ ), dim=1)
+ return landms
+
+
+def log_sum_exp(x):
+ """Utility function for computing log_sum_exp while determining
+ This will be used to determine unaveraged confidence loss across
+ all examples in a batch.
+ Args:
+ x (Variable(tensor)): conf_preds from conf layers
+ """
+ x_max = x.data.max()
+ return torch.log(torch.sum(torch.exp(x-x_max), 1, keepdim=True)) + x_max
+
+
+# Original author: Francisco Massa:
+# https://github.com/fmassa/object-detection.torch
+# Ported to PyTorch by Max deGroot (02/01/2017)
+def nms(boxes, scores, overlap=0.5, top_k=200):
+ """Apply non-maximum suppression at test time to avoid detecting too many
+ overlapping bounding boxes for a given object.
+ Args:
+ boxes: (tensor) The location preds for the img, Shape: [num_priors,4].
+ scores: (tensor) The class pred scores for the img, Shape: [num_priors].
+ overlap: (float) The overlap thresh for suppressing unnecessary boxes.
+ top_k: (int) The maximum number of box preds to consider.
+ Return:
+ The indices of the kept boxes with respect to num_priors.
+ """
+
+ keep = torch.Tensor(scores.size(0)).fill_(0).long()
+ if boxes.numel() == 0:
+ return keep
+ x1 = boxes[:, 0]
+ y1 = boxes[:, 1]
+ x2 = boxes[:, 2]
+ y2 = boxes[:, 3]
+ area = torch.mul(x2 - x1, y2 - y1)
+ v, idx = scores.sort(0) # sort in ascending order
+ # I = I[v >= 0.01]
+ idx = idx[-top_k:] # indices of the top-k largest vals
+ xx1 = boxes.new()
+ yy1 = boxes.new()
+ xx2 = boxes.new()
+ yy2 = boxes.new()
+ w = boxes.new()
+ h = boxes.new()
+
+ # keep = torch.Tensor()
+ count = 0
+ while idx.numel() > 0:
+ i = idx[-1] # index of current largest val
+ # keep.append(i)
+ keep[count] = i
+ count += 1
+ if idx.size(0) == 1:
+ break
+ idx = idx[:-1] # remove kept element from view
+ # load bboxes of next highest vals
+ torch.index_select(x1, 0, idx, out=xx1)
+ torch.index_select(y1, 0, idx, out=yy1)
+ torch.index_select(x2, 0, idx, out=xx2)
+ torch.index_select(y2, 0, idx, out=yy2)
+ # store element-wise max with next highest score
+ xx1 = torch.clamp(xx1, min=x1[i])
+ yy1 = torch.clamp(yy1, min=y1[i])
+ xx2 = torch.clamp(xx2, max=x2[i])
+ yy2 = torch.clamp(yy2, max=y2[i])
+ w.resize_as_(xx2)
+ h.resize_as_(yy2)
+ w = xx2 - xx1
+ h = yy2 - yy1
+ # check sizes of xx1 and xx2.. after each iteration
+ w = torch.clamp(w, min=0.0)
+ h = torch.clamp(h, min=0.0)
+ inter = w*h
+ # IoU = i / (area(a) + area(b) - i)
+ rem_areas = torch.index_select(area, 0, idx) # load remaining areas
+ union = (rem_areas - inter) + area[i]
+ IoU = inter/union # store result in iou
+ # keep only elements with an IoU <= overlap
+ idx = idx[IoU.le(overlap)]
+ return keep, count
+
+
diff --git a/models/cv/detection/retinaface/igie/utils/post_process.py b/models/cv/detection/retinaface/igie/utils/post_process.py
new file mode 100755
index 0000000000000000000000000000000000000000..3affc02214efe8a15bfce61603eaf0d7a8f1a300
--- /dev/null
+++ b/models/cv/detection/retinaface/igie/utils/post_process.py
@@ -0,0 +1,186 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import os
+import torch
+import numpy as np
+from .prior_box import PriorBox
+from .box_utils import decode, decode_landm
+from .py_cpu_nms import py_cpu_nms
+
+cfg_mnet = {
+ 'name': 'mobilenet0.25',
+ 'min_sizes': [[10, 20], [32, 64], [128, 256]],
+ 'steps': [8, 16, 32],
+ 'variance': [0.1, 0.2],
+ 'clip': False,
+ 'loc_weight': 2.0,
+ 'gpu_train': True,
+ 'batch_size': 32,
+ 'ngpu': 1,
+ 'epoch': 250,
+ 'decay1': 190,
+ 'decay2': 220,
+ 'image_size': 300,
+ 'pretrain': False,
+ 'return_layers': {'stage1': 1, 'stage2': 2, 'stage3': 3},
+ 'in_channel': 32,
+ 'out_channel': 64
+}
+
+def clip_boxes(boxes, shape):
+ # Clip boxes (xyxy) to image shape (height, width)
+ if isinstance(boxes, torch.Tensor): # faster individually
+ boxes[:, 0].clamp_(0, shape[1]) # x1
+ boxes[:, 1].clamp_(0, shape[0]) # y1
+ boxes[:, 2].clamp_(0, shape[1]) # x2
+ boxes[:, 3].clamp_(0, shape[0]) # y2
+ else: # np.array (faster grouped)
+ boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2
+ boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2
+
+def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None):
+ # Rescale boxes (xyxy) from img1_shape to img0_shape
+ if ratio_pad is None: # calculate from img0_shape
+ gain = min(img1_shape[0] / img0_shape[0],
+ img1_shape[1] / img0_shape[1]) # gain = old / new
+ pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (
+ img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding
+ else:
+ gain = ratio_pad[0][0]
+ pad = ratio_pad[1]
+
+ boxes[:, [0, 2]] -= pad[0] # x padding
+ boxes[:, [1, 3]] -= pad[1] # y padding
+ boxes[:, :4] /= gain
+ clip_boxes(boxes, img0_shape)
+ return boxes
+
+def clip_boxes_landm(landm, shape):
+ # Clip landmarks (x1, y1, ..., x5, y5) to image shape (height, width)
+ if isinstance(landm, torch.Tensor): # faster individually
+ landm[:, 0].clamp_(0, shape[1]) # x1
+ landm[:, 1].clamp_(0, shape[0]) # y1
+ landm[:, 2].clamp_(0, shape[1]) # x2
+ landm[:, 3].clamp_(0, shape[0]) # y2
+ landm[:, 4].clamp_(0, shape[1]) # x3
+ landm[:, 5].clamp_(0, shape[0]) # y3
+ landm[:, 6].clamp_(0, shape[1]) # x4
+ landm[:, 7].clamp_(0, shape[0]) # y4
+ landm[:, 8].clamp_(0, shape[1]) # x5
+ landm[:, 9].clamp_(0, shape[0]) # y5
+ else: # np.array (faster grouped)
+ landm[:, [0, 2, 4, 6, 8]] = landm[:, [0, 2, 4, 6, 8]].clip(0, shape[1]) # x coords
+ landm[:, [1, 3, 5, 7, 9]] = landm[:, [1, 3, 5, 7, 9]].clip(0, shape[0]) # y coords
+
+def scale_boxes_landm(img1_shape, landm, img0_shape, ratio_pad=None):
+ # Rescale landmarks (x1, y1, ..., x5, y5) from img1_shape to img0_shape
+ if ratio_pad is None: # calculate from img0_shape
+ gain = min(img1_shape[0] / img0_shape[0],
+ img1_shape[1] / img0_shape[1]) # gain = old / new
+ pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (
+ img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding
+ else:
+ gain = ratio_pad[0][0]
+ pad = ratio_pad[1]
+
+ landm[:, [0, 2,4,6,8]] -= pad[0] # x padding
+ landm[:, [1, 3,5,7,9]] -= pad[1] # y padding
+ landm[:, :10] /= gain
+
+ clip_boxes_landm(landm, img0_shape)
+ return landm
+
+def post_process(shapes, img_names, loc_bs, conf_bs, landms_bs, save_folder):
+ max_size = 320
+ confidence_threshold=0.02
+ nms_threshold=0.4
+
+ for idx, loc in enumerate(loc_bs):
+ img_size=[320, 320]
+ im_shape=list(shapes[idx][0]) #ori
+
+ im_size_min = np.min(im_shape[0:2])
+ im_size_max = np.max(im_shape[0:2])
+ resize = float(320) / float(im_size_min)
+ # prevent bigger axis from being more than max_size:
+ if np.round(resize * im_size_max) >max_size:
+ resize = float(max_size) / float(im_size_max)
+
+ scale = torch.Tensor([img_size[1], img_size[0], img_size[1], img_size[0]])
+ scale = scale.to('cpu')
+
+ priorbox = PriorBox(cfg_mnet, image_size=(320, 320))
+ priors = priorbox.forward()
+ priors = priors.to('cpu')
+ prior_data = priors.data
+
+ boxes = decode(torch.from_numpy(loc_bs[idx]).data.squeeze(0).float(), prior_data, cfg_mnet['variance'])
+ boxes = boxes * scale
+ boxes=scale_boxes([320, 320],boxes,im_shape,shapes[idx][1])
+ boxes = boxes.cpu().numpy()
+ scores = torch.from_numpy(conf_bs[idx]).squeeze(0).data.cpu().numpy()[:, 1]
+ landms = decode_landm(torch.from_numpy(landms_bs[idx]).data.squeeze(0), prior_data, cfg_mnet['variance'])
+ img_size=[1,3,img_size[0],img_size[1]]
+
+
+ scale1 = torch.Tensor([img_size[3], img_size[2], img_size[3], img_size[2],
+ img_size[3], img_size[2], img_size[3], img_size[2],
+ img_size[3], img_size[2]])
+ scale1 = scale1.to('cpu')
+
+ landms = landms * scale1
+ landms=scale_boxes_landm([320, 320],landms,im_shape,shapes[idx][1])
+ landms = landms.cpu().numpy()
+
+ # ignore low scores
+ inds = np.where(scores > confidence_threshold)[0]
+ boxes = boxes[inds]
+ landms = landms[inds]
+ scores = scores[inds]
+
+ # keep top-K before NMS
+ order = scores.argsort()[::-1]
+ boxes = boxes[order]
+ landms = landms[order]
+ scores = scores[order]
+
+ # do NMS
+ dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
+ keep = py_cpu_nms(dets, nms_threshold)
+ dets = dets[keep, :]
+ landms = landms[keep]
+
+ dets = np.concatenate((dets, landms), axis=1)
+
+ # --------------------------------------------------------------------
+ save_name = save_folder + img_names[idx][:-4] + ".txt"
+ dirname = os.path.dirname(save_name)
+ if not os.path.isdir(dirname):
+ os.makedirs(dirname)
+ with open(save_name, "w") as fd:
+ bboxs = dets
+ file_name = os.path.basename(save_name)[:-4] + "\n"
+ bboxs_num = str(len(bboxs)) + "\n"
+ fd.write(file_name)
+ fd.write(bboxs_num)
+ for box in bboxs:
+ x = int(box[0])
+ y = int(box[1])
+ w = int(box[2]) - int(box[0])
+ h = int(box[3]) - int(box[1])
+ confidence = str(box[4])
+ line = str(x) + " " + str(y) + " " + str(w) + " " + str(h) + " " + confidence + " \n"
+ fd.write(line)
diff --git a/models/cv/detection/retinaface/igie/utils/prior_box.py b/models/cv/detection/retinaface/igie/utils/prior_box.py
new file mode 100755
index 0000000000000000000000000000000000000000..da085d2215d05f519a0e0412bf0a700f3fabf366
--- /dev/null
+++ b/models/cv/detection/retinaface/igie/utils/prior_box.py
@@ -0,0 +1,49 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import torch
+from itertools import product as product
+import numpy as np
+from math import ceil
+
+
+class PriorBox(object):
+ def __init__(self, cfg, image_size=None, phase='train'):
+ super(PriorBox, self).__init__()
+ self.min_sizes = cfg['min_sizes']
+ self.steps = cfg['steps']
+ self.clip = cfg['clip']
+ self.image_size = image_size
+ self.feature_maps = [[ceil(self.image_size[0]/step), ceil(self.image_size[1]/step)] for step in self.steps]
+ self.name = "s"
+
+ def forward(self):
+ anchors = []
+ for k, f in enumerate(self.feature_maps):
+ min_sizes = self.min_sizes[k]
+ for i, j in product(range(f[0]), range(f[1])):
+ for min_size in min_sizes:
+ s_kx = min_size / self.image_size[1]
+ s_ky = min_size / self.image_size[0]
+ dense_cx = [x * self.steps[k] / self.image_size[1] for x in [j + 0.5]]
+ dense_cy = [y * self.steps[k] / self.image_size[0] for y in [i + 0.5]]
+ for cy, cx in product(dense_cy, dense_cx):
+ anchors += [cx, cy, s_kx, s_ky]
+
+ # back to torch land
+ output = torch.Tensor(anchors).view(-1, 4)
+ if self.clip:
+ output.clamp_(max=1, min=0)
+ return output
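+
+# With the 320x320 input used by the inference scripts, cfg_mnet yields
+# feature maps of 40x40, 20x20 and 10x10 with two min_sizes each, i.e.
+# (1600 + 400 + 100) * 2 = 4200 priors in total.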
diff --git a/models/cv/detection/retinaface/igie/utils/py_cpu_nms.py b/models/cv/detection/retinaface/igie/utils/py_cpu_nms.py
new file mode 100755
index 0000000000000000000000000000000000000000..161cd07bcf204d13a6e5f39c7042e708c73661f2
--- /dev/null
+++ b/models/cv/detection/retinaface/igie/utils/py_cpu_nms.py
@@ -0,0 +1,52 @@
+# --------------------------------------------------------
+# Fast R-CNN
+# Copyright (c) 2015 Microsoft
+# Licensed under The MIT License [see LICENSE for details]
+# Written by Ross Girshick
+# --------------------------------------------------------
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import numpy as np
+
+def py_cpu_nms(dets, thresh):
+ """Pure Python NMS baseline."""
+ x1 = dets[:, 0]
+ y1 = dets[:, 1]
+ x2 = dets[:, 2]
+ y2 = dets[:, 3]
+ scores = dets[:, 4]
+
+ areas = (x2 - x1 + 1) * (y2 - y1 + 1)
+ order = scores.argsort()[::-1]
+
+ keep = []
+ while order.size > 0:
+ i = order[0]
+ keep.append(i)
+ xx1 = np.maximum(x1[i], x1[order[1:]])
+ yy1 = np.maximum(y1[i], y1[order[1:]])
+ xx2 = np.minimum(x2[i], x2[order[1:]])
+ yy2 = np.minimum(y2[i], y2[order[1:]])
+
+ w = np.maximum(0.0, xx2 - xx1 + 1)
+ h = np.maximum(0.0, yy2 - yy1 + 1)
+ inter = w * h
+ ovr = inter / (areas[i] + areas[order[1:]] - inter)
+
+ inds = np.where(ovr <= thresh)[0]
+ order = order[inds + 1]
+
+ return keep
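+
+# Usage sketch (mirroring utils/post_process.py): dets is an [N, 5] array of
+# (x1, y1, x2, y2, score) rows and `keep` holds indices of the retained rows:
+# keep = py_cpu_nms(dets, nms_threshold); dets = dets[keep, :]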
diff --git a/models/cv/detection/retinaface/igie/widerface_evaluate/README.md b/models/cv/detection/retinaface/igie/widerface_evaluate/README.md
new file mode 100755
index 0000000000000000000000000000000000000000..95952b7b481e561ad6da3e7d562ce71b56f4b4a4
--- /dev/null
+++ b/models/cv/detection/retinaface/igie/widerface_evaluate/README.md
@@ -0,0 +1,27 @@
+# WiderFace-Evaluation
+Python Evaluation Code for [Wider Face Dataset](http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/)
+
+
+## Usage
+
+
+##### Before evaluating
+
+````
+python3 setup.py build_ext --inplace
+````
+
+##### Evaluating
+
+**GroundTruth:** `wider_face_val.mat`, `wider_easy_val.mat`, `wider_medium_val.mat`, `wider_hard_val.mat`
+
+````
+python3 evaluation.py -p <prediction dir> -g <ground truth dir>
+````
+
+## Bugs & Problems
+Please open an issue.
+
+## Acknowledgements
+
+Some code is borrowed from Sergey Karayev.
diff --git a/models/cv/detection/retinaface/igie/widerface_evaluate/box_overlaps.c b/models/cv/detection/retinaface/igie/widerface_evaluate/box_overlaps.c
new file mode 100755
index 0000000000000000000000000000000000000000..4926be24e3be99009998301b4d2f1490424b8133
--- /dev/null
+++ b/models/cv/detection/retinaface/igie/widerface_evaluate/box_overlaps.c
@@ -0,0 +1,6871 @@
+/* Generated by Cython 0.29.33 */
+
+/* BEGIN: Cython Metadata
+{
+ "distutils": {
+ "depends": [
+ "/home/work/zhaoxq/miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/core/include/numpy/arrayobject.h",
+ "/home/work/zhaoxq/miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/core/include/numpy/ufuncobject.h"
+ ],
+ "include_dirs": [
+ "/home/work/zhaoxq/miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/core/include"
+ ],
+ "name": "bbox",
+ "sources": [
+ "box_overlaps.pyx"
+ ]
+ },
+ "module_name": "bbox"
+}
+END: Cython Metadata */
+
+#ifndef PY_SSIZE_T_CLEAN
+#define PY_SSIZE_T_CLEAN
+#endif /* PY_SSIZE_T_CLEAN */
+#include "Python.h"
+#ifndef Py_PYTHON_H
+ #error Python headers needed to compile C extensions, please install development version of Python.
+#elif PY_VERSION_HEX < 0x02060000 || (0x03000000 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x03030000)
+ #error Cython requires Python 2.6+ or Python 3.3+.
+#else
+#define CYTHON_ABI "0_29_33"
+#define CYTHON_HEX_VERSION 0x001D21F0
+#define CYTHON_FUTURE_DIVISION 0
+#include <stddef.h>
+#ifndef offsetof
+ #define offsetof(type, member) ( (size_t) & ((type*)0) -> member )
+#endif
+#if !defined(WIN32) && !defined(MS_WINDOWS)
+ #ifndef __stdcall
+ #define __stdcall
+ #endif
+ #ifndef __cdecl
+ #define __cdecl
+ #endif
+ #ifndef __fastcall
+ #define __fastcall
+ #endif
+#endif
+#ifndef DL_IMPORT
+ #define DL_IMPORT(t) t
+#endif
+#ifndef DL_EXPORT
+ #define DL_EXPORT(t) t
+#endif
+#define __PYX_COMMA ,
+#ifndef HAVE_LONG_LONG
+ #if PY_VERSION_HEX >= 0x02070000
+ #define HAVE_LONG_LONG
+ #endif
+#endif
+#ifndef PY_LONG_LONG
+ #define PY_LONG_LONG LONG_LONG
+#endif
+#ifndef Py_HUGE_VAL
+ #define Py_HUGE_VAL HUGE_VAL
+#endif
+#ifdef PYPY_VERSION
+ #define CYTHON_COMPILING_IN_PYPY 1
+ #define CYTHON_COMPILING_IN_PYSTON 0
+ #define CYTHON_COMPILING_IN_CPYTHON 0
+ #define CYTHON_COMPILING_IN_NOGIL 0
+ #undef CYTHON_USE_TYPE_SLOTS
+ #define CYTHON_USE_TYPE_SLOTS 0
+ #undef CYTHON_USE_PYTYPE_LOOKUP
+ #define CYTHON_USE_PYTYPE_LOOKUP 0
+ #if PY_VERSION_HEX < 0x03050000
+ #undef CYTHON_USE_ASYNC_SLOTS
+ #define CYTHON_USE_ASYNC_SLOTS 0
+ #elif !defined(CYTHON_USE_ASYNC_SLOTS)
+ #define CYTHON_USE_ASYNC_SLOTS 1
+ #endif
+ #undef CYTHON_USE_PYLIST_INTERNALS
+ #define CYTHON_USE_PYLIST_INTERNALS 0
+ #undef CYTHON_USE_UNICODE_INTERNALS
+ #define CYTHON_USE_UNICODE_INTERNALS 0
+ #undef CYTHON_USE_UNICODE_WRITER
+ #define CYTHON_USE_UNICODE_WRITER 0
+ #undef CYTHON_USE_PYLONG_INTERNALS
+ #define CYTHON_USE_PYLONG_INTERNALS 0
+ #undef CYTHON_AVOID_BORROWED_REFS
+ #define CYTHON_AVOID_BORROWED_REFS 1
+ #undef CYTHON_ASSUME_SAFE_MACROS
+ #define CYTHON_ASSUME_SAFE_MACROS 0
+ #undef CYTHON_UNPACK_METHODS
+ #define CYTHON_UNPACK_METHODS 0
+ #undef CYTHON_FAST_THREAD_STATE
+ #define CYTHON_FAST_THREAD_STATE 0
+ #undef CYTHON_FAST_PYCALL
+ #define CYTHON_FAST_PYCALL 0
+ #undef CYTHON_PEP489_MULTI_PHASE_INIT
+ #define CYTHON_PEP489_MULTI_PHASE_INIT 0
+ #undef CYTHON_USE_TP_FINALIZE
+ #define CYTHON_USE_TP_FINALIZE 0
+ #undef CYTHON_USE_DICT_VERSIONS
+ #define CYTHON_USE_DICT_VERSIONS 0
+ #undef CYTHON_USE_EXC_INFO_STACK
+ #define CYTHON_USE_EXC_INFO_STACK 0
+ #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC
+ #define CYTHON_UPDATE_DESCRIPTOR_DOC 0
+ #endif
+#elif defined(PYSTON_VERSION)
+ #define CYTHON_COMPILING_IN_PYPY 0
+ #define CYTHON_COMPILING_IN_PYSTON 1
+ #define CYTHON_COMPILING_IN_CPYTHON 0
+ #define CYTHON_COMPILING_IN_NOGIL 0
+ #ifndef CYTHON_USE_TYPE_SLOTS
+ #define CYTHON_USE_TYPE_SLOTS 1
+ #endif
+ #undef CYTHON_USE_PYTYPE_LOOKUP
+ #define CYTHON_USE_PYTYPE_LOOKUP 0
+ #undef CYTHON_USE_ASYNC_SLOTS
+ #define CYTHON_USE_ASYNC_SLOTS 0
+ #undef CYTHON_USE_PYLIST_INTERNALS
+ #define CYTHON_USE_PYLIST_INTERNALS 0
+ #ifndef CYTHON_USE_UNICODE_INTERNALS
+ #define CYTHON_USE_UNICODE_INTERNALS 1
+ #endif
+ #undef CYTHON_USE_UNICODE_WRITER
+ #define CYTHON_USE_UNICODE_WRITER 0
+ #undef CYTHON_USE_PYLONG_INTERNALS
+ #define CYTHON_USE_PYLONG_INTERNALS 0
+ #ifndef CYTHON_AVOID_BORROWED_REFS
+ #define CYTHON_AVOID_BORROWED_REFS 0
+ #endif
+ #ifndef CYTHON_ASSUME_SAFE_MACROS
+ #define CYTHON_ASSUME_SAFE_MACROS 1
+ #endif
+ #ifndef CYTHON_UNPACK_METHODS
+ #define CYTHON_UNPACK_METHODS 1
+ #endif
+ #undef CYTHON_FAST_THREAD_STATE
+ #define CYTHON_FAST_THREAD_STATE 0
+ #undef CYTHON_FAST_PYCALL
+ #define CYTHON_FAST_PYCALL 0
+ #undef CYTHON_PEP489_MULTI_PHASE_INIT
+ #define CYTHON_PEP489_MULTI_PHASE_INIT 0
+ #undef CYTHON_USE_TP_FINALIZE
+ #define CYTHON_USE_TP_FINALIZE 0
+ #undef CYTHON_USE_DICT_VERSIONS
+ #define CYTHON_USE_DICT_VERSIONS 0
+ #undef CYTHON_USE_EXC_INFO_STACK
+ #define CYTHON_USE_EXC_INFO_STACK 0
+ #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC
+ #define CYTHON_UPDATE_DESCRIPTOR_DOC 0
+ #endif
+#elif defined(PY_NOGIL)
+ #define CYTHON_COMPILING_IN_PYPY 0
+ #define CYTHON_COMPILING_IN_PYSTON 0
+ #define CYTHON_COMPILING_IN_CPYTHON 0
+ #define CYTHON_COMPILING_IN_NOGIL 1
+ #ifndef CYTHON_USE_TYPE_SLOTS
+ #define CYTHON_USE_TYPE_SLOTS 1
+ #endif
+ #undef CYTHON_USE_PYTYPE_LOOKUP
+ #define CYTHON_USE_PYTYPE_LOOKUP 0
+ #ifndef CYTHON_USE_ASYNC_SLOTS
+ #define CYTHON_USE_ASYNC_SLOTS 1
+ #endif
+ #undef CYTHON_USE_PYLIST_INTERNALS
+ #define CYTHON_USE_PYLIST_INTERNALS 0
+ #ifndef CYTHON_USE_UNICODE_INTERNALS
+ #define CYTHON_USE_UNICODE_INTERNALS 1
+ #endif
+ #undef CYTHON_USE_UNICODE_WRITER
+ #define CYTHON_USE_UNICODE_WRITER 0
+ #undef CYTHON_USE_PYLONG_INTERNALS
+ #define CYTHON_USE_PYLONG_INTERNALS 0
+ #ifndef CYTHON_AVOID_BORROWED_REFS
+ #define CYTHON_AVOID_BORROWED_REFS 0
+ #endif
+ #ifndef CYTHON_ASSUME_SAFE_MACROS
+ #define CYTHON_ASSUME_SAFE_MACROS 1
+ #endif
+ #ifndef CYTHON_UNPACK_METHODS
+ #define CYTHON_UNPACK_METHODS 1
+ #endif
+ #undef CYTHON_FAST_THREAD_STATE
+ #define CYTHON_FAST_THREAD_STATE 0
+ #undef CYTHON_FAST_PYCALL
+ #define CYTHON_FAST_PYCALL 0
+ #ifndef CYTHON_PEP489_MULTI_PHASE_INIT
+ #define CYTHON_PEP489_MULTI_PHASE_INIT 1
+ #endif
+ #ifndef CYTHON_USE_TP_FINALIZE
+ #define CYTHON_USE_TP_FINALIZE 1
+ #endif
+ #undef CYTHON_USE_DICT_VERSIONS
+ #define CYTHON_USE_DICT_VERSIONS 0
+ #undef CYTHON_USE_EXC_INFO_STACK
+ #define CYTHON_USE_EXC_INFO_STACK 0
+#else
+ #define CYTHON_COMPILING_IN_PYPY 0
+ #define CYTHON_COMPILING_IN_PYSTON 0
+ #define CYTHON_COMPILING_IN_CPYTHON 1
+ #define CYTHON_COMPILING_IN_NOGIL 0
+ #ifndef CYTHON_USE_TYPE_SLOTS
+ #define CYTHON_USE_TYPE_SLOTS 1
+ #endif
+ #if PY_VERSION_HEX < 0x02070000
+ #undef CYTHON_USE_PYTYPE_LOOKUP
+ #define CYTHON_USE_PYTYPE_LOOKUP 0
+ #elif !defined(CYTHON_USE_PYTYPE_LOOKUP)
+ #define CYTHON_USE_PYTYPE_LOOKUP 1
+ #endif
+ #if PY_MAJOR_VERSION < 3
+ #undef CYTHON_USE_ASYNC_SLOTS
+ #define CYTHON_USE_ASYNC_SLOTS 0
+ #elif !defined(CYTHON_USE_ASYNC_SLOTS)
+ #define CYTHON_USE_ASYNC_SLOTS 1
+ #endif
+ #if PY_VERSION_HEX < 0x02070000
+ #undef CYTHON_USE_PYLONG_INTERNALS
+ #define CYTHON_USE_PYLONG_INTERNALS 0
+ #elif !defined(CYTHON_USE_PYLONG_INTERNALS)
+ #define CYTHON_USE_PYLONG_INTERNALS 1
+ #endif
+ #ifndef CYTHON_USE_PYLIST_INTERNALS
+ #define CYTHON_USE_PYLIST_INTERNALS 1
+ #endif
+ #ifndef CYTHON_USE_UNICODE_INTERNALS
+ #define CYTHON_USE_UNICODE_INTERNALS 1
+ #endif
+ #if PY_VERSION_HEX < 0x030300F0 || PY_VERSION_HEX >= 0x030B00A2
+ #undef CYTHON_USE_UNICODE_WRITER
+ #define CYTHON_USE_UNICODE_WRITER 0
+ #elif !defined(CYTHON_USE_UNICODE_WRITER)
+ #define CYTHON_USE_UNICODE_WRITER 1
+ #endif
+ #ifndef CYTHON_AVOID_BORROWED_REFS
+ #define CYTHON_AVOID_BORROWED_REFS 0
+ #endif
+ #ifndef CYTHON_ASSUME_SAFE_MACROS
+ #define CYTHON_ASSUME_SAFE_MACROS 1
+ #endif
+ #ifndef CYTHON_UNPACK_METHODS
+ #define CYTHON_UNPACK_METHODS 1
+ #endif
+ #if PY_VERSION_HEX >= 0x030B00A4
+ #undef CYTHON_FAST_THREAD_STATE
+ #define CYTHON_FAST_THREAD_STATE 0
+ #elif !defined(CYTHON_FAST_THREAD_STATE)
+ #define CYTHON_FAST_THREAD_STATE 1
+ #endif
+ #ifndef CYTHON_FAST_PYCALL
+ #define CYTHON_FAST_PYCALL (PY_VERSION_HEX < 0x030A0000)
+ #endif
+ #ifndef CYTHON_PEP489_MULTI_PHASE_INIT
+ #define CYTHON_PEP489_MULTI_PHASE_INIT (PY_VERSION_HEX >= 0x03050000)
+ #endif
+ #ifndef CYTHON_USE_TP_FINALIZE
+ #define CYTHON_USE_TP_FINALIZE (PY_VERSION_HEX >= 0x030400a1)
+ #endif
+ #ifndef CYTHON_USE_DICT_VERSIONS
+ #define CYTHON_USE_DICT_VERSIONS (PY_VERSION_HEX >= 0x030600B1)
+ #endif
+ #if PY_VERSION_HEX >= 0x030B00A4
+ #undef CYTHON_USE_EXC_INFO_STACK
+ #define CYTHON_USE_EXC_INFO_STACK 0
+ #elif !defined(CYTHON_USE_EXC_INFO_STACK)
+ #define CYTHON_USE_EXC_INFO_STACK (PY_VERSION_HEX >= 0x030700A3)
+ #endif
+ #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC
+ #define CYTHON_UPDATE_DESCRIPTOR_DOC 1
+ #endif
+#endif
+#if !defined(CYTHON_FAST_PYCCALL)
+#define CYTHON_FAST_PYCCALL (CYTHON_FAST_PYCALL && PY_VERSION_HEX >= 0x030600B1)
+#endif
+#if CYTHON_USE_PYLONG_INTERNALS
+ #if PY_MAJOR_VERSION < 3
+ #include "longintrepr.h"
+ #endif
+ #undef SHIFT
+ #undef BASE
+ #undef MASK
+ #ifdef SIZEOF_VOID_P
+ enum { __pyx_check_sizeof_voidp = 1 / (int)(SIZEOF_VOID_P == sizeof(void*)) };
+ #endif
+#endif
+#ifndef __has_attribute
+ #define __has_attribute(x) 0
+#endif
+#ifndef __has_cpp_attribute
+ #define __has_cpp_attribute(x) 0
+#endif
+#ifndef CYTHON_RESTRICT
+ #if defined(__GNUC__)
+ #define CYTHON_RESTRICT __restrict__
+ #elif defined(_MSC_VER) && _MSC_VER >= 1400
+ #define CYTHON_RESTRICT __restrict
+ #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+ #define CYTHON_RESTRICT restrict
+ #else
+ #define CYTHON_RESTRICT
+ #endif
+#endif
+#ifndef CYTHON_UNUSED
+# if defined(__GNUC__)
+# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
+# define CYTHON_UNUSED __attribute__ ((__unused__))
+# else
+# define CYTHON_UNUSED
+# endif
+# elif defined(__ICC) || (defined(__INTEL_COMPILER) && !defined(_MSC_VER))
+# define CYTHON_UNUSED __attribute__ ((__unused__))
+# else
+# define CYTHON_UNUSED
+# endif
+#endif
+#ifndef CYTHON_MAYBE_UNUSED_VAR
+# if defined(__cplusplus)
+ template<class T> void CYTHON_MAYBE_UNUSED_VAR( const T& ) { }
+# else
+# define CYTHON_MAYBE_UNUSED_VAR(x) (void)(x)
+# endif
+#endif
+#ifndef CYTHON_NCP_UNUSED
+# if CYTHON_COMPILING_IN_CPYTHON
+# define CYTHON_NCP_UNUSED
+# else
+# define CYTHON_NCP_UNUSED CYTHON_UNUSED
+# endif
+#endif
+#define __Pyx_void_to_None(void_result) ((void)(void_result), Py_INCREF(Py_None), Py_None)
+#ifdef _MSC_VER
+ #ifndef _MSC_STDINT_H_
+ #if _MSC_VER < 1300
+ typedef unsigned char uint8_t;
+ typedef unsigned int uint32_t;
+ #else
+ typedef unsigned __int8 uint8_t;
+ typedef unsigned __int32 uint32_t;
+ #endif
+ #endif
+#else
+ #include <stdint.h>
+#endif
+#ifndef CYTHON_FALLTHROUGH
+ #if defined(__cplusplus) && __cplusplus >= 201103L
+ #if __has_cpp_attribute(fallthrough)
+ #define CYTHON_FALLTHROUGH [[fallthrough]]
+ #elif __has_cpp_attribute(clang::fallthrough)
+ #define CYTHON_FALLTHROUGH [[clang::fallthrough]]
+ #elif __has_cpp_attribute(gnu::fallthrough)
+ #define CYTHON_FALLTHROUGH [[gnu::fallthrough]]
+ #endif
+ #endif
+ #ifndef CYTHON_FALLTHROUGH
+ #if __has_attribute(fallthrough)
+ #define CYTHON_FALLTHROUGH __attribute__((fallthrough))
+ #else
+ #define CYTHON_FALLTHROUGH
+ #endif
+ #endif
+ #if defined(__clang__ ) && defined(__apple_build_version__)
+ #if __apple_build_version__ < 7000000
+ #undef CYTHON_FALLTHROUGH
+ #define CYTHON_FALLTHROUGH
+ #endif
+ #endif
+#endif
+
+#ifndef CYTHON_INLINE
+ #if defined(__clang__)
+ #define CYTHON_INLINE __inline__ __attribute__ ((__unused__))
+ #elif defined(__GNUC__)
+ #define CYTHON_INLINE __inline__
+ #elif defined(_MSC_VER)
+ #define CYTHON_INLINE __inline
+ #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+ #define CYTHON_INLINE inline
+ #else
+ #define CYTHON_INLINE
+ #endif
+#endif
+
+#if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX < 0x02070600 && !defined(Py_OptimizeFlag)
+ #define Py_OptimizeFlag 0
+#endif
+#define __PYX_BUILD_PY_SSIZE_T "n"
+#define CYTHON_FORMAT_SSIZE_T "z"
+#if PY_MAJOR_VERSION < 3
+ #define __Pyx_BUILTIN_MODULE_NAME "__builtin__"
+ #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\
+ PyCode_New(a+k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)
+ #define __Pyx_DefaultClassType PyClass_Type
+#else
+ #define __Pyx_BUILTIN_MODULE_NAME "builtins"
+ #define __Pyx_DefaultClassType PyType_Type
+#if PY_VERSION_HEX >= 0x030B00A1
+ static CYTHON_INLINE PyCodeObject* __Pyx_PyCode_New(int a, int k, int l, int s, int f,
+ PyObject *code, PyObject *c, PyObject* n, PyObject *v,
+ PyObject *fv, PyObject *cell, PyObject* fn,
+ PyObject *name, int fline, PyObject *lnos) {
+ PyObject *kwds=NULL, *argcount=NULL, *posonlyargcount=NULL, *kwonlyargcount=NULL;
+ PyObject *nlocals=NULL, *stacksize=NULL, *flags=NULL, *replace=NULL, *call_result=NULL, *empty=NULL;
+ const char *fn_cstr=NULL;
+ const char *name_cstr=NULL;
+ PyCodeObject* co=NULL;
+ PyObject *type, *value, *traceback;
+ PyErr_Fetch(&type, &value, &traceback);
+ if (!(kwds=PyDict_New())) goto end;
+ if (!(argcount=PyLong_FromLong(a))) goto end;
+ if (PyDict_SetItemString(kwds, "co_argcount", argcount) != 0) goto end;
+ if (!(posonlyargcount=PyLong_FromLong(0))) goto end;
+ if (PyDict_SetItemString(kwds, "co_posonlyargcount", posonlyargcount) != 0) goto end;
+ if (!(kwonlyargcount=PyLong_FromLong(k))) goto end;
+ if (PyDict_SetItemString(kwds, "co_kwonlyargcount", kwonlyargcount) != 0) goto end;
+ if (!(nlocals=PyLong_FromLong(l))) goto end;
+ if (PyDict_SetItemString(kwds, "co_nlocals", nlocals) != 0) goto end;
+ if (!(stacksize=PyLong_FromLong(s))) goto end;
+ if (PyDict_SetItemString(kwds, "co_stacksize", stacksize) != 0) goto end;
+ if (!(flags=PyLong_FromLong(f))) goto end;
+ if (PyDict_SetItemString(kwds, "co_flags", flags) != 0) goto end;
+ if (PyDict_SetItemString(kwds, "co_code", code) != 0) goto end;
+ if (PyDict_SetItemString(kwds, "co_consts", c) != 0) goto end;
+ if (PyDict_SetItemString(kwds, "co_names", n) != 0) goto end;
+ if (PyDict_SetItemString(kwds, "co_varnames", v) != 0) goto end;
+ if (PyDict_SetItemString(kwds, "co_freevars", fv) != 0) goto end;
+ if (PyDict_SetItemString(kwds, "co_cellvars", cell) != 0) goto end;
+ if (PyDict_SetItemString(kwds, "co_linetable", lnos) != 0) goto end;
+ if (!(fn_cstr=PyUnicode_AsUTF8AndSize(fn, NULL))) goto end;
+ if (!(name_cstr=PyUnicode_AsUTF8AndSize(name, NULL))) goto end;
+ if (!(co = PyCode_NewEmpty(fn_cstr, name_cstr, fline))) goto end;
+ if (!(replace = PyObject_GetAttrString((PyObject*)co, "replace"))) goto cleanup_code_too;
+ if (!(empty = PyTuple_New(0))) goto cleanup_code_too; // unfortunately __pyx_empty_tuple isn't available here
+ if (!(call_result = PyObject_Call(replace, empty, kwds))) goto cleanup_code_too;
+ Py_XDECREF((PyObject*)co);
+ co = (PyCodeObject*)call_result;
+ call_result = NULL;
+ if (0) {
+ cleanup_code_too:
+ Py_XDECREF((PyObject*)co);
+ co = NULL;
+ }
+ end:
+ Py_XDECREF(kwds);
+ Py_XDECREF(argcount);
+ Py_XDECREF(posonlyargcount);
+ Py_XDECREF(kwonlyargcount);
+ Py_XDECREF(nlocals);
+ Py_XDECREF(stacksize);
+ Py_XDECREF(replace);
+ Py_XDECREF(call_result);
+ Py_XDECREF(empty);
+ if (type) {
+ PyErr_Restore(type, value, traceback);
+ }
+ return co;
+ }
+#else
+ #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\
+ PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)
+#endif
+ #define __Pyx_DefaultClassType PyType_Type
+#endif
+#ifndef Py_TPFLAGS_CHECKTYPES
+ #define Py_TPFLAGS_CHECKTYPES 0
+#endif
+#ifndef Py_TPFLAGS_HAVE_INDEX
+ #define Py_TPFLAGS_HAVE_INDEX 0
+#endif
+#ifndef Py_TPFLAGS_HAVE_NEWBUFFER
+ #define Py_TPFLAGS_HAVE_NEWBUFFER 0
+#endif
+#ifndef Py_TPFLAGS_HAVE_FINALIZE
+ #define Py_TPFLAGS_HAVE_FINALIZE 0
+#endif
+#ifndef METH_STACKLESS
+ #define METH_STACKLESS 0
+#endif
+#if PY_VERSION_HEX <= 0x030700A3 || !defined(METH_FASTCALL)
+ #ifndef METH_FASTCALL
+ #define METH_FASTCALL 0x80
+ #endif
+ typedef PyObject *(*__Pyx_PyCFunctionFast) (PyObject *self, PyObject *const *args, Py_ssize_t nargs);
+ typedef PyObject *(*__Pyx_PyCFunctionFastWithKeywords) (PyObject *self, PyObject *const *args,
+ Py_ssize_t nargs, PyObject *kwnames);
+#else
+ #define __Pyx_PyCFunctionFast _PyCFunctionFast
+ #define __Pyx_PyCFunctionFastWithKeywords _PyCFunctionFastWithKeywords
+#endif
+#if CYTHON_FAST_PYCCALL
+#define __Pyx_PyFastCFunction_Check(func)\
+ ((PyCFunction_Check(func) && (METH_FASTCALL == (PyCFunction_GET_FLAGS(func) & ~(METH_CLASS | METH_STATIC | METH_COEXIST | METH_KEYWORDS | METH_STACKLESS)))))
+#else
+#define __Pyx_PyFastCFunction_Check(func) 0
+#endif
+#if CYTHON_COMPILING_IN_PYPY && !defined(PyObject_Malloc)
+ #define PyObject_Malloc(s) PyMem_Malloc(s)
+ #define PyObject_Free(p) PyMem_Free(p)
+ #define PyObject_Realloc(p) PyMem_Realloc(p)
+#endif
+#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030400A1
+ #define PyMem_RawMalloc(n) PyMem_Malloc(n)
+ #define PyMem_RawRealloc(p, n) PyMem_Realloc(p, n)
+ #define PyMem_RawFree(p) PyMem_Free(p)
+#endif
+#if CYTHON_COMPILING_IN_PYSTON
+ #define __Pyx_PyCode_HasFreeVars(co) PyCode_HasFreeVars(co)
+ #define __Pyx_PyFrame_SetLineNumber(frame, lineno) PyFrame_SetLineNumber(frame, lineno)
+#else
+ #define __Pyx_PyCode_HasFreeVars(co) (PyCode_GetNumFree(co) > 0)
+ #define __Pyx_PyFrame_SetLineNumber(frame, lineno) (frame)->f_lineno = (lineno)
+#endif
+#if !CYTHON_FAST_THREAD_STATE || PY_VERSION_HEX < 0x02070000
+ #define __Pyx_PyThreadState_Current PyThreadState_GET()
+#elif PY_VERSION_HEX >= 0x03060000
+ #define __Pyx_PyThreadState_Current _PyThreadState_UncheckedGet()
+#elif PY_VERSION_HEX >= 0x03000000
+ #define __Pyx_PyThreadState_Current PyThreadState_GET()
+#else
+ #define __Pyx_PyThreadState_Current _PyThreadState_Current
+#endif
+#if PY_VERSION_HEX < 0x030700A2 && !defined(PyThread_tss_create) && !defined(Py_tss_NEEDS_INIT)
+#include "pythread.h"
+#define Py_tss_NEEDS_INIT 0
+typedef int Py_tss_t;
+static CYTHON_INLINE int PyThread_tss_create(Py_tss_t *key) {
+ *key = PyThread_create_key();
+ return 0;
+}
+static CYTHON_INLINE Py_tss_t * PyThread_tss_alloc(void) {
+ Py_tss_t *key = (Py_tss_t *)PyObject_Malloc(sizeof(Py_tss_t));
+ *key = Py_tss_NEEDS_INIT;
+ return key;
+}
+static CYTHON_INLINE void PyThread_tss_free(Py_tss_t *key) {
+ PyObject_Free(key);
+}
+static CYTHON_INLINE int PyThread_tss_is_created(Py_tss_t *key) {
+ return *key != Py_tss_NEEDS_INIT;
+}
+static CYTHON_INLINE void PyThread_tss_delete(Py_tss_t *key) {
+ PyThread_delete_key(*key);
+ *key = Py_tss_NEEDS_INIT;
+}
+static CYTHON_INLINE int PyThread_tss_set(Py_tss_t *key, void *value) {
+ return PyThread_set_key_value(*key, value);
+}
+static CYTHON_INLINE void * PyThread_tss_get(Py_tss_t *key) {
+ return PyThread_get_key_value(*key);
+}
+#endif
+#if CYTHON_COMPILING_IN_CPYTHON || defined(_PyDict_NewPresized)
+#define __Pyx_PyDict_NewPresized(n) ((n <= 8) ? PyDict_New() : _PyDict_NewPresized(n))
+#else
+#define __Pyx_PyDict_NewPresized(n) PyDict_New()
+#endif
+#if PY_MAJOR_VERSION >= 3 || CYTHON_FUTURE_DIVISION
+ #define __Pyx_PyNumber_Divide(x,y) PyNumber_TrueDivide(x,y)
+ #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceTrueDivide(x,y)
+#else
+ #define __Pyx_PyNumber_Divide(x,y) PyNumber_Divide(x,y)
+ #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceDivide(x,y)
+#endif
+#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030500A1 && CYTHON_USE_UNICODE_INTERNALS
+#define __Pyx_PyDict_GetItemStr(dict, name) _PyDict_GetItem_KnownHash(dict, name, ((PyASCIIObject *) name)->hash)
+#else
+#define __Pyx_PyDict_GetItemStr(dict, name) PyDict_GetItem(dict, name)
+#endif
+#if PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_KIND)
+ #define CYTHON_PEP393_ENABLED 1
+ #if PY_VERSION_HEX >= 0x030C0000
+ #define __Pyx_PyUnicode_READY(op) (0)
+ #else
+ #define __Pyx_PyUnicode_READY(op) (likely(PyUnicode_IS_READY(op)) ?\
+ 0 : _PyUnicode_Ready((PyObject *)(op)))
+ #endif
+ #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_LENGTH(u)
+ #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i)
+ #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) PyUnicode_MAX_CHAR_VALUE(u)
+ #define __Pyx_PyUnicode_KIND(u) PyUnicode_KIND(u)
+ #define __Pyx_PyUnicode_DATA(u) PyUnicode_DATA(u)
+ #define __Pyx_PyUnicode_READ(k, d, i) PyUnicode_READ(k, d, i)
+ #define __Pyx_PyUnicode_WRITE(k, d, i, ch) PyUnicode_WRITE(k, d, i, ch)
+ #if PY_VERSION_HEX >= 0x030C0000
+ #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_LENGTH(u))
+ #else
+ #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03090000
+ #define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : ((PyCompactUnicodeObject *)(u))->wstr_length))
+ #else
+ #define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : PyUnicode_GET_SIZE(u)))
+ #endif
+ #endif
+#else
+ #define CYTHON_PEP393_ENABLED 0
+ #define PyUnicode_1BYTE_KIND 1
+ #define PyUnicode_2BYTE_KIND 2
+ #define PyUnicode_4BYTE_KIND 4
+ #define __Pyx_PyUnicode_READY(op) (0)
+ #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_SIZE(u)
+ #define __Pyx_PyUnicode_READ_CHAR(u, i) ((Py_UCS4)(PyUnicode_AS_UNICODE(u)[i]))
+ #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) ((sizeof(Py_UNICODE) == 2) ? 65535 : 1114111)
+ #define __Pyx_PyUnicode_KIND(u) (sizeof(Py_UNICODE))
+ #define __Pyx_PyUnicode_DATA(u) ((void*)PyUnicode_AS_UNICODE(u))
+ #define __Pyx_PyUnicode_READ(k, d, i) ((void)(k), (Py_UCS4)(((Py_UNICODE*)d)[i]))
+ #define __Pyx_PyUnicode_WRITE(k, d, i, ch) (((void)(k)), ((Py_UNICODE*)d)[i] = ch)
+ #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_SIZE(u))
+#endif
+#if CYTHON_COMPILING_IN_PYPY
+ #define __Pyx_PyUnicode_Concat(a, b) PyNumber_Add(a, b)
+ #define __Pyx_PyUnicode_ConcatSafe(a, b) PyNumber_Add(a, b)
+#else
+ #define __Pyx_PyUnicode_Concat(a, b) PyUnicode_Concat(a, b)
+ #define __Pyx_PyUnicode_ConcatSafe(a, b) ((unlikely((a) == Py_None) || unlikely((b) == Py_None)) ?\
+ PyNumber_Add(a, b) : __Pyx_PyUnicode_Concat(a, b))
+#endif
+#if CYTHON_COMPILING_IN_PYPY && !defined(PyUnicode_Contains)
+ #define PyUnicode_Contains(u, s) PySequence_Contains(u, s)
+#endif
+#if CYTHON_COMPILING_IN_PYPY && !defined(PyByteArray_Check)
+ #define PyByteArray_Check(obj) PyObject_TypeCheck(obj, &PyByteArray_Type)
+#endif
+#if CYTHON_COMPILING_IN_PYPY && !defined(PyObject_Format)
+ #define PyObject_Format(obj, fmt) PyObject_CallMethod(obj, "__format__", "O", fmt)
+#endif
+#define __Pyx_PyString_FormatSafe(a, b) ((unlikely((a) == Py_None || (PyString_Check(b) && !PyString_CheckExact(b)))) ? PyNumber_Remainder(a, b) : __Pyx_PyString_Format(a, b))
+#define __Pyx_PyUnicode_FormatSafe(a, b) ((unlikely((a) == Py_None || (PyUnicode_Check(b) && !PyUnicode_CheckExact(b)))) ? PyNumber_Remainder(a, b) : PyUnicode_Format(a, b))
+#if PY_MAJOR_VERSION >= 3
+ #define __Pyx_PyString_Format(a, b) PyUnicode_Format(a, b)
+#else
+ #define __Pyx_PyString_Format(a, b) PyString_Format(a, b)
+#endif
+#if PY_MAJOR_VERSION < 3 && !defined(PyObject_ASCII)
+ #define PyObject_ASCII(o) PyObject_Repr(o)
+#endif
+#if PY_MAJOR_VERSION >= 3
+ #define PyBaseString_Type PyUnicode_Type
+ #define PyStringObject PyUnicodeObject
+ #define PyString_Type PyUnicode_Type
+ #define PyString_Check PyUnicode_Check
+ #define PyString_CheckExact PyUnicode_CheckExact
+#ifndef PyObject_Unicode
+ #define PyObject_Unicode PyObject_Str
+#endif
+#endif
+#if PY_MAJOR_VERSION >= 3
+ #define __Pyx_PyBaseString_Check(obj) PyUnicode_Check(obj)
+ #define __Pyx_PyBaseString_CheckExact(obj) PyUnicode_CheckExact(obj)
+#else
+ #define __Pyx_PyBaseString_Check(obj) (PyString_Check(obj) || PyUnicode_Check(obj))
+ #define __Pyx_PyBaseString_CheckExact(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj))
+#endif
+#ifndef PySet_CheckExact
+ #define PySet_CheckExact(obj) (Py_TYPE(obj) == &PySet_Type)
+#endif
+#if PY_VERSION_HEX >= 0x030900A4
+ #define __Pyx_SET_REFCNT(obj, refcnt) Py_SET_REFCNT(obj, refcnt)
+ #define __Pyx_SET_SIZE(obj, size) Py_SET_SIZE(obj, size)
+#else
+ #define __Pyx_SET_REFCNT(obj, refcnt) Py_REFCNT(obj) = (refcnt)
+ #define __Pyx_SET_SIZE(obj, size) Py_SIZE(obj) = (size)
+#endif
+#if CYTHON_ASSUME_SAFE_MACROS
+ #define __Pyx_PySequence_SIZE(seq) Py_SIZE(seq)
+#else
+ #define __Pyx_PySequence_SIZE(seq) PySequence_Size(seq)
+#endif
+#if PY_MAJOR_VERSION >= 3
+ #define PyIntObject PyLongObject
+ #define PyInt_Type PyLong_Type
+ #define PyInt_Check(op) PyLong_Check(op)
+ #define PyInt_CheckExact(op) PyLong_CheckExact(op)
+ #define PyInt_FromString PyLong_FromString
+ #define PyInt_FromUnicode PyLong_FromUnicode
+ #define PyInt_FromLong PyLong_FromLong
+ #define PyInt_FromSize_t PyLong_FromSize_t
+ #define PyInt_FromSsize_t PyLong_FromSsize_t
+ #define PyInt_AsLong PyLong_AsLong
+ #define PyInt_AS_LONG PyLong_AS_LONG
+ #define PyInt_AsSsize_t PyLong_AsSsize_t
+ #define PyInt_AsUnsignedLongMask PyLong_AsUnsignedLongMask
+ #define PyInt_AsUnsignedLongLongMask PyLong_AsUnsignedLongLongMask
+ #define PyNumber_Int PyNumber_Long
+#endif
+#if PY_MAJOR_VERSION >= 3
+ #define PyBoolObject PyLongObject
+#endif
+#if PY_MAJOR_VERSION >= 3 && CYTHON_COMPILING_IN_PYPY
+ #ifndef PyUnicode_InternFromString
+ #define PyUnicode_InternFromString(s) PyUnicode_FromString(s)
+ #endif
+#endif
+#if PY_VERSION_HEX < 0x030200A4
+ typedef long Py_hash_t;
+ #define __Pyx_PyInt_FromHash_t PyInt_FromLong
+ #define __Pyx_PyInt_AsHash_t __Pyx_PyIndex_AsHash_t
+#else
+ #define __Pyx_PyInt_FromHash_t PyInt_FromSsize_t
+ #define __Pyx_PyInt_AsHash_t __Pyx_PyIndex_AsSsize_t
+#endif
+#if PY_MAJOR_VERSION >= 3
+ #define __Pyx_PyMethod_New(func, self, klass) ((self) ? ((void)(klass), PyMethod_New(func, self)) : __Pyx_NewRef(func))
+#else
+ #define __Pyx_PyMethod_New(func, self, klass) PyMethod_New(func, self, klass)
+#endif
+#if CYTHON_USE_ASYNC_SLOTS
+ #if PY_VERSION_HEX >= 0x030500B1
+ #define __Pyx_PyAsyncMethodsStruct PyAsyncMethods
+ #define __Pyx_PyType_AsAsync(obj) (Py_TYPE(obj)->tp_as_async)
+ #else
+ #define __Pyx_PyType_AsAsync(obj) ((__Pyx_PyAsyncMethodsStruct*) (Py_TYPE(obj)->tp_reserved))
+ #endif
+#else
+ #define __Pyx_PyType_AsAsync(obj) NULL
+#endif
+#ifndef __Pyx_PyAsyncMethodsStruct
+ typedef struct {
+ unaryfunc am_await;
+ unaryfunc am_aiter;
+ unaryfunc am_anext;
+ } __Pyx_PyAsyncMethodsStruct;
+#endif
+
+#if defined(_WIN32) || defined(WIN32) || defined(MS_WINDOWS)
+ #if !defined(_USE_MATH_DEFINES)
+ #define _USE_MATH_DEFINES
+ #endif
+#endif
+#include <math.h>
+#ifdef NAN
+#define __PYX_NAN() ((float) NAN)
+#else
+static CYTHON_INLINE float __PYX_NAN() {
+ float value;
+ memset(&value, 0xFF, sizeof(value));
+ return value;
+}
+#endif
+#if defined(__CYGWIN__) && defined(_LDBL_EQ_DBL)
+#define __Pyx_truncl trunc
+#else
+#define __Pyx_truncl truncl
+#endif
+
+#define __PYX_MARK_ERR_POS(f_index, lineno) \
+ { __pyx_filename = __pyx_f[f_index]; (void)__pyx_filename; __pyx_lineno = lineno; (void)__pyx_lineno; __pyx_clineno = __LINE__; (void)__pyx_clineno; }
+#define __PYX_ERR(f_index, lineno, Ln_error) \
+ { __PYX_MARK_ERR_POS(f_index, lineno) goto Ln_error; }
+
+#ifndef __PYX_EXTERN_C
+ #ifdef __cplusplus
+ #define __PYX_EXTERN_C extern "C"
+ #else
+ #define __PYX_EXTERN_C extern
+ #endif
+#endif
+
+#define __PYX_HAVE__bbox
+#define __PYX_HAVE_API__bbox
+/* Early includes */
+#include <string.h>
+#include <stdio.h>
+#include "numpy/arrayobject.h"
+#include "numpy/ufuncobject.h"
+
+ /* NumPy API declarations from "numpy/__init__.pxd" */
+
+#ifdef _OPENMP
+#include <omp.h>
+#endif /* _OPENMP */
+
+#if defined(PYREX_WITHOUT_ASSERTIONS) && !defined(CYTHON_WITHOUT_ASSERTIONS)
+#define CYTHON_WITHOUT_ASSERTIONS
+#endif
+
+typedef struct {PyObject **p; const char *s; const Py_ssize_t n; const char* encoding;
+ const char is_unicode; const char is_str; const char intern; } __Pyx_StringTabEntry;
+
+#define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII 0
+#define __PYX_DEFAULT_STRING_ENCODING_IS_UTF8 0
+#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT (PY_MAJOR_VERSION >= 3 && __PYX_DEFAULT_STRING_ENCODING_IS_UTF8)
+#define __PYX_DEFAULT_STRING_ENCODING ""
+#define __Pyx_PyObject_FromString __Pyx_PyBytes_FromString
+#define __Pyx_PyObject_FromStringAndSize __Pyx_PyBytes_FromStringAndSize
+#define __Pyx_uchar_cast(c) ((unsigned char)c)
+#define __Pyx_long_cast(x) ((long)x)
+#define __Pyx_fits_Py_ssize_t(v, type, is_signed) (\
+ (sizeof(type) < sizeof(Py_ssize_t)) ||\
+ (sizeof(type) > sizeof(Py_ssize_t) &&\
+ likely(v < (type)PY_SSIZE_T_MAX ||\
+ v == (type)PY_SSIZE_T_MAX) &&\
+ (!is_signed || likely(v > (type)PY_SSIZE_T_MIN ||\
+ v == (type)PY_SSIZE_T_MIN))) ||\
+ (sizeof(type) == sizeof(Py_ssize_t) &&\
+ (is_signed || likely(v < (type)PY_SSIZE_T_MAX ||\
+ v == (type)PY_SSIZE_T_MAX))) )
+static CYTHON_INLINE int __Pyx_is_valid_index(Py_ssize_t i, Py_ssize_t limit) {
+ return (size_t) i < (size_t) limit;
+}
+#if defined (__cplusplus) && __cplusplus >= 201103L
+  #include <cstdlib>
+ #define __Pyx_sst_abs(value) std::abs(value)
+#elif SIZEOF_INT >= SIZEOF_SIZE_T
+ #define __Pyx_sst_abs(value) abs(value)
+#elif SIZEOF_LONG >= SIZEOF_SIZE_T
+ #define __Pyx_sst_abs(value) labs(value)
+#elif defined (_MSC_VER)
+ #define __Pyx_sst_abs(value) ((Py_ssize_t)_abs64(value))
+#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+ #define __Pyx_sst_abs(value) llabs(value)
+#elif defined (__GNUC__)
+ #define __Pyx_sst_abs(value) __builtin_llabs(value)
+#else
+ #define __Pyx_sst_abs(value) ((value<0) ? -value : value)
+#endif
+static CYTHON_INLINE const char* __Pyx_PyObject_AsString(PyObject*);
+static CYTHON_INLINE const char* __Pyx_PyObject_AsStringAndSize(PyObject*, Py_ssize_t* length);
+#define __Pyx_PyByteArray_FromString(s) PyByteArray_FromStringAndSize((const char*)s, strlen((const char*)s))
+#define __Pyx_PyByteArray_FromStringAndSize(s, l) PyByteArray_FromStringAndSize((const char*)s, l)
+#define __Pyx_PyBytes_FromString PyBytes_FromString
+#define __Pyx_PyBytes_FromStringAndSize PyBytes_FromStringAndSize
+static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char*);
+#if PY_MAJOR_VERSION < 3
+ #define __Pyx_PyStr_FromString __Pyx_PyBytes_FromString
+ #define __Pyx_PyStr_FromStringAndSize __Pyx_PyBytes_FromStringAndSize
+#else
+ #define __Pyx_PyStr_FromString __Pyx_PyUnicode_FromString
+ #define __Pyx_PyStr_FromStringAndSize __Pyx_PyUnicode_FromStringAndSize
+#endif
+#define __Pyx_PyBytes_AsWritableString(s) ((char*) PyBytes_AS_STRING(s))
+#define __Pyx_PyBytes_AsWritableSString(s) ((signed char*) PyBytes_AS_STRING(s))
+#define __Pyx_PyBytes_AsWritableUString(s) ((unsigned char*) PyBytes_AS_STRING(s))
+#define __Pyx_PyBytes_AsString(s) ((const char*) PyBytes_AS_STRING(s))
+#define __Pyx_PyBytes_AsSString(s) ((const signed char*) PyBytes_AS_STRING(s))
+#define __Pyx_PyBytes_AsUString(s) ((const unsigned char*) PyBytes_AS_STRING(s))
+#define __Pyx_PyObject_AsWritableString(s) ((char*) __Pyx_PyObject_AsString(s))
+#define __Pyx_PyObject_AsWritableSString(s) ((signed char*) __Pyx_PyObject_AsString(s))
+#define __Pyx_PyObject_AsWritableUString(s) ((unsigned char*) __Pyx_PyObject_AsString(s))
+#define __Pyx_PyObject_AsSString(s) ((const signed char*) __Pyx_PyObject_AsString(s))
+#define __Pyx_PyObject_AsUString(s) ((const unsigned char*) __Pyx_PyObject_AsString(s))
+#define __Pyx_PyObject_FromCString(s) __Pyx_PyObject_FromString((const char*)s)
+#define __Pyx_PyBytes_FromCString(s) __Pyx_PyBytes_FromString((const char*)s)
+#define __Pyx_PyByteArray_FromCString(s) __Pyx_PyByteArray_FromString((const char*)s)
+#define __Pyx_PyStr_FromCString(s) __Pyx_PyStr_FromString((const char*)s)
+#define __Pyx_PyUnicode_FromCString(s) __Pyx_PyUnicode_FromString((const char*)s)
+static CYTHON_INLINE size_t __Pyx_Py_UNICODE_strlen(const Py_UNICODE *u) {
+ const Py_UNICODE *u_end = u;
+ while (*u_end++) ;
+ return (size_t)(u_end - u - 1);
+}
+#define __Pyx_PyUnicode_FromUnicode(u) PyUnicode_FromUnicode(u, __Pyx_Py_UNICODE_strlen(u))
+#define __Pyx_PyUnicode_FromUnicodeAndLength PyUnicode_FromUnicode
+#define __Pyx_PyUnicode_AsUnicode PyUnicode_AsUnicode
+#define __Pyx_NewRef(obj) (Py_INCREF(obj), obj)
+#define __Pyx_Owned_Py_None(b) __Pyx_NewRef(Py_None)
+static CYTHON_INLINE PyObject * __Pyx_PyBool_FromLong(long b);
+static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*);
+static CYTHON_INLINE int __Pyx_PyObject_IsTrueAndDecref(PyObject*);
+static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x);
+#define __Pyx_PySequence_Tuple(obj)\
+ (likely(PyTuple_CheckExact(obj)) ? __Pyx_NewRef(obj) : PySequence_Tuple(obj))
+static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject*);
+static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t);
+static CYTHON_INLINE Py_hash_t __Pyx_PyIndex_AsHash_t(PyObject*);
+#if CYTHON_ASSUME_SAFE_MACROS
+#define __pyx_PyFloat_AsDouble(x) (PyFloat_CheckExact(x) ? PyFloat_AS_DOUBLE(x) : PyFloat_AsDouble(x))
+#else
+#define __pyx_PyFloat_AsDouble(x) PyFloat_AsDouble(x)
+#endif
+#define __pyx_PyFloat_AsFloat(x) ((float) __pyx_PyFloat_AsDouble(x))
+#if PY_MAJOR_VERSION >= 3
+#define __Pyx_PyNumber_Int(x) (PyLong_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Long(x))
+#else
+#define __Pyx_PyNumber_Int(x) (PyInt_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Int(x))
+#endif
+#define __Pyx_PyNumber_Float(x) (PyFloat_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Float(x))
+#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
+static int __Pyx_sys_getdefaultencoding_not_ascii;
+static int __Pyx_init_sys_getdefaultencoding_params(void) {
+ PyObject* sys;
+ PyObject* default_encoding = NULL;
+ PyObject* ascii_chars_u = NULL;
+ PyObject* ascii_chars_b = NULL;
+ const char* default_encoding_c;
+ sys = PyImport_ImportModule("sys");
+ if (!sys) goto bad;
+ default_encoding = PyObject_CallMethod(sys, (char*) "getdefaultencoding", NULL);
+ Py_DECREF(sys);
+ if (!default_encoding) goto bad;
+ default_encoding_c = PyBytes_AsString(default_encoding);
+ if (!default_encoding_c) goto bad;
+ if (strcmp(default_encoding_c, "ascii") == 0) {
+ __Pyx_sys_getdefaultencoding_not_ascii = 0;
+ } else {
+ char ascii_chars[128];
+ int c;
+ for (c = 0; c < 128; c++) {
+ ascii_chars[c] = c;
+ }
+ __Pyx_sys_getdefaultencoding_not_ascii = 1;
+ ascii_chars_u = PyUnicode_DecodeASCII(ascii_chars, 128, NULL);
+ if (!ascii_chars_u) goto bad;
+ ascii_chars_b = PyUnicode_AsEncodedString(ascii_chars_u, default_encoding_c, NULL);
+ if (!ascii_chars_b || !PyBytes_Check(ascii_chars_b) || memcmp(ascii_chars, PyBytes_AS_STRING(ascii_chars_b), 128) != 0) {
+ PyErr_Format(
+ PyExc_ValueError,
+ "This module compiled with c_string_encoding=ascii, but default encoding '%.200s' is not a superset of ascii.",
+ default_encoding_c);
+ goto bad;
+ }
+ Py_DECREF(ascii_chars_u);
+ Py_DECREF(ascii_chars_b);
+ }
+ Py_DECREF(default_encoding);
+ return 0;
+bad:
+ Py_XDECREF(default_encoding);
+ Py_XDECREF(ascii_chars_u);
+ Py_XDECREF(ascii_chars_b);
+ return -1;
+}
+#endif
+#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT && PY_MAJOR_VERSION >= 3
+#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_DecodeUTF8(c_str, size, NULL)
+#else
+#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_Decode(c_str, size, __PYX_DEFAULT_STRING_ENCODING, NULL)
+#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT
+static char* __PYX_DEFAULT_STRING_ENCODING;
+static int __Pyx_init_sys_getdefaultencoding_params(void) {
+ PyObject* sys;
+ PyObject* default_encoding = NULL;
+ char* default_encoding_c;
+ sys = PyImport_ImportModule("sys");
+ if (!sys) goto bad;
+ default_encoding = PyObject_CallMethod(sys, (char*) (const char*) "getdefaultencoding", NULL);
+ Py_DECREF(sys);
+ if (!default_encoding) goto bad;
+ default_encoding_c = PyBytes_AsString(default_encoding);
+ if (!default_encoding_c) goto bad;
+ __PYX_DEFAULT_STRING_ENCODING = (char*) malloc(strlen(default_encoding_c) + 1);
+ if (!__PYX_DEFAULT_STRING_ENCODING) goto bad;
+ strcpy(__PYX_DEFAULT_STRING_ENCODING, default_encoding_c);
+ Py_DECREF(default_encoding);
+ return 0;
+bad:
+ Py_XDECREF(default_encoding);
+ return -1;
+}
+#endif
+#endif
+
+
+/* Test for GCC > 2.95 */
+#if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95)))
+ #define likely(x) __builtin_expect(!!(x), 1)
+ #define unlikely(x) __builtin_expect(!!(x), 0)
+#else /* !__GNUC__ or GCC < 2.95 */
+ #define likely(x) (x)
+ #define unlikely(x) (x)
+#endif /* __GNUC__ */
+static CYTHON_INLINE void __Pyx_pretend_to_initialize(void* ptr) { (void)ptr; }
+
+static PyObject *__pyx_m = NULL;
+static PyObject *__pyx_d;
+static PyObject *__pyx_b;
+static PyObject *__pyx_cython_runtime = NULL;
+static PyObject *__pyx_empty_tuple;
+static PyObject *__pyx_empty_bytes;
+static PyObject *__pyx_empty_unicode;
+static int __pyx_lineno;
+static int __pyx_clineno = 0;
+static const char * __pyx_cfilenm= __FILE__;
+static const char *__pyx_filename;
+
+/* Header.proto */
+#if !defined(CYTHON_CCOMPLEX)
+ #if defined(__cplusplus)
+ #define CYTHON_CCOMPLEX 1
+ #elif defined(_Complex_I)
+ #define CYTHON_CCOMPLEX 1
+ #else
+ #define CYTHON_CCOMPLEX 0
+ #endif
+#endif
+#if CYTHON_CCOMPLEX
+ #ifdef __cplusplus
+    #include <complex>
+ #else
+    #include <complex.h>
+ #endif
+#endif
+#if CYTHON_CCOMPLEX && !defined(__cplusplus) && defined(__sun__) && defined(__GNUC__)
+ #undef _Complex_I
+ #define _Complex_I 1.0fj
+#endif
+
+
+static const char *__pyx_f[] = {
+ "box_overlaps.pyx",
+ "__init__.pxd",
+ "type.pxd",
+};
+/* BufferFormatStructs.proto */
+#define IS_UNSIGNED(type) (((type) -1) > 0)
+struct __Pyx_StructField_;
+#define __PYX_BUF_FLAGS_PACKED_STRUCT (1 << 0)
+typedef struct {
+ const char* name;
+ struct __Pyx_StructField_* fields;
+ size_t size;
+ size_t arraysize[8];
+ int ndim;
+ char typegroup;
+ char is_unsigned;
+ int flags;
+} __Pyx_TypeInfo;
+typedef struct __Pyx_StructField_ {
+ __Pyx_TypeInfo* type;
+ const char* name;
+ size_t offset;
+} __Pyx_StructField;
+typedef struct {
+ __Pyx_StructField* field;
+ size_t parent_offset;
+} __Pyx_BufFmt_StackElem;
+typedef struct {
+ __Pyx_StructField root;
+ __Pyx_BufFmt_StackElem* head;
+ size_t fmt_offset;
+ size_t new_count, enc_count;
+ size_t struct_alignment;
+ int is_complex;
+ char enc_type;
+ char new_packmode;
+ char enc_packmode;
+ char is_valid_array;
+} __Pyx_BufFmt_Context;
+
+
+/* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":689
+ * # in Cython to enable them only on the right systems.
+ *
+ * ctypedef npy_int8 int8_t # <<<<<<<<<<<<<<
+ * ctypedef npy_int16 int16_t
+ * ctypedef npy_int32 int32_t
+ */
+typedef npy_int8 __pyx_t_5numpy_int8_t;
+
+/* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":690
+ *
+ * ctypedef npy_int8 int8_t
+ * ctypedef npy_int16 int16_t # <<<<<<<<<<<<<<
+ * ctypedef npy_int32 int32_t
+ * ctypedef npy_int64 int64_t
+ */
+typedef npy_int16 __pyx_t_5numpy_int16_t;
+
+/* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":691
+ * ctypedef npy_int8 int8_t
+ * ctypedef npy_int16 int16_t
+ * ctypedef npy_int32 int32_t # <<<<<<<<<<<<<<
+ * ctypedef npy_int64 int64_t
+ * #ctypedef npy_int96 int96_t
+ */
+typedef npy_int32 __pyx_t_5numpy_int32_t;
+
+/* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":692
+ * ctypedef npy_int16 int16_t
+ * ctypedef npy_int32 int32_t
+ * ctypedef npy_int64 int64_t # <<<<<<<<<<<<<<
+ * #ctypedef npy_int96 int96_t
+ * #ctypedef npy_int128 int128_t
+ */
+typedef npy_int64 __pyx_t_5numpy_int64_t;
+
+/* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":696
+ * #ctypedef npy_int128 int128_t
+ *
+ * ctypedef npy_uint8 uint8_t # <<<<<<<<<<<<<<
+ * ctypedef npy_uint16 uint16_t
+ * ctypedef npy_uint32 uint32_t
+ */
+typedef npy_uint8 __pyx_t_5numpy_uint8_t;
+
+/* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":697
+ *
+ * ctypedef npy_uint8 uint8_t
+ * ctypedef npy_uint16 uint16_t # <<<<<<<<<<<<<<
+ * ctypedef npy_uint32 uint32_t
+ * ctypedef npy_uint64 uint64_t
+ */
+typedef npy_uint16 __pyx_t_5numpy_uint16_t;
+
+/* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":698
+ * ctypedef npy_uint8 uint8_t
+ * ctypedef npy_uint16 uint16_t
+ * ctypedef npy_uint32 uint32_t # <<<<<<<<<<<<<<
+ * ctypedef npy_uint64 uint64_t
+ * #ctypedef npy_uint96 uint96_t
+ */
+typedef npy_uint32 __pyx_t_5numpy_uint32_t;
+
+/* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":699
+ * ctypedef npy_uint16 uint16_t
+ * ctypedef npy_uint32 uint32_t
+ * ctypedef npy_uint64 uint64_t # <<<<<<<<<<<<<<
+ * #ctypedef npy_uint96 uint96_t
+ * #ctypedef npy_uint128 uint128_t
+ */
+typedef npy_uint64 __pyx_t_5numpy_uint64_t;
+
+/* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":703
+ * #ctypedef npy_uint128 uint128_t
+ *
+ * ctypedef npy_float32 float32_t # <<<<<<<<<<<<<<
+ * ctypedef npy_float64 float64_t
+ * #ctypedef npy_float80 float80_t
+ */
+typedef npy_float32 __pyx_t_5numpy_float32_t;
+
+/* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":704
+ *
+ * ctypedef npy_float32 float32_t
+ * ctypedef npy_float64 float64_t # <<<<<<<<<<<<<<
+ * #ctypedef npy_float80 float80_t
+ * #ctypedef npy_float128 float128_t
+ */
+typedef npy_float64 __pyx_t_5numpy_float64_t;
+
+/* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":713
+ * # The int types are mapped a bit surprising --
+ * # numpy.int corresponds to 'l' and numpy.long to 'q'
+ * ctypedef npy_long int_t # <<<<<<<<<<<<<<
+ * ctypedef npy_longlong long_t
+ * ctypedef npy_longlong longlong_t
+ */
+typedef npy_long __pyx_t_5numpy_int_t;
+
+/* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":714
+ * # numpy.int corresponds to 'l' and numpy.long to 'q'
+ * ctypedef npy_long int_t
+ * ctypedef npy_longlong long_t # <<<<<<<<<<<<<<
+ * ctypedef npy_longlong longlong_t
+ *
+ */
+typedef npy_longlong __pyx_t_5numpy_long_t;
+
+/* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":715
+ * ctypedef npy_long int_t
+ * ctypedef npy_longlong long_t
+ * ctypedef npy_longlong longlong_t # <<<<<<<<<<<<<<
+ *
+ * ctypedef npy_ulong uint_t
+ */
+typedef npy_longlong __pyx_t_5numpy_longlong_t;
+
+/* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":717
+ * ctypedef npy_longlong longlong_t
+ *
+ * ctypedef npy_ulong uint_t # <<<<<<<<<<<<<<
+ * ctypedef npy_ulonglong ulong_t
+ * ctypedef npy_ulonglong ulonglong_t
+ */
+typedef npy_ulong __pyx_t_5numpy_uint_t;
+
+/* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":718
+ *
+ * ctypedef npy_ulong uint_t
+ * ctypedef npy_ulonglong ulong_t # <<<<<<<<<<<<<<
+ * ctypedef npy_ulonglong ulonglong_t
+ *
+ */
+typedef npy_ulonglong __pyx_t_5numpy_ulong_t;
+
+/* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":719
+ * ctypedef npy_ulong uint_t
+ * ctypedef npy_ulonglong ulong_t
+ * ctypedef npy_ulonglong ulonglong_t # <<<<<<<<<<<<<<
+ *
+ * ctypedef npy_intp intp_t
+ */
+typedef npy_ulonglong __pyx_t_5numpy_ulonglong_t;
+
+/* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":721
+ * ctypedef npy_ulonglong ulonglong_t
+ *
+ * ctypedef npy_intp intp_t # <<<<<<<<<<<<<<
+ * ctypedef npy_uintp uintp_t
+ *
+ */
+typedef npy_intp __pyx_t_5numpy_intp_t;
+
+/* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":722
+ *
+ * ctypedef npy_intp intp_t
+ * ctypedef npy_uintp uintp_t # <<<<<<<<<<<<<<
+ *
+ * ctypedef npy_double float_t
+ */
+typedef npy_uintp __pyx_t_5numpy_uintp_t;
+
+/* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":724
+ * ctypedef npy_uintp uintp_t
+ *
+ * ctypedef npy_double float_t # <<<<<<<<<<<<<<
+ * ctypedef npy_double double_t
+ * ctypedef npy_longdouble longdouble_t
+ */
+typedef npy_double __pyx_t_5numpy_float_t;
+
+/* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":725
+ *
+ * ctypedef npy_double float_t
+ * ctypedef npy_double double_t # <<<<<<<<<<<<<<
+ * ctypedef npy_longdouble longdouble_t
+ *
+ */
+typedef npy_double __pyx_t_5numpy_double_t;
+
+/* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":726
+ * ctypedef npy_double float_t
+ * ctypedef npy_double double_t
+ * ctypedef npy_longdouble longdouble_t # <<<<<<<<<<<<<<
+ *
+ * ctypedef npy_cfloat cfloat_t
+ */
+typedef npy_longdouble __pyx_t_5numpy_longdouble_t;
+
+/* "box_overlaps.pyx":13
+ *
+ * DTYPE = np.float
+ * ctypedef np.float_t DTYPE_t # <<<<<<<<<<<<<<
+ *
+ * def bbox_overlaps(
+ */
+typedef __pyx_t_5numpy_float_t __pyx_t_4bbox_DTYPE_t;
+/* Declarations.proto */
+#if CYTHON_CCOMPLEX
+ #ifdef __cplusplus
+ typedef ::std::complex< float > __pyx_t_float_complex;
+ #else
+ typedef float _Complex __pyx_t_float_complex;
+ #endif
+#else
+ typedef struct { float real, imag; } __pyx_t_float_complex;
+#endif
+static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_parts(float, float);
+
+/* Declarations.proto */
+#if CYTHON_CCOMPLEX
+ #ifdef __cplusplus
+ typedef ::std::complex< double > __pyx_t_double_complex;
+ #else
+ typedef double _Complex __pyx_t_double_complex;
+ #endif
+#else
+ typedef struct { double real, imag; } __pyx_t_double_complex;
+#endif
+static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(double, double);
+
+
+/*--- Type declarations ---*/
+
+/* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":728
+ * ctypedef npy_longdouble longdouble_t
+ *
+ * ctypedef npy_cfloat cfloat_t # <<<<<<<<<<<<<<
+ * ctypedef npy_cdouble cdouble_t
+ * ctypedef npy_clongdouble clongdouble_t
+ */
+typedef npy_cfloat __pyx_t_5numpy_cfloat_t;
+
+/* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":729
+ *
+ * ctypedef npy_cfloat cfloat_t
+ * ctypedef npy_cdouble cdouble_t # <<<<<<<<<<<<<<
+ * ctypedef npy_clongdouble clongdouble_t
+ *
+ */
+typedef npy_cdouble __pyx_t_5numpy_cdouble_t;
+
+/* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":730
+ * ctypedef npy_cfloat cfloat_t
+ * ctypedef npy_cdouble cdouble_t
+ * ctypedef npy_clongdouble clongdouble_t # <<<<<<<<<<<<<<
+ *
+ * ctypedef npy_cdouble complex_t
+ */
+typedef npy_clongdouble __pyx_t_5numpy_clongdouble_t;
+
+/* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":732
+ * ctypedef npy_clongdouble clongdouble_t
+ *
+ * ctypedef npy_cdouble complex_t # <<<<<<<<<<<<<<
+ *
+ * cdef inline object PyArray_MultiIterNew1(a):
+ */
+typedef npy_cdouble __pyx_t_5numpy_complex_t;
+
+/* --- Runtime support code (head) --- */
+/* Refnanny.proto */
+#ifndef CYTHON_REFNANNY
+ #define CYTHON_REFNANNY 0
+#endif
+#if CYTHON_REFNANNY
+ typedef struct {
+ void (*INCREF)(void*, PyObject*, int);
+ void (*DECREF)(void*, PyObject*, int);
+ void (*GOTREF)(void*, PyObject*, int);
+ void (*GIVEREF)(void*, PyObject*, int);
+ void* (*SetupContext)(const char*, int, const char*);
+ void (*FinishContext)(void**);
+ } __Pyx_RefNannyAPIStruct;
+ static __Pyx_RefNannyAPIStruct *__Pyx_RefNanny = NULL;
+ static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname);
+ #define __Pyx_RefNannyDeclarations void *__pyx_refnanny = NULL;
+#ifdef WITH_THREAD
+ #define __Pyx_RefNannySetupContext(name, acquire_gil)\
+ if (acquire_gil) {\
+ PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();\
+ __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__);\
+ PyGILState_Release(__pyx_gilstate_save);\
+ } else {\
+ __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__);\
+ }
+#else
+ #define __Pyx_RefNannySetupContext(name, acquire_gil)\
+ __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__)
+#endif
+ #define __Pyx_RefNannyFinishContext()\
+ __Pyx_RefNanny->FinishContext(&__pyx_refnanny)
+ #define __Pyx_INCREF(r) __Pyx_RefNanny->INCREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
+ #define __Pyx_DECREF(r) __Pyx_RefNanny->DECREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
+ #define __Pyx_GOTREF(r) __Pyx_RefNanny->GOTREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
+ #define __Pyx_GIVEREF(r) __Pyx_RefNanny->GIVEREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
+ #define __Pyx_XINCREF(r) do { if((r) != NULL) {__Pyx_INCREF(r); }} while(0)
+ #define __Pyx_XDECREF(r) do { if((r) != NULL) {__Pyx_DECREF(r); }} while(0)
+ #define __Pyx_XGOTREF(r) do { if((r) != NULL) {__Pyx_GOTREF(r); }} while(0)
+ #define __Pyx_XGIVEREF(r) do { if((r) != NULL) {__Pyx_GIVEREF(r);}} while(0)
+#else
+ #define __Pyx_RefNannyDeclarations
+ #define __Pyx_RefNannySetupContext(name, acquire_gil)
+ #define __Pyx_RefNannyFinishContext()
+ #define __Pyx_INCREF(r) Py_INCREF(r)
+ #define __Pyx_DECREF(r) Py_DECREF(r)
+ #define __Pyx_GOTREF(r)
+ #define __Pyx_GIVEREF(r)
+ #define __Pyx_XINCREF(r) Py_XINCREF(r)
+ #define __Pyx_XDECREF(r) Py_XDECREF(r)
+ #define __Pyx_XGOTREF(r)
+ #define __Pyx_XGIVEREF(r)
+#endif
+#define __Pyx_XDECREF_SET(r, v) do {\
+ PyObject *tmp = (PyObject *) r;\
+ r = v; __Pyx_XDECREF(tmp);\
+ } while (0)
+#define __Pyx_DECREF_SET(r, v) do {\
+ PyObject *tmp = (PyObject *) r;\
+ r = v; __Pyx_DECREF(tmp);\
+ } while (0)
+#define __Pyx_CLEAR(r) do { PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);} while(0)
+#define __Pyx_XCLEAR(r) do { if((r) != NULL) {PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);}} while(0)
+
+/* PyObjectGetAttrStr.proto */
+#if CYTHON_USE_TYPE_SLOTS
+static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name);
+#else
+#define __Pyx_PyObject_GetAttrStr(o,n) PyObject_GetAttr(o,n)
+#endif
+
+/* GetBuiltinName.proto */
+static PyObject *__Pyx_GetBuiltinName(PyObject *name);
+
+/* RaiseArgTupleInvalid.proto */
+static void __Pyx_RaiseArgtupleInvalid(const char* func_name, int exact,
+ Py_ssize_t num_min, Py_ssize_t num_max, Py_ssize_t num_found);
+
+/* RaiseDoubleKeywords.proto */
+static void __Pyx_RaiseDoubleKeywordsError(const char* func_name, PyObject* kw_name);
+
+/* ParseKeywords.proto */
+static int __Pyx_ParseOptionalKeywords(PyObject *kwds, PyObject **argnames[],\
+ PyObject *kwds2, PyObject *values[], Py_ssize_t num_pos_args,\
+ const char* function_name);
+
+/* ArgTypeTest.proto */
+#define __Pyx_ArgTypeTest(obj, type, none_allowed, name, exact)\
+ ((likely((Py_TYPE(obj) == type) | (none_allowed && (obj == Py_None)))) ? 1 :\
+ __Pyx__ArgTypeTest(obj, type, name, exact))
+static int __Pyx__ArgTypeTest(PyObject *obj, PyTypeObject *type, const char *name, int exact);
+
+/* IsLittleEndian.proto */
+static CYTHON_INLINE int __Pyx_Is_Little_Endian(void);
+
+/* BufferFormatCheck.proto */
+static const char* __Pyx_BufFmt_CheckString(__Pyx_BufFmt_Context* ctx, const char* ts);
+static void __Pyx_BufFmt_Init(__Pyx_BufFmt_Context* ctx,
+ __Pyx_BufFmt_StackElem* stack,
+ __Pyx_TypeInfo* type);
+
+/* BufferGetAndValidate.proto */
+#define __Pyx_GetBufferAndValidate(buf, obj, dtype, flags, nd, cast, stack)\
+ ((obj == Py_None || obj == NULL) ?\
+ (__Pyx_ZeroBuffer(buf), 0) :\
+ __Pyx__GetBufferAndValidate(buf, obj, dtype, flags, nd, cast, stack))
+static int __Pyx__GetBufferAndValidate(Py_buffer* buf, PyObject* obj,
+ __Pyx_TypeInfo* dtype, int flags, int nd, int cast, __Pyx_BufFmt_StackElem* stack);
+static void __Pyx_ZeroBuffer(Py_buffer* buf);
+static CYTHON_INLINE void __Pyx_SafeReleaseBuffer(Py_buffer* info);
+static Py_ssize_t __Pyx_minusones[] = { -1, -1, -1, -1, -1, -1, -1, -1 };
+static Py_ssize_t __Pyx_zeros[] = { 0, 0, 0, 0, 0, 0, 0, 0 };
+
+/* PyDictVersioning.proto */
+#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_TYPE_SLOTS
+#define __PYX_DICT_VERSION_INIT ((PY_UINT64_T) -1)
+#define __PYX_GET_DICT_VERSION(dict) (((PyDictObject*)(dict))->ma_version_tag)
+#define __PYX_UPDATE_DICT_CACHE(dict, value, cache_var, version_var)\
+ (version_var) = __PYX_GET_DICT_VERSION(dict);\
+ (cache_var) = (value);
+#define __PYX_PY_DICT_LOOKUP_IF_MODIFIED(VAR, DICT, LOOKUP) {\
+ static PY_UINT64_T __pyx_dict_version = 0;\
+ static PyObject *__pyx_dict_cached_value = NULL;\
+ if (likely(__PYX_GET_DICT_VERSION(DICT) == __pyx_dict_version)) {\
+ (VAR) = __pyx_dict_cached_value;\
+ } else {\
+ (VAR) = __pyx_dict_cached_value = (LOOKUP);\
+ __pyx_dict_version = __PYX_GET_DICT_VERSION(DICT);\
+ }\
+}
+static CYTHON_INLINE PY_UINT64_T __Pyx_get_tp_dict_version(PyObject *obj);
+static CYTHON_INLINE PY_UINT64_T __Pyx_get_object_dict_version(PyObject *obj);
+static CYTHON_INLINE int __Pyx_object_dict_version_matches(PyObject* obj, PY_UINT64_T tp_dict_version, PY_UINT64_T obj_dict_version);
+#else
+#define __PYX_GET_DICT_VERSION(dict) (0)
+#define __PYX_UPDATE_DICT_CACHE(dict, value, cache_var, version_var)
+#define __PYX_PY_DICT_LOOKUP_IF_MODIFIED(VAR, DICT, LOOKUP) (VAR) = (LOOKUP);
+#endif
+
+/* GetModuleGlobalName.proto */
+#if CYTHON_USE_DICT_VERSIONS
+#define __Pyx_GetModuleGlobalName(var, name) do {\
+ static PY_UINT64_T __pyx_dict_version = 0;\
+ static PyObject *__pyx_dict_cached_value = NULL;\
+ (var) = (likely(__pyx_dict_version == __PYX_GET_DICT_VERSION(__pyx_d))) ?\
+ (likely(__pyx_dict_cached_value) ? __Pyx_NewRef(__pyx_dict_cached_value) : __Pyx_GetBuiltinName(name)) :\
+ __Pyx__GetModuleGlobalName(name, &__pyx_dict_version, &__pyx_dict_cached_value);\
+} while(0)
+#define __Pyx_GetModuleGlobalNameUncached(var, name) do {\
+ PY_UINT64_T __pyx_dict_version;\
+ PyObject *__pyx_dict_cached_value;\
+ (var) = __Pyx__GetModuleGlobalName(name, &__pyx_dict_version, &__pyx_dict_cached_value);\
+} while(0)
+static PyObject *__Pyx__GetModuleGlobalName(PyObject *name, PY_UINT64_T *dict_version, PyObject **dict_cached_value);
+#else
+#define __Pyx_GetModuleGlobalName(var, name) (var) = __Pyx__GetModuleGlobalName(name)
+#define __Pyx_GetModuleGlobalNameUncached(var, name) (var) = __Pyx__GetModuleGlobalName(name)
+static CYTHON_INLINE PyObject *__Pyx__GetModuleGlobalName(PyObject *name);
+#endif
+
+/* PyObjectCall.proto */
+#if CYTHON_COMPILING_IN_CPYTHON
+static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw);
+#else
+#define __Pyx_PyObject_Call(func, arg, kw) PyObject_Call(func, arg, kw)
+#endif
+
+/* ExtTypeTest.proto */
+static CYTHON_INLINE int __Pyx_TypeTest(PyObject *obj, PyTypeObject *type);
+
+/* BufferIndexError.proto */
+static void __Pyx_RaiseBufferIndexError(int axis);
+
+#define __Pyx_BufPtrStrided2d(type, buf, i0, s0, i1, s1) (type)((char*)buf + i0 * s0 + i1 * s1)
+/* PyThreadStateGet.proto */
+#if CYTHON_FAST_THREAD_STATE
+#define __Pyx_PyThreadState_declare PyThreadState *__pyx_tstate;
+#define __Pyx_PyThreadState_assign __pyx_tstate = __Pyx_PyThreadState_Current;
+#define __Pyx_PyErr_Occurred() __pyx_tstate->curexc_type
+#else
+#define __Pyx_PyThreadState_declare
+#define __Pyx_PyThreadState_assign
+#define __Pyx_PyErr_Occurred() PyErr_Occurred()
+#endif
+
+/* PyErrFetchRestore.proto */
+#if CYTHON_FAST_THREAD_STATE
+#define __Pyx_PyErr_Clear() __Pyx_ErrRestore(NULL, NULL, NULL)
+#define __Pyx_ErrRestoreWithState(type, value, tb) __Pyx_ErrRestoreInState(PyThreadState_GET(), type, value, tb)
+#define __Pyx_ErrFetchWithState(type, value, tb) __Pyx_ErrFetchInState(PyThreadState_GET(), type, value, tb)
+#define __Pyx_ErrRestore(type, value, tb) __Pyx_ErrRestoreInState(__pyx_tstate, type, value, tb)
+#define __Pyx_ErrFetch(type, value, tb) __Pyx_ErrFetchInState(__pyx_tstate, type, value, tb)
+static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb);
+static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb);
+#if CYTHON_COMPILING_IN_CPYTHON
+#define __Pyx_PyErr_SetNone(exc) (Py_INCREF(exc), __Pyx_ErrRestore((exc), NULL, NULL))
+#else
+#define __Pyx_PyErr_SetNone(exc) PyErr_SetNone(exc)
+#endif
+#else
+#define __Pyx_PyErr_Clear() PyErr_Clear()
+#define __Pyx_PyErr_SetNone(exc) PyErr_SetNone(exc)
+#define __Pyx_ErrRestoreWithState(type, value, tb) PyErr_Restore(type, value, tb)
+#define __Pyx_ErrFetchWithState(type, value, tb) PyErr_Fetch(type, value, tb)
+#define __Pyx_ErrRestoreInState(tstate, type, value, tb) PyErr_Restore(type, value, tb)
+#define __Pyx_ErrFetchInState(tstate, type, value, tb) PyErr_Fetch(type, value, tb)
+#define __Pyx_ErrRestore(type, value, tb) PyErr_Restore(type, value, tb)
+#define __Pyx_ErrFetch(type, value, tb) PyErr_Fetch(type, value, tb)
+#endif
+
+/* GetTopmostException.proto */
+#if CYTHON_USE_EXC_INFO_STACK
+static _PyErr_StackItem * __Pyx_PyErr_GetTopmostException(PyThreadState *tstate);
+#endif
+
+/* SaveResetException.proto */
+#if CYTHON_FAST_THREAD_STATE
+#define __Pyx_ExceptionSave(type, value, tb) __Pyx__ExceptionSave(__pyx_tstate, type, value, tb)
+static CYTHON_INLINE void __Pyx__ExceptionSave(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb);
+#define __Pyx_ExceptionReset(type, value, tb) __Pyx__ExceptionReset(__pyx_tstate, type, value, tb)
+static CYTHON_INLINE void __Pyx__ExceptionReset(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb);
+#else
+#define __Pyx_ExceptionSave(type, value, tb) PyErr_GetExcInfo(type, value, tb)
+#define __Pyx_ExceptionReset(type, value, tb) PyErr_SetExcInfo(type, value, tb)
+#endif
+
+/* PyErrExceptionMatches.proto */
+#if CYTHON_FAST_THREAD_STATE
+#define __Pyx_PyErr_ExceptionMatches(err) __Pyx_PyErr_ExceptionMatchesInState(__pyx_tstate, err)
+static CYTHON_INLINE int __Pyx_PyErr_ExceptionMatchesInState(PyThreadState* tstate, PyObject* err);
+#else
+#define __Pyx_PyErr_ExceptionMatches(err) PyErr_ExceptionMatches(err)
+#endif
+
+/* GetException.proto */
+#if CYTHON_FAST_THREAD_STATE
+#define __Pyx_GetException(type, value, tb) __Pyx__GetException(__pyx_tstate, type, value, tb)
+static int __Pyx__GetException(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb);
+#else
+static int __Pyx_GetException(PyObject **type, PyObject **value, PyObject **tb);
+#endif
+
+/* RaiseException.proto */
+static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause);
+
+/* TypeImport.proto */
+#ifndef __PYX_HAVE_RT_ImportType_proto
+#define __PYX_HAVE_RT_ImportType_proto
+enum __Pyx_ImportType_CheckSize {
+ __Pyx_ImportType_CheckSize_Error = 0,
+ __Pyx_ImportType_CheckSize_Warn = 1,
+ __Pyx_ImportType_CheckSize_Ignore = 2
+};
+static PyTypeObject *__Pyx_ImportType(PyObject* module, const char *module_name, const char *class_name, size_t size, enum __Pyx_ImportType_CheckSize check_size);
+#endif
+
+/* Import.proto */
+static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level);
+
+/* CLineInTraceback.proto */
+#ifdef CYTHON_CLINE_IN_TRACEBACK
+#define __Pyx_CLineForTraceback(tstate, c_line) (((CYTHON_CLINE_IN_TRACEBACK)) ? c_line : 0)
+#else
+static int __Pyx_CLineForTraceback(PyThreadState *tstate, int c_line);
+#endif
+
+/* CodeObjectCache.proto */
+typedef struct {
+ PyCodeObject* code_object;
+ int code_line;
+} __Pyx_CodeObjectCacheEntry;
+struct __Pyx_CodeObjectCache {
+ int count;
+ int max_count;
+ __Pyx_CodeObjectCacheEntry* entries;
+};
+static struct __Pyx_CodeObjectCache __pyx_code_cache = {0,0,NULL};
+static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line);
+static PyCodeObject *__pyx_find_code_object(int code_line);
+static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object);
+
+/* AddTraceback.proto */
+static void __Pyx_AddTraceback(const char *funcname, int c_line,
+ int py_line, const char *filename);
+
+/* BufferStructDeclare.proto */
+typedef struct {
+ Py_ssize_t shape, strides, suboffsets;
+} __Pyx_Buf_DimInfo;
+typedef struct {
+ size_t refcount;
+ Py_buffer pybuffer;
+} __Pyx_Buffer;
+typedef struct {
+ __Pyx_Buffer *rcbuffer;
+ char *data;
+ __Pyx_Buf_DimInfo diminfo[8];
+} __Pyx_LocalBuf_ND;
+
+#if PY_MAJOR_VERSION < 3
+ static int __Pyx_GetBuffer(PyObject *obj, Py_buffer *view, int flags);
+ static void __Pyx_ReleaseBuffer(Py_buffer *view);
+#else
+ #define __Pyx_GetBuffer PyObject_GetBuffer
+ #define __Pyx_ReleaseBuffer PyBuffer_Release
+#endif
+
+
+/* GCCDiagnostics.proto */
+#if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))
+#define __Pyx_HAS_GCC_DIAGNOSTIC
+#endif
+
+/* RealImag.proto */
+#if CYTHON_CCOMPLEX
+ #ifdef __cplusplus
+ #define __Pyx_CREAL(z) ((z).real())
+ #define __Pyx_CIMAG(z) ((z).imag())
+ #else
+ #define __Pyx_CREAL(z) (__real__(z))
+ #define __Pyx_CIMAG(z) (__imag__(z))
+ #endif
+#else
+ #define __Pyx_CREAL(z) ((z).real)
+ #define __Pyx_CIMAG(z) ((z).imag)
+#endif
+#if defined(__cplusplus) && CYTHON_CCOMPLEX\
+ && (defined(_WIN32) || defined(__clang__) || (defined(__GNUC__) && (__GNUC__ >= 5 || __GNUC__ == 4 && __GNUC_MINOR__ >= 4 )) || __cplusplus >= 201103)
+ #define __Pyx_SET_CREAL(z,x) ((z).real(x))
+ #define __Pyx_SET_CIMAG(z,y) ((z).imag(y))
+#else
+ #define __Pyx_SET_CREAL(z,x) __Pyx_CREAL(z) = (x)
+ #define __Pyx_SET_CIMAG(z,y) __Pyx_CIMAG(z) = (y)
+#endif
+
+/* Arithmetic.proto */
+#if CYTHON_CCOMPLEX
+ #define __Pyx_c_eq_float(a, b) ((a)==(b))
+ #define __Pyx_c_sum_float(a, b) ((a)+(b))
+ #define __Pyx_c_diff_float(a, b) ((a)-(b))
+ #define __Pyx_c_prod_float(a, b) ((a)*(b))
+ #define __Pyx_c_quot_float(a, b) ((a)/(b))
+ #define __Pyx_c_neg_float(a) (-(a))
+ #ifdef __cplusplus
+ #define __Pyx_c_is_zero_float(z) ((z)==(float)0)
+ #define __Pyx_c_conj_float(z) (::std::conj(z))
+ #if 1
+ #define __Pyx_c_abs_float(z) (::std::abs(z))
+ #define __Pyx_c_pow_float(a, b) (::std::pow(a, b))
+ #endif
+ #else
+ #define __Pyx_c_is_zero_float(z) ((z)==0)
+ #define __Pyx_c_conj_float(z) (conjf(z))
+ #if 1
+ #define __Pyx_c_abs_float(z) (cabsf(z))
+ #define __Pyx_c_pow_float(a, b) (cpowf(a, b))
+ #endif
+ #endif
+#else
+ static CYTHON_INLINE int __Pyx_c_eq_float(__pyx_t_float_complex, __pyx_t_float_complex);
+ static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_sum_float(__pyx_t_float_complex, __pyx_t_float_complex);
+ static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_diff_float(__pyx_t_float_complex, __pyx_t_float_complex);
+ static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_prod_float(__pyx_t_float_complex, __pyx_t_float_complex);
+ static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_quot_float(__pyx_t_float_complex, __pyx_t_float_complex);
+ static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_neg_float(__pyx_t_float_complex);
+ static CYTHON_INLINE int __Pyx_c_is_zero_float(__pyx_t_float_complex);
+ static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_conj_float(__pyx_t_float_complex);
+ #if 1
+ static CYTHON_INLINE float __Pyx_c_abs_float(__pyx_t_float_complex);
+ static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_pow_float(__pyx_t_float_complex, __pyx_t_float_complex);
+ #endif
+#endif
+
+/* Arithmetic.proto */
+#if CYTHON_CCOMPLEX
+ #define __Pyx_c_eq_double(a, b) ((a)==(b))
+ #define __Pyx_c_sum_double(a, b) ((a)+(b))
+ #define __Pyx_c_diff_double(a, b) ((a)-(b))
+ #define __Pyx_c_prod_double(a, b) ((a)*(b))
+ #define __Pyx_c_quot_double(a, b) ((a)/(b))
+ #define __Pyx_c_neg_double(a) (-(a))
+ #ifdef __cplusplus
+ #define __Pyx_c_is_zero_double(z) ((z)==(double)0)
+ #define __Pyx_c_conj_double(z) (::std::conj(z))
+ #if 1
+ #define __Pyx_c_abs_double(z) (::std::abs(z))
+ #define __Pyx_c_pow_double(a, b) (::std::pow(a, b))
+ #endif
+ #else
+ #define __Pyx_c_is_zero_double(z) ((z)==0)
+ #define __Pyx_c_conj_double(z) (conj(z))
+ #if 1
+ #define __Pyx_c_abs_double(z) (cabs(z))
+ #define __Pyx_c_pow_double(a, b) (cpow(a, b))
+ #endif
+ #endif
+#else
+ static CYTHON_INLINE int __Pyx_c_eq_double(__pyx_t_double_complex, __pyx_t_double_complex);
+ static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_sum_double(__pyx_t_double_complex, __pyx_t_double_complex);
+ static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_diff_double(__pyx_t_double_complex, __pyx_t_double_complex);
+ static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_prod_double(__pyx_t_double_complex, __pyx_t_double_complex);
+ static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_quot_double(__pyx_t_double_complex, __pyx_t_double_complex);
+ static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_neg_double(__pyx_t_double_complex);
+ static CYTHON_INLINE int __Pyx_c_is_zero_double(__pyx_t_double_complex);
+ static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_conj_double(__pyx_t_double_complex);
+ #if 1
+ static CYTHON_INLINE double __Pyx_c_abs_double(__pyx_t_double_complex);
+ static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_pow_double(__pyx_t_double_complex, __pyx_t_double_complex);
+ #endif
+#endif
+
+/* CIntToPy.proto */
+static CYTHON_INLINE PyObject* __Pyx_PyInt_From_unsigned_int(unsigned int value);
+
+/* CIntFromPy.proto */
+static CYTHON_INLINE unsigned int __Pyx_PyInt_As_unsigned_int(PyObject *);
+
+/* CIntToPy.proto */
+static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value);
+
+/* CIntFromPy.proto */
+static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *);
+
+/* CIntFromPy.proto */
+static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *);
+
+/* FastTypeChecks.proto */
+#if CYTHON_COMPILING_IN_CPYTHON
+#define __Pyx_TypeCheck(obj, type) __Pyx_IsSubtype(Py_TYPE(obj), (PyTypeObject *)type)
+static CYTHON_INLINE int __Pyx_IsSubtype(PyTypeObject *a, PyTypeObject *b);
+static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches(PyObject *err, PyObject *type);
+static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches2(PyObject *err, PyObject *type1, PyObject *type2);
+#else
+#define __Pyx_TypeCheck(obj, type) PyObject_TypeCheck(obj, (PyTypeObject *)type)
+#define __Pyx_PyErr_GivenExceptionMatches(err, type) PyErr_GivenExceptionMatches(err, type)
+#define __Pyx_PyErr_GivenExceptionMatches2(err, type1, type2) (PyErr_GivenExceptionMatches(err, type1) || PyErr_GivenExceptionMatches(err, type2))
+#endif
+#define __Pyx_PyException_Check(obj) __Pyx_TypeCheck(obj, PyExc_Exception)
+
+/* CheckBinaryVersion.proto */
+static int __Pyx_check_binary_version(void);
+
+/* InitStrings.proto */
+static int __Pyx_InitStrings(__Pyx_StringTabEntry *t);
+
+
+/* Module declarations from 'cython' */
+
+/* Module declarations from 'cpython.buffer' */
+
+/* Module declarations from 'libc.string' */
+
+/* Module declarations from 'libc.stdio' */
+
+/* Module declarations from '__builtin__' */
+
+/* Module declarations from 'cpython.type' */
+static PyTypeObject *__pyx_ptype_7cpython_4type_type = 0;
+
+/* Module declarations from 'cpython' */
+
+/* Module declarations from 'cpython.object' */
+
+/* Module declarations from 'cpython.ref' */
+
+/* Module declarations from 'cpython.mem' */
+
+/* Module declarations from 'numpy' */
+
+/* Module declarations from 'numpy' */
+static PyTypeObject *__pyx_ptype_5numpy_dtype = 0;
+static PyTypeObject *__pyx_ptype_5numpy_flatiter = 0;
+static PyTypeObject *__pyx_ptype_5numpy_broadcast = 0;
+static PyTypeObject *__pyx_ptype_5numpy_ndarray = 0;
+static PyTypeObject *__pyx_ptype_5numpy_ufunc = 0;
+
+/* Module declarations from 'bbox' */
+static __Pyx_TypeInfo __Pyx_TypeInfo_nn___pyx_t_4bbox_DTYPE_t = { "DTYPE_t", NULL, sizeof(__pyx_t_4bbox_DTYPE_t), { 0 }, 0, 'R', 0, 0 };
+#define __Pyx_MODULE_NAME "bbox"
+extern int __pyx_module_is_main_bbox;
+int __pyx_module_is_main_bbox = 0;
+
+/* Implementation of 'bbox' */
+static PyObject *__pyx_builtin_range;
+static PyObject *__pyx_builtin_ImportError;
+static const char __pyx_k_K[] = "K";
+static const char __pyx_k_N[] = "N";
+static const char __pyx_k_k[] = "k";
+static const char __pyx_k_n[] = "n";
+static const char __pyx_k_ih[] = "ih";
+static const char __pyx_k_iw[] = "iw";
+static const char __pyx_k_np[] = "np";
+static const char __pyx_k_ua[] = "ua";
+static const char __pyx_k_bbox[] = "bbox";
+static const char __pyx_k_main[] = "__main__";
+static const char __pyx_k_name[] = "__name__";
+static const char __pyx_k_test[] = "__test__";
+static const char __pyx_k_DTYPE[] = "DTYPE";
+static const char __pyx_k_boxes[] = "boxes";
+static const char __pyx_k_dtype[] = "dtype";
+static const char __pyx_k_float[] = "float";
+static const char __pyx_k_numpy[] = "numpy";
+static const char __pyx_k_range[] = "range";
+static const char __pyx_k_zeros[] = "zeros";
+static const char __pyx_k_import[] = "__import__";
+static const char __pyx_k_box_area[] = "box_area";
+static const char __pyx_k_overlaps[] = "overlaps";
+static const char __pyx_k_ImportError[] = "ImportError";
+static const char __pyx_k_query_boxes[] = "query_boxes";
+static const char __pyx_k_bbox_overlaps[] = "bbox_overlaps";
+static const char __pyx_k_box_overlaps_pyx[] = "box_overlaps.pyx";
+static const char __pyx_k_cline_in_traceback[] = "cline_in_traceback";
+static const char __pyx_k_numpy_core_multiarray_failed_to[] = "numpy.core.multiarray failed to import";
+static const char __pyx_k_numpy_core_umath_failed_to_impor[] = "numpy.core.umath failed to import";
+static PyObject *__pyx_n_s_DTYPE;
+static PyObject *__pyx_n_s_ImportError;
+static PyObject *__pyx_n_s_K;
+static PyObject *__pyx_n_s_N;
+static PyObject *__pyx_n_s_bbox;
+static PyObject *__pyx_n_s_bbox_overlaps;
+static PyObject *__pyx_n_s_box_area;
+static PyObject *__pyx_kp_s_box_overlaps_pyx;
+static PyObject *__pyx_n_s_boxes;
+static PyObject *__pyx_n_s_cline_in_traceback;
+static PyObject *__pyx_n_s_dtype;
+static PyObject *__pyx_n_s_float;
+static PyObject *__pyx_n_s_ih;
+static PyObject *__pyx_n_s_import;
+static PyObject *__pyx_n_s_iw;
+static PyObject *__pyx_n_s_k;
+static PyObject *__pyx_n_s_main;
+static PyObject *__pyx_n_s_n;
+static PyObject *__pyx_n_s_name;
+static PyObject *__pyx_n_s_np;
+static PyObject *__pyx_n_s_numpy;
+static PyObject *__pyx_kp_s_numpy_core_multiarray_failed_to;
+static PyObject *__pyx_kp_s_numpy_core_umath_failed_to_impor;
+static PyObject *__pyx_n_s_overlaps;
+static PyObject *__pyx_n_s_query_boxes;
+static PyObject *__pyx_n_s_range;
+static PyObject *__pyx_n_s_test;
+static PyObject *__pyx_n_s_ua;
+static PyObject *__pyx_n_s_zeros;
+static PyObject *__pyx_pf_4bbox_bbox_overlaps(CYTHON_UNUSED PyObject *__pyx_self, PyArrayObject *__pyx_v_boxes, PyArrayObject *__pyx_v_query_boxes); /* proto */
+static PyObject *__pyx_tuple_;
+static PyObject *__pyx_tuple__2;
+static PyObject *__pyx_tuple__3;
+static PyObject *__pyx_codeobj__4;
+/* Late includes */
+
+/* "box_overlaps.pyx":15
+ * ctypedef np.float_t DTYPE_t
+ *
+ * def bbox_overlaps( # <<<<<<<<<<<<<<
+ * np.ndarray[DTYPE_t, ndim=2] boxes,
+ * np.ndarray[DTYPE_t, ndim=2] query_boxes):
+ */
+
+/* Python wrapper */
+static PyObject *__pyx_pw_4bbox_1bbox_overlaps(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
+static char __pyx_doc_4bbox_bbox_overlaps[] = "\n Parameters\n ----------\n boxes: (N, 4) ndarray of float\n query_boxes: (K, 4) ndarray of float\n Returns\n -------\n overlaps: (N, K) ndarray of overlap between boxes and query_boxes\n ";
+static PyMethodDef __pyx_mdef_4bbox_1bbox_overlaps = {"bbox_overlaps", (PyCFunction)(void*)(PyCFunctionWithKeywords)__pyx_pw_4bbox_1bbox_overlaps, METH_VARARGS|METH_KEYWORDS, __pyx_doc_4bbox_bbox_overlaps};
+static PyObject *__pyx_pw_4bbox_1bbox_overlaps(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
+ PyArrayObject *__pyx_v_boxes = 0;
+ PyArrayObject *__pyx_v_query_boxes = 0;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ PyObject *__pyx_r = 0;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("bbox_overlaps (wrapper)", 0);
+ {
+ static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_boxes,&__pyx_n_s_query_boxes,0};
+ PyObject* values[2] = {0,0};
+ if (unlikely(__pyx_kwds)) {
+ Py_ssize_t kw_args;
+ const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
+ switch (pos_args) {
+ case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
+ CYTHON_FALLTHROUGH;
+ case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+ CYTHON_FALLTHROUGH;
+ case 0: break;
+ default: goto __pyx_L5_argtuple_error;
+ }
+ kw_args = PyDict_Size(__pyx_kwds);
+ switch (pos_args) {
+ case 0:
+ if (likely((values[0] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_boxes)) != 0)) kw_args--;
+ else goto __pyx_L5_argtuple_error;
+ CYTHON_FALLTHROUGH;
+ case 1:
+ if (likely((values[1] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_query_boxes)) != 0)) kw_args--;
+ else {
+ __Pyx_RaiseArgtupleInvalid("bbox_overlaps", 1, 2, 2, 1); __PYX_ERR(0, 15, __pyx_L3_error)
+ }
+ }
+ if (unlikely(kw_args > 0)) {
+ if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "bbox_overlaps") < 0)) __PYX_ERR(0, 15, __pyx_L3_error)
+ }
+ } else if (PyTuple_GET_SIZE(__pyx_args) != 2) {
+ goto __pyx_L5_argtuple_error;
+ } else {
+ values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+ values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
+ }
+ __pyx_v_boxes = ((PyArrayObject *)values[0]);
+ __pyx_v_query_boxes = ((PyArrayObject *)values[1]);
+ }
+ goto __pyx_L4_argument_unpacking_done;
+ __pyx_L5_argtuple_error:;
+ __Pyx_RaiseArgtupleInvalid("bbox_overlaps", 1, 2, 2, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 15, __pyx_L3_error)
+ __pyx_L3_error:;
+ __Pyx_AddTraceback("bbox.bbox_overlaps", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __Pyx_RefNannyFinishContext();
+ return NULL;
+ __pyx_L4_argument_unpacking_done:;
+ if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_boxes), __pyx_ptype_5numpy_ndarray, 1, "boxes", 0))) __PYX_ERR(0, 16, __pyx_L1_error)
+ if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_query_boxes), __pyx_ptype_5numpy_ndarray, 1, "query_boxes", 0))) __PYX_ERR(0, 17, __pyx_L1_error)
+ __pyx_r = __pyx_pf_4bbox_bbox_overlaps(__pyx_self, __pyx_v_boxes, __pyx_v_query_boxes);
+
+ /* function exit code */
+ goto __pyx_L0;
+ __pyx_L1_error:;
+ __pyx_r = NULL;
+ __pyx_L0:;
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+static PyObject *__pyx_pf_4bbox_bbox_overlaps(CYTHON_UNUSED PyObject *__pyx_self, PyArrayObject *__pyx_v_boxes, PyArrayObject *__pyx_v_query_boxes) {
+ unsigned int __pyx_v_N;
+ unsigned int __pyx_v_K;
+ PyArrayObject *__pyx_v_overlaps = 0;
+ __pyx_t_4bbox_DTYPE_t __pyx_v_iw;
+ __pyx_t_4bbox_DTYPE_t __pyx_v_ih;
+ __pyx_t_4bbox_DTYPE_t __pyx_v_box_area;
+ __pyx_t_4bbox_DTYPE_t __pyx_v_ua;
+ unsigned int __pyx_v_k;
+ unsigned int __pyx_v_n;
+ __Pyx_LocalBuf_ND __pyx_pybuffernd_boxes;
+ __Pyx_Buffer __pyx_pybuffer_boxes;
+ __Pyx_LocalBuf_ND __pyx_pybuffernd_overlaps;
+ __Pyx_Buffer __pyx_pybuffer_overlaps;
+ __Pyx_LocalBuf_ND __pyx_pybuffernd_query_boxes;
+ __Pyx_Buffer __pyx_pybuffer_query_boxes;
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ PyObject *__pyx_t_2 = NULL;
+ PyObject *__pyx_t_3 = NULL;
+ PyObject *__pyx_t_4 = NULL;
+ PyArrayObject *__pyx_t_5 = NULL;
+ unsigned int __pyx_t_6;
+ unsigned int __pyx_t_7;
+ unsigned int __pyx_t_8;
+ size_t __pyx_t_9;
+ Py_ssize_t __pyx_t_10;
+ int __pyx_t_11;
+ size_t __pyx_t_12;
+ Py_ssize_t __pyx_t_13;
+ size_t __pyx_t_14;
+ Py_ssize_t __pyx_t_15;
+ size_t __pyx_t_16;
+ Py_ssize_t __pyx_t_17;
+ unsigned int __pyx_t_18;
+ unsigned int __pyx_t_19;
+ unsigned int __pyx_t_20;
+ __pyx_t_4bbox_DTYPE_t __pyx_t_21;
+ __pyx_t_4bbox_DTYPE_t __pyx_t_22;
+ __pyx_t_4bbox_DTYPE_t __pyx_t_23;
+ __pyx_t_4bbox_DTYPE_t __pyx_t_24;
+ int __pyx_t_25;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("bbox_overlaps", 0);
+ __pyx_pybuffer_overlaps.pybuffer.buf = NULL;
+ __pyx_pybuffer_overlaps.refcount = 0;
+ __pyx_pybuffernd_overlaps.data = NULL;
+ __pyx_pybuffernd_overlaps.rcbuffer = &__pyx_pybuffer_overlaps;
+ __pyx_pybuffer_boxes.pybuffer.buf = NULL;
+ __pyx_pybuffer_boxes.refcount = 0;
+ __pyx_pybuffernd_boxes.data = NULL;
+ __pyx_pybuffernd_boxes.rcbuffer = &__pyx_pybuffer_boxes;
+ __pyx_pybuffer_query_boxes.pybuffer.buf = NULL;
+ __pyx_pybuffer_query_boxes.refcount = 0;
+ __pyx_pybuffernd_query_boxes.data = NULL;
+ __pyx_pybuffernd_query_boxes.rcbuffer = &__pyx_pybuffer_query_boxes;
+ {
+ __Pyx_BufFmt_StackElem __pyx_stack[1];
+ if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_boxes.rcbuffer->pybuffer, (PyObject*)__pyx_v_boxes, &__Pyx_TypeInfo_nn___pyx_t_4bbox_DTYPE_t, PyBUF_FORMAT| PyBUF_STRIDES, 2, 0, __pyx_stack) == -1)) __PYX_ERR(0, 15, __pyx_L1_error)
+ }
+ __pyx_pybuffernd_boxes.diminfo[0].strides = __pyx_pybuffernd_boxes.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_boxes.diminfo[0].shape = __pyx_pybuffernd_boxes.rcbuffer->pybuffer.shape[0]; __pyx_pybuffernd_boxes.diminfo[1].strides = __pyx_pybuffernd_boxes.rcbuffer->pybuffer.strides[1]; __pyx_pybuffernd_boxes.diminfo[1].shape = __pyx_pybuffernd_boxes.rcbuffer->pybuffer.shape[1];
+ {
+ __Pyx_BufFmt_StackElem __pyx_stack[1];
+ if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_query_boxes.rcbuffer->pybuffer, (PyObject*)__pyx_v_query_boxes, &__Pyx_TypeInfo_nn___pyx_t_4bbox_DTYPE_t, PyBUF_FORMAT| PyBUF_STRIDES, 2, 0, __pyx_stack) == -1)) __PYX_ERR(0, 15, __pyx_L1_error)
+ }
+ __pyx_pybuffernd_query_boxes.diminfo[0].strides = __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_query_boxes.diminfo[0].shape = __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.shape[0]; __pyx_pybuffernd_query_boxes.diminfo[1].strides = __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.strides[1]; __pyx_pybuffernd_query_boxes.diminfo[1].shape = __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.shape[1];
+
+ /* "box_overlaps.pyx":27
+ * overlaps: (N, K) ndarray of overlap between boxes and query_boxes
+ * """
+ * cdef unsigned int N = boxes.shape[0] # <<<<<<<<<<<<<<
+ * cdef unsigned int K = query_boxes.shape[0]
+ * cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)
+ */
+ __pyx_v_N = (__pyx_v_boxes->dimensions[0]);
+
+ /* "box_overlaps.pyx":28
+ * """
+ * cdef unsigned int N = boxes.shape[0]
+ * cdef unsigned int K = query_boxes.shape[0] # <<<<<<<<<<<<<<
+ * cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)
+ * cdef DTYPE_t iw, ih, box_area
+ */
+ __pyx_v_K = (__pyx_v_query_boxes->dimensions[0]);
+
+ /* "box_overlaps.pyx":29
+ * cdef unsigned int N = boxes.shape[0]
+ * cdef unsigned int K = query_boxes.shape[0]
+ * cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) # <<<<<<<<<<<<<<
+ * cdef DTYPE_t iw, ih, box_area
+ * cdef DTYPE_t ua
+ */
+ __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_np); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 29, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_zeros); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 29, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_2);
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ __pyx_t_1 = __Pyx_PyInt_From_unsigned_int(__pyx_v_N); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 29, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_3 = __Pyx_PyInt_From_unsigned_int(__pyx_v_K); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 29, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __pyx_t_4 = PyTuple_New(2); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 29, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_4);
+ __Pyx_GIVEREF(__pyx_t_1);
+ PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_1);
+ __Pyx_GIVEREF(__pyx_t_3);
+ PyTuple_SET_ITEM(__pyx_t_4, 1, __pyx_t_3);
+ __pyx_t_1 = 0;
+ __pyx_t_3 = 0;
+ __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 29, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_GIVEREF(__pyx_t_4);
+ PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_4);
+ __pyx_t_4 = 0;
+ __pyx_t_4 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 29, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_4);
+ __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_DTYPE); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 29, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ if (PyDict_SetItem(__pyx_t_4, __pyx_n_s_dtype, __pyx_t_1) < 0) __PYX_ERR(0, 29, __pyx_L1_error)
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_2, __pyx_t_3, __pyx_t_4); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 29, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+ if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 29, __pyx_L1_error)
+ __pyx_t_5 = ((PyArrayObject *)__pyx_t_1);
+ {
+ __Pyx_BufFmt_StackElem __pyx_stack[1];
+ if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_overlaps.rcbuffer->pybuffer, (PyObject*)__pyx_t_5, &__Pyx_TypeInfo_nn___pyx_t_4bbox_DTYPE_t, PyBUF_FORMAT| PyBUF_STRIDES| PyBUF_WRITABLE, 2, 0, __pyx_stack) == -1)) {
+ __pyx_v_overlaps = ((PyArrayObject *)Py_None); __Pyx_INCREF(Py_None); __pyx_pybuffernd_overlaps.rcbuffer->pybuffer.buf = NULL;
+ __PYX_ERR(0, 29, __pyx_L1_error)
+ } else {__pyx_pybuffernd_overlaps.diminfo[0].strides = __pyx_pybuffernd_overlaps.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_overlaps.diminfo[0].shape = __pyx_pybuffernd_overlaps.rcbuffer->pybuffer.shape[0]; __pyx_pybuffernd_overlaps.diminfo[1].strides = __pyx_pybuffernd_overlaps.rcbuffer->pybuffer.strides[1]; __pyx_pybuffernd_overlaps.diminfo[1].shape = __pyx_pybuffernd_overlaps.rcbuffer->pybuffer.shape[1];
+ }
+ }
+ __pyx_t_5 = 0;
+ __pyx_v_overlaps = ((PyArrayObject *)__pyx_t_1);
+ __pyx_t_1 = 0;
+
+ /* "box_overlaps.pyx":33
+ * cdef DTYPE_t ua
+ * cdef unsigned int k, n
+ * for k in range(K): # <<<<<<<<<<<<<<
+ * box_area = (
+ * (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
+ */
+ __pyx_t_6 = __pyx_v_K;
+ __pyx_t_7 = __pyx_t_6;
+ for (__pyx_t_8 = 0; __pyx_t_8 < __pyx_t_7; __pyx_t_8+=1) {
+ __pyx_v_k = __pyx_t_8;
+
+ /* "box_overlaps.pyx":35
+ * for k in range(K):
+ * box_area = (
+ * (query_boxes[k, 2] - query_boxes[k, 0] + 1) * # <<<<<<<<<<<<<<
+ * (query_boxes[k, 3] - query_boxes[k, 1] + 1)
+ * )
+ */
+ __pyx_t_9 = __pyx_v_k;
+ __pyx_t_10 = 2;
+ __pyx_t_11 = -1;
+ if (unlikely(__pyx_t_9 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_11 = 0;
+ if (__pyx_t_10 < 0) {
+ __pyx_t_10 += __pyx_pybuffernd_query_boxes.diminfo[1].shape;
+ if (unlikely(__pyx_t_10 < 0)) __pyx_t_11 = 1;
+ } else if (unlikely(__pyx_t_10 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_11 = 1;
+ if (unlikely(__pyx_t_11 != -1)) {
+ __Pyx_RaiseBufferIndexError(__pyx_t_11);
+ __PYX_ERR(0, 35, __pyx_L1_error)
+ }
+ __pyx_t_12 = __pyx_v_k;
+ __pyx_t_13 = 0;
+ __pyx_t_11 = -1;
+ if (unlikely(__pyx_t_12 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_11 = 0;
+ if (__pyx_t_13 < 0) {
+ __pyx_t_13 += __pyx_pybuffernd_query_boxes.diminfo[1].shape;
+ if (unlikely(__pyx_t_13 < 0)) __pyx_t_11 = 1;
+ } else if (unlikely(__pyx_t_13 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_11 = 1;
+ if (unlikely(__pyx_t_11 != -1)) {
+ __Pyx_RaiseBufferIndexError(__pyx_t_11);
+ __PYX_ERR(0, 35, __pyx_L1_error)
+ }
+
+ /* "box_overlaps.pyx":36
+ * box_area = (
+ * (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
+ * (query_boxes[k, 3] - query_boxes[k, 1] + 1) # <<<<<<<<<<<<<<
+ * )
+ * for n in range(N):
+ */
+ __pyx_t_14 = __pyx_v_k;
+ __pyx_t_15 = 3;
+ __pyx_t_11 = -1;
+ if (unlikely(__pyx_t_14 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_11 = 0;
+ if (__pyx_t_15 < 0) {
+ __pyx_t_15 += __pyx_pybuffernd_query_boxes.diminfo[1].shape;
+ if (unlikely(__pyx_t_15 < 0)) __pyx_t_11 = 1;
+ } else if (unlikely(__pyx_t_15 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_11 = 1;
+ if (unlikely(__pyx_t_11 != -1)) {
+ __Pyx_RaiseBufferIndexError(__pyx_t_11);
+ __PYX_ERR(0, 36, __pyx_L1_error)
+ }
+ __pyx_t_16 = __pyx_v_k;
+ __pyx_t_17 = 1;
+ __pyx_t_11 = -1;
+ if (unlikely(__pyx_t_16 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_11 = 0;
+ if (__pyx_t_17 < 0) {
+ __pyx_t_17 += __pyx_pybuffernd_query_boxes.diminfo[1].shape;
+ if (unlikely(__pyx_t_17 < 0)) __pyx_t_11 = 1;
+ } else if (unlikely(__pyx_t_17 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_11 = 1;
+ if (unlikely(__pyx_t_11 != -1)) {
+ __Pyx_RaiseBufferIndexError(__pyx_t_11);
+ __PYX_ERR(0, 36, __pyx_L1_error)
+ }
+
+ /* "box_overlaps.pyx":35
+ * for k in range(K):
+ * box_area = (
+ * (query_boxes[k, 2] - query_boxes[k, 0] + 1) * # <<<<<<<<<<<<<<
+ * (query_boxes[k, 3] - query_boxes[k, 1] + 1)
+ * )
+ */
+ __pyx_v_box_area = ((((*__Pyx_BufPtrStrided2d(__pyx_t_4bbox_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_9, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_10, __pyx_pybuffernd_query_boxes.diminfo[1].strides)) - (*__Pyx_BufPtrStrided2d(__pyx_t_4bbox_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_12, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_13, __pyx_pybuffernd_query_boxes.diminfo[1].strides))) + 1.0) * (((*__Pyx_BufPtrStrided2d(__pyx_t_4bbox_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_14, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_15, __pyx_pybuffernd_query_boxes.diminfo[1].strides)) - (*__Pyx_BufPtrStrided2d(__pyx_t_4bbox_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_16, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_17, __pyx_pybuffernd_query_boxes.diminfo[1].strides))) + 1.0));
+
+ /* "box_overlaps.pyx":38
+ * (query_boxes[k, 3] - query_boxes[k, 1] + 1)
+ * )
+ * for n in range(N): # <<<<<<<<<<<<<<
+ * iw = (
+ * min(boxes[n, 2], query_boxes[k, 2]) -
+ */
+ __pyx_t_18 = __pyx_v_N;
+ __pyx_t_19 = __pyx_t_18;
+ for (__pyx_t_20 = 0; __pyx_t_20 < __pyx_t_19; __pyx_t_20+=1) {
+ __pyx_v_n = __pyx_t_20;
+
+ /* "box_overlaps.pyx":40
+ * for n in range(N):
+ * iw = (
+ * min(boxes[n, 2], query_boxes[k, 2]) - # <<<<<<<<<<<<<<
+ * max(boxes[n, 0], query_boxes[k, 0]) + 1
+ * )
+ */
+ __pyx_t_16 = __pyx_v_k;
+ __pyx_t_17 = 2;
+ __pyx_t_11 = -1;
+ if (unlikely(__pyx_t_16 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_11 = 0;
+ if (__pyx_t_17 < 0) {
+ __pyx_t_17 += __pyx_pybuffernd_query_boxes.diminfo[1].shape;
+ if (unlikely(__pyx_t_17 < 0)) __pyx_t_11 = 1;
+ } else if (unlikely(__pyx_t_17 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_11 = 1;
+ if (unlikely(__pyx_t_11 != -1)) {
+ __Pyx_RaiseBufferIndexError(__pyx_t_11);
+ __PYX_ERR(0, 40, __pyx_L1_error)
+ }
+ __pyx_t_21 = (*__Pyx_BufPtrStrided2d(__pyx_t_4bbox_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_16, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_17, __pyx_pybuffernd_query_boxes.diminfo[1].strides));
+ __pyx_t_16 = __pyx_v_n;
+ __pyx_t_17 = 2;
+ __pyx_t_11 = -1;
+ if (unlikely(__pyx_t_16 >= (size_t)__pyx_pybuffernd_boxes.diminfo[0].shape)) __pyx_t_11 = 0;
+ if (__pyx_t_17 < 0) {
+ __pyx_t_17 += __pyx_pybuffernd_boxes.diminfo[1].shape;
+ if (unlikely(__pyx_t_17 < 0)) __pyx_t_11 = 1;
+ } else if (unlikely(__pyx_t_17 >= __pyx_pybuffernd_boxes.diminfo[1].shape)) __pyx_t_11 = 1;
+ if (unlikely(__pyx_t_11 != -1)) {
+ __Pyx_RaiseBufferIndexError(__pyx_t_11);
+ __PYX_ERR(0, 40, __pyx_L1_error)
+ }
+ __pyx_t_22 = (*__Pyx_BufPtrStrided2d(__pyx_t_4bbox_DTYPE_t *, __pyx_pybuffernd_boxes.rcbuffer->pybuffer.buf, __pyx_t_16, __pyx_pybuffernd_boxes.diminfo[0].strides, __pyx_t_17, __pyx_pybuffernd_boxes.diminfo[1].strides));
+ if (((__pyx_t_21 < __pyx_t_22) != 0)) {
+ __pyx_t_23 = __pyx_t_21;
+ } else {
+ __pyx_t_23 = __pyx_t_22;
+ }
+
+ /* "box_overlaps.pyx":41
+ * iw = (
+ * min(boxes[n, 2], query_boxes[k, 2]) -
+ * max(boxes[n, 0], query_boxes[k, 0]) + 1 # <<<<<<<<<<<<<<
+ * )
+ * if iw > 0:
+ */
+ __pyx_t_16 = __pyx_v_k;
+ __pyx_t_17 = 0;
+ __pyx_t_11 = -1;
+ if (unlikely(__pyx_t_16 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_11 = 0;
+ if (__pyx_t_17 < 0) {
+ __pyx_t_17 += __pyx_pybuffernd_query_boxes.diminfo[1].shape;
+ if (unlikely(__pyx_t_17 < 0)) __pyx_t_11 = 1;
+ } else if (unlikely(__pyx_t_17 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_11 = 1;
+ if (unlikely(__pyx_t_11 != -1)) {
+ __Pyx_RaiseBufferIndexError(__pyx_t_11);
+ __PYX_ERR(0, 41, __pyx_L1_error)
+ }
+ __pyx_t_21 = (*__Pyx_BufPtrStrided2d(__pyx_t_4bbox_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_16, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_17, __pyx_pybuffernd_query_boxes.diminfo[1].strides));
+ __pyx_t_16 = __pyx_v_n;
+ __pyx_t_17 = 0;
+ __pyx_t_11 = -1;
+ if (unlikely(__pyx_t_16 >= (size_t)__pyx_pybuffernd_boxes.diminfo[0].shape)) __pyx_t_11 = 0;
+ if (__pyx_t_17 < 0) {
+ __pyx_t_17 += __pyx_pybuffernd_boxes.diminfo[1].shape;
+ if (unlikely(__pyx_t_17 < 0)) __pyx_t_11 = 1;
+ } else if (unlikely(__pyx_t_17 >= __pyx_pybuffernd_boxes.diminfo[1].shape)) __pyx_t_11 = 1;
+ if (unlikely(__pyx_t_11 != -1)) {
+ __Pyx_RaiseBufferIndexError(__pyx_t_11);
+ __PYX_ERR(0, 41, __pyx_L1_error)
+ }
+ __pyx_t_22 = (*__Pyx_BufPtrStrided2d(__pyx_t_4bbox_DTYPE_t *, __pyx_pybuffernd_boxes.rcbuffer->pybuffer.buf, __pyx_t_16, __pyx_pybuffernd_boxes.diminfo[0].strides, __pyx_t_17, __pyx_pybuffernd_boxes.diminfo[1].strides));
+ if (((__pyx_t_21 > __pyx_t_22) != 0)) {
+ __pyx_t_24 = __pyx_t_21;
+ } else {
+ __pyx_t_24 = __pyx_t_22;
+ }
+
+ /* "box_overlaps.pyx":40
+ * for n in range(N):
+ * iw = (
+ * min(boxes[n, 2], query_boxes[k, 2]) - # <<<<<<<<<<<<<<
+ * max(boxes[n, 0], query_boxes[k, 0]) + 1
+ * )
+ */
+ __pyx_v_iw = ((__pyx_t_23 - __pyx_t_24) + 1.0);
+
+ /* "box_overlaps.pyx":43
+ * max(boxes[n, 0], query_boxes[k, 0]) + 1
+ * )
+ * if iw > 0: # <<<<<<<<<<<<<<
+ * ih = (
+ * min(boxes[n, 3], query_boxes[k, 3]) -
+ */
+ __pyx_t_25 = ((__pyx_v_iw > 0.0) != 0);
+ if (__pyx_t_25) {
+
+ /* "box_overlaps.pyx":45
+ * if iw > 0:
+ * ih = (
+ * min(boxes[n, 3], query_boxes[k, 3]) - # <<<<<<<<<<<<<<
+ * max(boxes[n, 1], query_boxes[k, 1]) + 1
+ * )
+ */
+ __pyx_t_16 = __pyx_v_k;
+ __pyx_t_17 = 3;
+ __pyx_t_11 = -1;
+ if (unlikely(__pyx_t_16 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_11 = 0;
+ if (__pyx_t_17 < 0) {
+ __pyx_t_17 += __pyx_pybuffernd_query_boxes.diminfo[1].shape;
+ if (unlikely(__pyx_t_17 < 0)) __pyx_t_11 = 1;
+ } else if (unlikely(__pyx_t_17 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_11 = 1;
+ if (unlikely(__pyx_t_11 != -1)) {
+ __Pyx_RaiseBufferIndexError(__pyx_t_11);
+ __PYX_ERR(0, 45, __pyx_L1_error)
+ }
+ __pyx_t_24 = (*__Pyx_BufPtrStrided2d(__pyx_t_4bbox_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_16, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_17, __pyx_pybuffernd_query_boxes.diminfo[1].strides));
+ __pyx_t_16 = __pyx_v_n;
+ __pyx_t_17 = 3;
+ __pyx_t_11 = -1;
+ if (unlikely(__pyx_t_16 >= (size_t)__pyx_pybuffernd_boxes.diminfo[0].shape)) __pyx_t_11 = 0;
+ if (__pyx_t_17 < 0) {
+ __pyx_t_17 += __pyx_pybuffernd_boxes.diminfo[1].shape;
+ if (unlikely(__pyx_t_17 < 0)) __pyx_t_11 = 1;
+ } else if (unlikely(__pyx_t_17 >= __pyx_pybuffernd_boxes.diminfo[1].shape)) __pyx_t_11 = 1;
+ if (unlikely(__pyx_t_11 != -1)) {
+ __Pyx_RaiseBufferIndexError(__pyx_t_11);
+ __PYX_ERR(0, 45, __pyx_L1_error)
+ }
+ __pyx_t_23 = (*__Pyx_BufPtrStrided2d(__pyx_t_4bbox_DTYPE_t *, __pyx_pybuffernd_boxes.rcbuffer->pybuffer.buf, __pyx_t_16, __pyx_pybuffernd_boxes.diminfo[0].strides, __pyx_t_17, __pyx_pybuffernd_boxes.diminfo[1].strides));
+ if (((__pyx_t_24 < __pyx_t_23) != 0)) {
+ __pyx_t_21 = __pyx_t_24;
+ } else {
+ __pyx_t_21 = __pyx_t_23;
+ }
+
+ /* "box_overlaps.pyx":46
+ * ih = (
+ * min(boxes[n, 3], query_boxes[k, 3]) -
+ * max(boxes[n, 1], query_boxes[k, 1]) + 1 # <<<<<<<<<<<<<<
+ * )
+ * if ih > 0:
+ */
+ __pyx_t_16 = __pyx_v_k;
+ __pyx_t_17 = 1;
+ __pyx_t_11 = -1;
+ if (unlikely(__pyx_t_16 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_11 = 0;
+ if (__pyx_t_17 < 0) {
+ __pyx_t_17 += __pyx_pybuffernd_query_boxes.diminfo[1].shape;
+ if (unlikely(__pyx_t_17 < 0)) __pyx_t_11 = 1;
+ } else if (unlikely(__pyx_t_17 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_11 = 1;
+ if (unlikely(__pyx_t_11 != -1)) {
+ __Pyx_RaiseBufferIndexError(__pyx_t_11);
+ __PYX_ERR(0, 46, __pyx_L1_error)
+ }
+ __pyx_t_24 = (*__Pyx_BufPtrStrided2d(__pyx_t_4bbox_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_16, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_17, __pyx_pybuffernd_query_boxes.diminfo[1].strides));
+ __pyx_t_16 = __pyx_v_n;
+ __pyx_t_17 = 1;
+ __pyx_t_11 = -1;
+ if (unlikely(__pyx_t_16 >= (size_t)__pyx_pybuffernd_boxes.diminfo[0].shape)) __pyx_t_11 = 0;
+ if (__pyx_t_17 < 0) {
+ __pyx_t_17 += __pyx_pybuffernd_boxes.diminfo[1].shape;
+ if (unlikely(__pyx_t_17 < 0)) __pyx_t_11 = 1;
+ } else if (unlikely(__pyx_t_17 >= __pyx_pybuffernd_boxes.diminfo[1].shape)) __pyx_t_11 = 1;
+ if (unlikely(__pyx_t_11 != -1)) {
+ __Pyx_RaiseBufferIndexError(__pyx_t_11);
+ __PYX_ERR(0, 46, __pyx_L1_error)
+ }
+ __pyx_t_23 = (*__Pyx_BufPtrStrided2d(__pyx_t_4bbox_DTYPE_t *, __pyx_pybuffernd_boxes.rcbuffer->pybuffer.buf, __pyx_t_16, __pyx_pybuffernd_boxes.diminfo[0].strides, __pyx_t_17, __pyx_pybuffernd_boxes.diminfo[1].strides));
+ if (((__pyx_t_24 > __pyx_t_23) != 0)) {
+ __pyx_t_22 = __pyx_t_24;
+ } else {
+ __pyx_t_22 = __pyx_t_23;
+ }
+
+ /* "box_overlaps.pyx":45
+ * if iw > 0:
+ * ih = (
+ * min(boxes[n, 3], query_boxes[k, 3]) - # <<<<<<<<<<<<<<
+ * max(boxes[n, 1], query_boxes[k, 1]) + 1
+ * )
+ */
+ __pyx_v_ih = ((__pyx_t_21 - __pyx_t_22) + 1.0);
+
+ /* "box_overlaps.pyx":48
+ * max(boxes[n, 1], query_boxes[k, 1]) + 1
+ * )
+ * if ih > 0: # <<<<<<<<<<<<<<
+ * ua = float(
+ * (boxes[n, 2] - boxes[n, 0] + 1) *
+ */
+ __pyx_t_25 = ((__pyx_v_ih > 0.0) != 0);
+ if (__pyx_t_25) {
+
+ /* "box_overlaps.pyx":50
+ * if ih > 0:
+ * ua = float(
+ * (boxes[n, 2] - boxes[n, 0] + 1) * # <<<<<<<<<<<<<<
+ * (boxes[n, 3] - boxes[n, 1] + 1) +
+ * box_area - iw * ih
+ */
+ __pyx_t_16 = __pyx_v_n;
+ __pyx_t_17 = 2;
+ __pyx_t_11 = -1;
+ if (unlikely(__pyx_t_16 >= (size_t)__pyx_pybuffernd_boxes.diminfo[0].shape)) __pyx_t_11 = 0;
+ if (__pyx_t_17 < 0) {
+ __pyx_t_17 += __pyx_pybuffernd_boxes.diminfo[1].shape;
+ if (unlikely(__pyx_t_17 < 0)) __pyx_t_11 = 1;
+ } else if (unlikely(__pyx_t_17 >= __pyx_pybuffernd_boxes.diminfo[1].shape)) __pyx_t_11 = 1;
+ if (unlikely(__pyx_t_11 != -1)) {
+ __Pyx_RaiseBufferIndexError(__pyx_t_11);
+ __PYX_ERR(0, 50, __pyx_L1_error)
+ }
+ __pyx_t_14 = __pyx_v_n;
+ __pyx_t_15 = 0;
+ __pyx_t_11 = -1;
+ if (unlikely(__pyx_t_14 >= (size_t)__pyx_pybuffernd_boxes.diminfo[0].shape)) __pyx_t_11 = 0;
+ if (__pyx_t_15 < 0) {
+ __pyx_t_15 += __pyx_pybuffernd_boxes.diminfo[1].shape;
+ if (unlikely(__pyx_t_15 < 0)) __pyx_t_11 = 1;
+ } else if (unlikely(__pyx_t_15 >= __pyx_pybuffernd_boxes.diminfo[1].shape)) __pyx_t_11 = 1;
+ if (unlikely(__pyx_t_11 != -1)) {
+ __Pyx_RaiseBufferIndexError(__pyx_t_11);
+ __PYX_ERR(0, 50, __pyx_L1_error)
+ }
+
+ /* "box_overlaps.pyx":51
+ * ua = float(
+ * (boxes[n, 2] - boxes[n, 0] + 1) *
+ * (boxes[n, 3] - boxes[n, 1] + 1) + # <<<<<<<<<<<<<<
+ * box_area - iw * ih
+ * )
+ */
+ __pyx_t_12 = __pyx_v_n;
+ __pyx_t_13 = 3;
+ __pyx_t_11 = -1;
+ if (unlikely(__pyx_t_12 >= (size_t)__pyx_pybuffernd_boxes.diminfo[0].shape)) __pyx_t_11 = 0;
+ if (__pyx_t_13 < 0) {
+ __pyx_t_13 += __pyx_pybuffernd_boxes.diminfo[1].shape;
+ if (unlikely(__pyx_t_13 < 0)) __pyx_t_11 = 1;
+ } else if (unlikely(__pyx_t_13 >= __pyx_pybuffernd_boxes.diminfo[1].shape)) __pyx_t_11 = 1;
+ if (unlikely(__pyx_t_11 != -1)) {
+ __Pyx_RaiseBufferIndexError(__pyx_t_11);
+ __PYX_ERR(0, 51, __pyx_L1_error)
+ }
+ __pyx_t_9 = __pyx_v_n;
+ __pyx_t_10 = 1;
+ __pyx_t_11 = -1;
+ if (unlikely(__pyx_t_9 >= (size_t)__pyx_pybuffernd_boxes.diminfo[0].shape)) __pyx_t_11 = 0;
+ if (__pyx_t_10 < 0) {
+ __pyx_t_10 += __pyx_pybuffernd_boxes.diminfo[1].shape;
+ if (unlikely(__pyx_t_10 < 0)) __pyx_t_11 = 1;
+ } else if (unlikely(__pyx_t_10 >= __pyx_pybuffernd_boxes.diminfo[1].shape)) __pyx_t_11 = 1;
+ if (unlikely(__pyx_t_11 != -1)) {
+ __Pyx_RaiseBufferIndexError(__pyx_t_11);
+ __PYX_ERR(0, 51, __pyx_L1_error)
+ }
+
+ /* "box_overlaps.pyx":49
+ * )
+ * if ih > 0:
+ * ua = float( # <<<<<<<<<<<<<<
+ * (boxes[n, 2] - boxes[n, 0] + 1) *
+ * (boxes[n, 3] - boxes[n, 1] + 1) +
+ */
+ __pyx_v_ua = ((double)((((((*__Pyx_BufPtrStrided2d(__pyx_t_4bbox_DTYPE_t *, __pyx_pybuffernd_boxes.rcbuffer->pybuffer.buf, __pyx_t_16, __pyx_pybuffernd_boxes.diminfo[0].strides, __pyx_t_17, __pyx_pybuffernd_boxes.diminfo[1].strides)) - (*__Pyx_BufPtrStrided2d(__pyx_t_4bbox_DTYPE_t *, __pyx_pybuffernd_boxes.rcbuffer->pybuffer.buf, __pyx_t_14, __pyx_pybuffernd_boxes.diminfo[0].strides, __pyx_t_15, __pyx_pybuffernd_boxes.diminfo[1].strides))) + 1.0) * (((*__Pyx_BufPtrStrided2d(__pyx_t_4bbox_DTYPE_t *, __pyx_pybuffernd_boxes.rcbuffer->pybuffer.buf, __pyx_t_12, __pyx_pybuffernd_boxes.diminfo[0].strides, __pyx_t_13, __pyx_pybuffernd_boxes.diminfo[1].strides)) - (*__Pyx_BufPtrStrided2d(__pyx_t_4bbox_DTYPE_t *, __pyx_pybuffernd_boxes.rcbuffer->pybuffer.buf, __pyx_t_9, __pyx_pybuffernd_boxes.diminfo[0].strides, __pyx_t_10, __pyx_pybuffernd_boxes.diminfo[1].strides))) + 1.0)) + __pyx_v_box_area) - (__pyx_v_iw * __pyx_v_ih)));
+
+ /* "box_overlaps.pyx":54
+ * box_area - iw * ih
+ * )
+ * overlaps[n, k] = iw * ih / ua # <<<<<<<<<<<<<<
+ * return overlaps
+ */
+ __pyx_t_22 = (__pyx_v_iw * __pyx_v_ih);
+ if (unlikely(__pyx_v_ua == 0)) {
+ PyErr_SetString(PyExc_ZeroDivisionError, "float division");
+ __PYX_ERR(0, 54, __pyx_L1_error)
+ }
+ __pyx_t_9 = __pyx_v_n;
+ __pyx_t_12 = __pyx_v_k;
+ __pyx_t_11 = -1;
+ if (unlikely(__pyx_t_9 >= (size_t)__pyx_pybuffernd_overlaps.diminfo[0].shape)) __pyx_t_11 = 0;
+ if (unlikely(__pyx_t_12 >= (size_t)__pyx_pybuffernd_overlaps.diminfo[1].shape)) __pyx_t_11 = 1;
+ if (unlikely(__pyx_t_11 != -1)) {
+ __Pyx_RaiseBufferIndexError(__pyx_t_11);
+ __PYX_ERR(0, 54, __pyx_L1_error)
+ }
+ *__Pyx_BufPtrStrided2d(__pyx_t_4bbox_DTYPE_t *, __pyx_pybuffernd_overlaps.rcbuffer->pybuffer.buf, __pyx_t_9, __pyx_pybuffernd_overlaps.diminfo[0].strides, __pyx_t_12, __pyx_pybuffernd_overlaps.diminfo[1].strides) = (__pyx_t_22 / __pyx_v_ua);
+
+ /* "box_overlaps.pyx":48
+ * max(boxes[n, 1], query_boxes[k, 1]) + 1
+ * )
+ * if ih > 0: # <<<<<<<<<<<<<<
+ * ua = float(
+ * (boxes[n, 2] - boxes[n, 0] + 1) *
+ */
+ }
+
+ /* "box_overlaps.pyx":43
+ * max(boxes[n, 0], query_boxes[k, 0]) + 1
+ * )
+ * if iw > 0: # <<<<<<<<<<<<<<
+ * ih = (
+ * min(boxes[n, 3], query_boxes[k, 3]) -
+ */
+ }
+ }
+ }
+
+ /* "box_overlaps.pyx":55
+ * )
+ * overlaps[n, k] = iw * ih / ua
+ * return overlaps # <<<<<<<<<<<<<<
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __Pyx_INCREF(((PyObject *)__pyx_v_overlaps));
+ __pyx_r = ((PyObject *)__pyx_v_overlaps);
+ goto __pyx_L0;
+
+ /* "box_overlaps.pyx":15
+ * ctypedef np.float_t DTYPE_t
+ *
+ * def bbox_overlaps( # <<<<<<<<<<<<<<
+ * np.ndarray[DTYPE_t, ndim=2] boxes,
+ * np.ndarray[DTYPE_t, ndim=2] query_boxes):
+ */
+
+ /* function exit code */
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_XDECREF(__pyx_t_2);
+ __Pyx_XDECREF(__pyx_t_3);
+ __Pyx_XDECREF(__pyx_t_4);
+ { PyObject *__pyx_type, *__pyx_value, *__pyx_tb;
+ __Pyx_PyThreadState_declare
+ __Pyx_PyThreadState_assign
+ __Pyx_ErrFetch(&__pyx_type, &__pyx_value, &__pyx_tb);
+ __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_boxes.rcbuffer->pybuffer);
+ __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_overlaps.rcbuffer->pybuffer);
+ __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_query_boxes.rcbuffer->pybuffer);
+ __Pyx_ErrRestore(__pyx_type, __pyx_value, __pyx_tb);}
+ __Pyx_AddTraceback("bbox.bbox_overlaps", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = NULL;
+ goto __pyx_L2;
+ __pyx_L0:;
+ __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_boxes.rcbuffer->pybuffer);
+ __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_overlaps.rcbuffer->pybuffer);
+ __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_query_boxes.rcbuffer->pybuffer);
+ __pyx_L2:;
+ __Pyx_XDECREF((PyObject *)__pyx_v_overlaps);
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
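+/* For reference: the Cython source of bbox_overlaps() that the generated code
+ * above implements, reassembled here from the embedded "box_overlaps.pyx"
+ * comments. This is a readability aid only; exact indentation and the original
+ * docstring are approximate.
+ *
+ *     def bbox_overlaps(
+ *             np.ndarray[DTYPE_t, ndim=2] boxes,
+ *             np.ndarray[DTYPE_t, ndim=2] query_boxes):
+ *         cdef unsigned int N = boxes.shape[0]
+ *         cdef unsigned int K = query_boxes.shape[0]
+ *         cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)
+ *         cdef DTYPE_t iw, ih, box_area
+ *         cdef DTYPE_t ua
+ *         cdef unsigned int k, n
+ *         for k in range(K):
+ *             box_area = (
+ *                 (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
+ *                 (query_boxes[k, 3] - query_boxes[k, 1] + 1)
+ *             )
+ *             for n in range(N):
+ *                 iw = (
+ *                     min(boxes[n, 2], query_boxes[k, 2]) -
+ *                     max(boxes[n, 0], query_boxes[k, 0]) + 1
+ *                 )
+ *                 if iw > 0:
+ *                     ih = (
+ *                         min(boxes[n, 3], query_boxes[k, 3]) -
+ *                         max(boxes[n, 1], query_boxes[k, 1]) + 1
+ *                     )
+ *                     if ih > 0:
+ *                         ua = float(
+ *                             (boxes[n, 2] - boxes[n, 0] + 1) *
+ *                             (boxes[n, 3] - boxes[n, 1] + 1) +
+ *                             box_area - iw * ih
+ *                         )
+ *                         overlaps[n, k] = iw * ih / ua
+ *         return overlaps
+ */
+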
+/* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":734
+ * ctypedef npy_cdouble complex_t
+ *
+ * cdef inline object PyArray_MultiIterNew1(a): # <<<<<<<<<<<<<<
+ * return PyArray_MultiIterNew(1, a)
+ *
+ */
+
+static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew1(PyObject *__pyx_v_a) {
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("PyArray_MultiIterNew1", 0);
+
+ /* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":735
+ *
+ * cdef inline object PyArray_MultiIterNew1(a):
+ * return PyArray_MultiIterNew(1, a) # <<<<<<<<<<<<<<
+ *
+ * cdef inline object PyArray_MultiIterNew2(a, b):
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __pyx_t_1 = PyArray_MultiIterNew(1, ((void *)__pyx_v_a)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 735, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_r = __pyx_t_1;
+ __pyx_t_1 = 0;
+ goto __pyx_L0;
+
+ /* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":734
+ * ctypedef npy_cdouble complex_t
+ *
+ * cdef inline object PyArray_MultiIterNew1(a): # <<<<<<<<<<<<<<
+ * return PyArray_MultiIterNew(1, a)
+ *
+ */
+
+ /* function exit code */
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_AddTraceback("numpy.PyArray_MultiIterNew1", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = 0;
+ __pyx_L0:;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":737
+ * return PyArray_MultiIterNew(1, a)
+ *
+ * cdef inline object PyArray_MultiIterNew2(a, b): # <<<<<<<<<<<<<<
+ * return PyArray_MultiIterNew(2, a, b)
+ *
+ */
+
+static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew2(PyObject *__pyx_v_a, PyObject *__pyx_v_b) {
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("PyArray_MultiIterNew2", 0);
+
+ /* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":738
+ *
+ * cdef inline object PyArray_MultiIterNew2(a, b):
+ * return PyArray_MultiIterNew(2, a, b) # <<<<<<<<<<<<<<
+ *
+ * cdef inline object PyArray_MultiIterNew3(a, b, c):
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __pyx_t_1 = PyArray_MultiIterNew(2, ((void *)__pyx_v_a), ((void *)__pyx_v_b)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 738, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_r = __pyx_t_1;
+ __pyx_t_1 = 0;
+ goto __pyx_L0;
+
+ /* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":737
+ * return PyArray_MultiIterNew(1, a)
+ *
+ * cdef inline object PyArray_MultiIterNew2(a, b): # <<<<<<<<<<<<<<
+ * return PyArray_MultiIterNew(2, a, b)
+ *
+ */
+
+ /* function exit code */
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_AddTraceback("numpy.PyArray_MultiIterNew2", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = 0;
+ __pyx_L0:;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":740
+ * return PyArray_MultiIterNew(2, a, b)
+ *
+ * cdef inline object PyArray_MultiIterNew3(a, b, c): # <<<<<<<<<<<<<<
+ * return PyArray_MultiIterNew(3, a, b, c)
+ *
+ */
+
+static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew3(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c) {
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("PyArray_MultiIterNew3", 0);
+
+ /* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":741
+ *
+ * cdef inline object PyArray_MultiIterNew3(a, b, c):
+ * return PyArray_MultiIterNew(3, a, b, c) # <<<<<<<<<<<<<<
+ *
+ * cdef inline object PyArray_MultiIterNew4(a, b, c, d):
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __pyx_t_1 = PyArray_MultiIterNew(3, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 741, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_r = __pyx_t_1;
+ __pyx_t_1 = 0;
+ goto __pyx_L0;
+
+ /* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":740
+ * return PyArray_MultiIterNew(2, a, b)
+ *
+ * cdef inline object PyArray_MultiIterNew3(a, b, c): # <<<<<<<<<<<<<<
+ * return PyArray_MultiIterNew(3, a, b, c)
+ *
+ */
+
+ /* function exit code */
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_AddTraceback("numpy.PyArray_MultiIterNew3", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = 0;
+ __pyx_L0:;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":743
+ * return PyArray_MultiIterNew(3, a, b, c)
+ *
+ * cdef inline object PyArray_MultiIterNew4(a, b, c, d): # <<<<<<<<<<<<<<
+ * return PyArray_MultiIterNew(4, a, b, c, d)
+ *
+ */
+
+static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew4(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c, PyObject *__pyx_v_d) {
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("PyArray_MultiIterNew4", 0);
+
+ /* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":744
+ *
+ * cdef inline object PyArray_MultiIterNew4(a, b, c, d):
+ * return PyArray_MultiIterNew(4, a, b, c, d) # <<<<<<<<<<<<<<
+ *
+ * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e):
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __pyx_t_1 = PyArray_MultiIterNew(4, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c), ((void *)__pyx_v_d)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 744, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_r = __pyx_t_1;
+ __pyx_t_1 = 0;
+ goto __pyx_L0;
+
+ /* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":743
+ * return PyArray_MultiIterNew(3, a, b, c)
+ *
+ * cdef inline object PyArray_MultiIterNew4(a, b, c, d): # <<<<<<<<<<<<<<
+ * return PyArray_MultiIterNew(4, a, b, c, d)
+ *
+ */
+
+ /* function exit code */
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_AddTraceback("numpy.PyArray_MultiIterNew4", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = 0;
+ __pyx_L0:;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":746
+ * return PyArray_MultiIterNew(4, a, b, c, d)
+ *
+ * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): # <<<<<<<<<<<<<<
+ * return PyArray_MultiIterNew(5, a, b, c, d, e)
+ *
+ */
+
+static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew5(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c, PyObject *__pyx_v_d, PyObject *__pyx_v_e) {
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("PyArray_MultiIterNew5", 0);
+
+ /* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":747
+ *
+ * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e):
+ * return PyArray_MultiIterNew(5, a, b, c, d, e) # <<<<<<<<<<<<<<
+ *
+ * cdef inline tuple PyDataType_SHAPE(dtype d):
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __pyx_t_1 = PyArray_MultiIterNew(5, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c), ((void *)__pyx_v_d), ((void *)__pyx_v_e)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 747, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_r = __pyx_t_1;
+ __pyx_t_1 = 0;
+ goto __pyx_L0;
+
+ /* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":746
+ * return PyArray_MultiIterNew(4, a, b, c, d)
+ *
+ * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): # <<<<<<<<<<<<<<
+ * return PyArray_MultiIterNew(5, a, b, c, d, e)
+ *
+ */
+
+ /* function exit code */
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_AddTraceback("numpy.PyArray_MultiIterNew5", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = 0;
+ __pyx_L0:;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":749
+ * return PyArray_MultiIterNew(5, a, b, c, d, e)
+ *
+ * cdef inline tuple PyDataType_SHAPE(dtype d): # <<<<<<<<<<<<<<
+ * if PyDataType_HASSUBARRAY(d):
+ * return d.subarray.shape
+ */
+
+static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyDataType_SHAPE(PyArray_Descr *__pyx_v_d) {
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ int __pyx_t_1;
+ __Pyx_RefNannySetupContext("PyDataType_SHAPE", 0);
+
+ /* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":750
+ *
+ * cdef inline tuple PyDataType_SHAPE(dtype d):
+ * if PyDataType_HASSUBARRAY(d): # <<<<<<<<<<<<<<
+ * return d.subarray.shape
+ * else:
+ */
+ __pyx_t_1 = (PyDataType_HASSUBARRAY(__pyx_v_d) != 0);
+ if (__pyx_t_1) {
+
+ /* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":751
+ * cdef inline tuple PyDataType_SHAPE(dtype d):
+ * if PyDataType_HASSUBARRAY(d):
+ * return d.subarray.shape # <<<<<<<<<<<<<<
+ * else:
+ * return ()
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __Pyx_INCREF(((PyObject*)__pyx_v_d->subarray->shape));
+ __pyx_r = ((PyObject*)__pyx_v_d->subarray->shape);
+ goto __pyx_L0;
+
+ /* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":750
+ *
+ * cdef inline tuple PyDataType_SHAPE(dtype d):
+ * if PyDataType_HASSUBARRAY(d): # <<<<<<<<<<<<<<
+ * return d.subarray.shape
+ * else:
+ */
+ }
+
+ /* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":753
+ * return d.subarray.shape
+ * else:
+ * return () # <<<<<<<<<<<<<<
+ *
+ *
+ */
+ /*else*/ {
+ __Pyx_XDECREF(__pyx_r);
+ __Pyx_INCREF(__pyx_empty_tuple);
+ __pyx_r = __pyx_empty_tuple;
+ goto __pyx_L0;
+ }
+
+ /* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":749
+ * return PyArray_MultiIterNew(5, a, b, c, d, e)
+ *
+ * cdef inline tuple PyDataType_SHAPE(dtype d): # <<<<<<<<<<<<<<
+ * if PyDataType_HASSUBARRAY(d):
+ * return d.subarray.shape
+ */
+
+ /* function exit code */
+ __pyx_L0:;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":868
+ * int _import_umath() except -1
+ *
+ * cdef inline void set_array_base(ndarray arr, object base): # <<<<<<<<<<<<<<
+ * Py_INCREF(base) # important to do this before stealing the reference below!
+ * PyArray_SetBaseObject(arr, base)
+ */
+
+static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_arr, PyObject *__pyx_v_base) {
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("set_array_base", 0);
+
+ /* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":869
+ *
+ * cdef inline void set_array_base(ndarray arr, object base):
+ * Py_INCREF(base) # important to do this before stealing the reference below! # <<<<<<<<<<<<<<
+ * PyArray_SetBaseObject(arr, base)
+ *
+ */
+ Py_INCREF(__pyx_v_base);
+
+ /* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":870
+ * cdef inline void set_array_base(ndarray arr, object base):
+ * Py_INCREF(base) # important to do this before stealing the reference below!
+ * PyArray_SetBaseObject(arr, base) # <<<<<<<<<<<<<<
+ *
+ * cdef inline object get_array_base(ndarray arr):
+ */
+ (void)(PyArray_SetBaseObject(__pyx_v_arr, __pyx_v_base));
+
+ /* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":868
+ * int _import_umath() except -1
+ *
+ * cdef inline void set_array_base(ndarray arr, object base): # <<<<<<<<<<<<<<
+ * Py_INCREF(base) # important to do this before stealing the reference below!
+ * PyArray_SetBaseObject(arr, base)
+ */
+
+ /* function exit code */
+ __Pyx_RefNannyFinishContext();
+}
+
+/* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":872
+ * PyArray_SetBaseObject(arr, base)
+ *
+ * cdef inline object get_array_base(ndarray arr): # <<<<<<<<<<<<<<
+ * base = PyArray_BASE(arr)
+ * if base is NULL:
+ */
+
+static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__pyx_v_arr) {
+ PyObject *__pyx_v_base;
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ int __pyx_t_1;
+ __Pyx_RefNannySetupContext("get_array_base", 0);
+
+ /* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":873
+ *
+ * cdef inline object get_array_base(ndarray arr):
+ * base = PyArray_BASE(arr) # <<<<<<<<<<<<<<
+ * if base is NULL:
+ * return None
+ */
+ __pyx_v_base = PyArray_BASE(__pyx_v_arr);
+
+ /* "../../../miniconda3/envs/py3.6/lib/python3.6/site-packages/numpy/__init__.pxd":874
+ * cdef inline object get_array_base(ndarray arr):
+ * base = PyArray_BASE(arr)
+ * if base is NULL: # <<<<<<<<<<<<<<
+ * return None
+ * return