diff --git a/README.md b/README.md
index 054592100bfb30e57ad015badff944719f6a4455..eb19a55a870f085f1c9ccece612a37d9d4f7e867 100644
--- a/README.md
+++ b/README.md
@@ -696,7 +696,7 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型
RetinaFace |
FP16 |
Supported |
- - |
+ Supported |
INT8 |
diff --git a/models/cv/classification/convnext_base/ixrt/README.md b/models/cv/classification/convnext_base/ixrt/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..62bfb23c5f1280728eea1f0e0ca843441e6ad64f
--- /dev/null
+++ b/models/cv/classification/convnext_base/ixrt/README.md
@@ -0,0 +1,59 @@
+# ConvNeXt Base
+
+## Description
+
+The ConvNeXt Base model represents a significant stride in the evolution of convolutional neural networks (CNNs), introduced by researchers at Facebook AI Research (FAIR) and UC Berkeley. It is part of the ConvNeXt family, which challenges the dominance of Vision Transformers (ViTs) in the realm of visual recognition tasks.
+
+## Setup
+
+### Install
+
+```bash
+# Install libGL
+## CentOS
+yum install -y mesa-libGL
+## Ubuntu
+apt install -y libgl1-mesa-dev
+
+pip3 install tqdm
+pip3 install onnx
+pip3 install onnxsim
+pip3 install tabulate
+pip3 install ppq
+pip3 install cuda-python
+```
+
+### Download
+
+Pretrained model: <https://download.pytorch.org/models/convnext_base-6075fbad.pth>
+
+Dataset: <https://www.image-net.org/download.php> to download the validation dataset.
+
+### Model Conversion
+
+```bash
+python3 export.py --weight convnext_base-6075fbad.pth --output convnext_base.onnx
+```
+
+## Inference
+
+```bash
+export DATASETS_DIR=/Path/to/imagenet_val/
+```
+
+### FP16
+
+```bash
+
+# Accuracy
+bash scripts/infer_convnext_base_fp16_accuracy.sh
+# Performance
+bash scripts/infer_convnext_base_fp16_performance.sh
+```
+
+## Results
+
+| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) |
+| -------------- | --------- | --------- | ------- | -------- | -------- |
+| ConvNeXt Base | 32 | FP16 | 219.817 | 83.70 | 96.73 |
diff --git a/models/cv/classification/convnext_base/ixrt/build_engine.py b/models/cv/classification/convnext_base/ixrt/build_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..038c15d5af7a51da6386aa0f994e830178330fb3
--- /dev/null
+++ b/models/cv/classification/convnext_base/ixrt/build_engine.py
@@ -0,0 +1,52 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import os
+import cv2
+import argparse
+import numpy as np
+
+import torch
+import tensorrt
+
+def main(config):
+    """Build a serialized engine from an ONNX model and write it to disk.
+
+    Args:
+        config: Parsed CLI namespace providing ``model`` (ONNX file path),
+            ``precision`` ("float16", "int8" or "float32") and ``engine``
+            (output file path).
+
+    Raises:
+        RuntimeError: If the ONNX file cannot be parsed or the builder does
+            not return a serialized network.
+    """
+    IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING)
+    builder = tensorrt.Builder(IXRT_LOGGER)
+    EXPLICIT_BATCH = 1 << int(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
+    network = builder.create_network(EXPLICIT_BATCH)
+    build_config = builder.create_builder_config()
+    parser = tensorrt.OnnxParser(network, IXRT_LOGGER)
+    # The parser reports failure through its return value; without this check
+    # a broken model would only surface later as an empty engine.
+    if not parser.parse_from_file(config.model):
+        raise RuntimeError(f"Failed to parse ONNX model `{config.model}`.")
+
+    # Only request a reduced-precision mode when one was asked for; "float32"
+    # keeps the builder default instead of silently building an FP16 engine.
+    precision_flags = {
+        "int8": tensorrt.BuilderFlag.INT8,
+        "float16": tensorrt.BuilderFlag.FP16,
+    }
+    precision = precision_flags.get(config.precision)
+    if precision is not None:
+        build_config.set_flag(precision)
+
+    plan = builder.build_serialized_network(network, build_config)
+    if plan is None:
+        raise RuntimeError(f"Failed to build an engine from `{config.model}`.")
+
+    engine_file_path = config.engine
+    with open(engine_file_path, "wb") as f:
+        f.write(plan)
+
+def parse_args():
+    """Parse the command-line options of the engine builder.
+
+    Returns:
+        argparse.Namespace with ``model``, ``precision`` and ``engine``.
+    """
+    cli = argparse.ArgumentParser()
+    cli.add_argument("--model", type=str)
+    cli.add_argument(
+        "--precision",
+        type=str,
+        choices=["float16", "int8", "float32"],
+        default="int8",
+        help="The precision of datatype",
+    )
+    cli.add_argument("--engine", type=str, default=None)
+    return cli.parse_args()
+
+# Script entry point: parse the CLI options and build the engine.
+if __name__ == "__main__":
+    args = parse_args()
+    main(args)
\ No newline at end of file
diff --git a/models/cv/classification/convnext_base/ixrt/calibration_dataset.py b/models/cv/classification/convnext_base/ixrt/calibration_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..ec931c656abf5b2309dc9938490df46e4e8cdb19
--- /dev/null
+++ b/models/cv/classification/convnext_base/ixrt/calibration_dataset.py
@@ -0,0 +1,112 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import os
+
+import torch
+import torchvision.datasets
+from torch.utils.data import DataLoader
+from torchvision import models
+from torchvision import transforms as T
+
+
+class CalibrationImageNet(torchvision.datasets.ImageFolder):
+    """ImageFolder variant whose labels are read from ``<root>/val_map.txt``.
+
+    Every usable line of the label file holds an image file name and an
+    integer label separated by a single tab.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        label_file = os.path.join(self.root, "val_map.txt")
+        if not os.path.exists(label_file):
+            raise FileNotFoundError(f"Not found label file `{label_file}`.")
+
+        self.img2label_map = self.make_img2label_map(label_file)
+
+    def make_img2label_map(self, path):
+        """Return a dict mapping image file name to int label read from ``path``.
+
+        Lines that do not split into exactly two tab-separated fields, or whose
+        image name is empty, are skipped.
+        """
+        with open(path) as f:
+            rows = f.readlines()
+
+        mapping = {}
+        for row in rows:
+            fields = row.strip().split("\t")
+            if len(fields) != 2:
+                continue
+            name = fields[0].strip()
+            if not name:
+                continue
+            mapping[name] = int(fields[1].strip())
+        return mapping
+
+    def __getitem__(self, index):
+        """Return ``(sample, label)``; the label is looked up by file name."""
+        img_path, _ = self.samples[index]
+        image = self.loader(img_path)
+        if self.transform is not None:
+            image = self.transform(image)
+        # The folder-derived target is ignored in favour of the label file.
+        return image, self.img2label_map[os.path.basename(img_path)]
+
+
+def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0):
+    """Build the calibration and verification data loaders for ``data_path``.
+
+    Args:
+        data_path: Dataset root passed to ``CalibrationImageNet``.
+        num_samples: Number of leading samples used for calibration, or
+            ``None`` to use the whole dataset.
+        img_sz: Side length of the centre crop.
+        batch_size: Batch size of both loaders.
+        workers: Number of DataLoader worker processes.
+
+    Returns:
+        Tuple ``(calibration_dataloader, verify_dataloader)``; the first is
+        shuffled, the second iterates the full dataset in order.
+    """
+    preprocess = T.Compose(
+        [
+            T.Resize(256),
+            T.CenterCrop(img_sz),
+            T.ToTensor(),
+            T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+        ]
+    )
+    full_dataset = CalibrationImageNet(data_path, transform=preprocess)
+
+    if num_samples is None:
+        calib_subset = full_dataset
+    else:
+        calib_subset = torch.utils.data.Subset(
+            full_dataset, indices=range(num_samples)
+        )
+
+    # Both loaders share everything except the dataset and the shuffle flag.
+    shared = dict(batch_size=batch_size, drop_last=False, num_workers=workers)
+    calibration_dataloader = DataLoader(calib_subset, shuffle=True, **shared)
+    verify_dataloader = DataLoader(full_dataset, shuffle=False, **shared)
+
+    return calibration_dataloader, verify_dataloader
+
+
+def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000):
+    """Return the calibration loader covering ``step`` batches of the dataset.
+
+    The sample count is ``step * batch_size`` capped at ``total_sample``; a
+    negative ``step`` selects the whole dataset.
+    """
+    sample_limit = None if step < 0 else min(total_sample, step * batch_size)
+    loaders = create_dataloaders(
+        dataset_dir,
+        img_sz=img_sz,
+        batch_size=batch_size,
+        workers=workers,
+        num_samples=sample_limit,
+    )
+    # Only the calibration loader is needed; the verify loader is discarded.
+    return loaders[0]
diff --git a/models/cv/classification/convnext_base/ixrt/common.py b/models/cv/classification/convnext_base/ixrt/common.py
new file mode 100644
index 0000000000000000000000000000000000000000..fd6a84d85fcc4ede8f64ed0e0a2dd7022527a956
--- /dev/null
+++ b/models/cv/classification/convnext_base/ixrt/common.py
@@ -0,0 +1,81 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import os
+import cv2
+import glob
+import torch
+import tensorrt
+import numpy as np
+from cuda import cuda, cudart
+
+def eval_batch(batch_score, batch_label):
+    """Count the top-1 and top-5 hits of one batch of classification scores.
+
+    Args:
+        batch_score: NumPy array of per-class scores; row ``i`` belongs to
+            ``batch_label[i]``. Rows beyond ``len(batch_label)`` are ignored.
+        batch_label: Tensor or sequence of integer class labels.
+
+    Returns:
+        Tuple ``(top1, top5)`` of Python ints.
+    """
+    # as_tensor converts the array directly; wrapping from_numpy() in
+    # torch.tensor() made a second copy and raised a UserWarning.
+    scores = torch.as_tensor(batch_score, dtype=torch.float32)
+    _, top5_indices = scores.topk(5)
+
+    labels = torch.as_tensor(batch_label).reshape(-1)
+    # Compare every label against its row of top-5 class indices at once.
+    hits = top5_indices[: labels.numel()] == labels.unsqueeze(1)
+    top1 = int(hits[:, 0].sum())
+    top5 = int(hits.any(dim=1).sum())
+    return top1, top5
+
+def create_engine_context(engine_path, logger):
+    """Deserialize the engine file and create an execution context for it.
+
+    Args:
+        engine_path: Path of the serialized engine file.
+        logger: Logger handed to ``tensorrt.Runtime``.
+
+    Returns:
+        Tuple ``(engine, context)``.
+    """
+    with open(engine_path, "rb") as engine_file:
+        trt_runtime = tensorrt.Runtime(logger)
+        assert trt_runtime
+        serialized_engine = engine_file.read()
+        engine = trt_runtime.deserialize_cuda_engine(serialized_engine)
+        assert engine
+        context = engine.create_execution_context()
+        assert context
+
+    return engine, context
+
+def get_io_bindings(engine):
+    """Allocate one device buffer per engine binding and describe each binding.
+
+    Args:
+        engine: Engine exposing ``num_bindings`` and the ``get_binding_*`` /
+            ``binding_is_input`` accessors.
+
+    Returns:
+        Tuple ``(inputs, outputs, allocations)``. ``inputs`` and ``outputs``
+        are lists of dicts with the keys ``index``, ``name``, ``dtype``,
+        ``shape``, ``allocation`` and ``nbytes``; ``allocations`` lists the
+        device pointers of all bindings in binding order.
+    """
+    inputs = []
+    outputs = []
+    allocations = []
+
+    for i in range(engine.num_bindings):
+        name = engine.get_binding_name(i)
+        np_dtype = np.dtype(tensorrt.nptype(engine.get_binding_dtype(i)))
+        shape = list(engine.get_binding_shape(i))
+
+        # Buffer size in bytes: element size times the product of all dims.
+        size = np_dtype.itemsize
+        for s in shape:
+            size *= s
+
+        err, allocation = cudart.cudaMalloc(size)
+        assert err == cudart.cudaError_t.cudaSuccess
+
+        binding = {
+            "index": i,
+            "name": name,
+            "dtype": np_dtype,
+            "shape": shape,
+            "allocation": allocation,
+            "nbytes": size,
+        }
+        print(f"binding {i}, name : {name} dtype : {np_dtype} shape : {shape}")
+        allocations.append(allocation)
+        if engine.binding_is_input(i):
+            inputs.append(binding)
+        else:
+            outputs.append(binding)
+    return inputs, outputs, allocations
\ No newline at end of file
diff --git a/models/cv/classification/convnext_base/ixrt/config/CONVNEXT_CONFIG b/models/cv/classification/convnext_base/ixrt/config/CONVNEXT_CONFIG
new file mode 100644
index 0000000000000000000000000000000000000000..71bf70ad92da305b87f4cd5283b66a7f5d893f0e
--- /dev/null
+++ b/models/cv/classification/convnext_base/ixrt/config/CONVNEXT_CONFIG
@@ -0,0 +1,33 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+# IMGSIZE : height/width of the model input
+# MODEL_NAME : basename of the generated onnx/engine files
+# ORIGINE_MODEL : file name of the original onnx model
+IMGSIZE=224
+MODEL_NAME=Convnext_base
+ORIGINE_MODEL=convnext_base.onnx
+
+# QUANT CONFIG (only takes effect when PRECISION is int8)
+    # QUANT_OBSERVER : quantization strategy, one of [hist_percentile, percentile, minmax, entropy, ema]
+    # QUANT_BATCHSIZE : batch size of the dataloader used for quantization; preferably the same as the batch size in the onnx model, otherwise some ops (e.g. Reshape) may infer wrong shapes
+    # QUANT_STEP : number of quantization steps
+    # QUANT_SEED : random seed, so that quantization results are reproducible
+    # QUANT_EXIST_ONNX : fill in when a quantized model from another source already exists
+QUANT_OBSERVER=minmax
+QUANT_BATCHSIZE=1
+QUANT_STEP=32
+QUANT_SEED=42
+DISABLE_QUANT_LIST=
+QUANT_EXIST_ONNX=
diff --git a/models/cv/classification/convnext_base/ixrt/export.py b/models/cv/classification/convnext_base/ixrt/export.py
new file mode 100644
index 0000000000000000000000000000000000000000..d9a2fe01e69085ca3c53782d32080c54cbbd1e5a
--- /dev/null
+++ b/models/cv/classification/convnext_base/ixrt/export.py
@@ -0,0 +1,61 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import torch
+import torchvision
+import argparse
+
+def parse_args():
+    """Parse the command-line options of the ONNX export script.
+
+    Returns:
+        argparse.Namespace with the required ``weight`` and ``output`` paths.
+    """
+    cli = argparse.ArgumentParser()
+    cli.add_argument("--weight", type=str, required=True, help="pytorch model weight.")
+    cli.add_argument("--output", type=str, required=True, help="export onnx model path.")
+    return cli.parse_args()
+
+def main():
+    """Export torchvision ConvNeXt-Base weights to an ONNX file.
+
+    Reads ``--weight`` and ``--output`` from the command line, loads the state
+    dict into ``torchvision.models.convnext_base`` and exports it with opset 13
+    and a dynamic first axis on both the input and the output.
+    """
+    args = parse_args()
+
+    model = torchvision.models.convnext_base()
+    # map_location="cpu" lets a checkpoint that was saved from an accelerator
+    # load on a host where that device is not available.
+    model.load_state_dict(torch.load(args.weight, map_location="cpu"))
+    model.eval()
+
+    input_names = ['input']
+    output_names = ['output']
+    dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}}
+    dummy_input = torch.randn(1, 3, 224, 224)
+
+    torch.onnx.export(
+        model,
+        dummy_input,
+        args.output,
+        input_names=input_names,
+        dynamic_axes=dynamic_axes,
+        output_names=output_names,
+        opset_version=13,
+    )
+
+    print("Export onnx model successfully! ")
+
+# Script entry point: run the export when executed directly.
+if __name__ == "__main__":
+    main()
diff --git a/models/cv/classification/convnext_base/ixrt/inference.py b/models/cv/classification/convnext_base/ixrt/inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..22f1644ced656c96602e15e468166d6df4fec92c
--- /dev/null
+++ b/models/cv/classification/convnext_base/ixrt/inference.py
@@ -0,0 +1,171 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import argparse
+import json
+import os
+import re
+import time
+from tqdm import tqdm
+
+import cv2
+import numpy as np
+from cuda import cuda, cudart
+import torch
+import tensorrt
+
+from calibration_dataset import getdataloader
+from common import eval_batch, create_engine_context, get_io_bindings
+
+def _run_fps_test(context, allocations, config):
+    """Time ``config.loop_count`` engine executions and return samples per second."""
+    if config.loop_count <= 0:
+        raise ValueError("--loop_count must be positive in FPS mode.")
+
+    torch.cuda.synchronize()
+    start_time = time.time()
+
+    for _ in range(config.loop_count):
+        context.execute_v2(allocations)
+
+    torch.cuda.synchronize()
+    forward_time = time.time() - start_time
+
+    # Each iteration executes one full batch, so throughput is based on the
+    # samples actually executed; capping the count (previously at 50000)
+    # while timing every iteration under-reports FPS for long runs.
+    return config.loop_count * config.bsz / forward_time
+
+
+def _run_accuracy_test(context, inputs, outputs, allocations, config):
+    """Run the engine over the dataset and return ``(top1_hits, top5_hits, total)``."""
+    dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz)
+
+    # Host buffer that receives the first output binding after every batch.
+    output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"])
+    print(f"output shape : {output.shape} output type : {output.dtype}")
+
+    total_sample = 0
+    acc_top1, acc_top5 = 0, 0
+
+    with tqdm(total=len(dataloader)) as _tqdm:
+        for batch_data, batch_label in dataloader:
+            batch_data = batch_data.numpy().astype(inputs[0]["dtype"])
+            batch_data = np.ascontiguousarray(batch_data)
+            total_sample += batch_data.shape[0]
+            (err,) = cudart.cudaMemcpy(
+                inputs[0]["allocation"],
+                batch_data,
+                batch_data.nbytes,
+                cudart.cudaMemcpyKind.cudaMemcpyHostToDevice,
+            )
+            assert err == cudart.cudaError_t.cudaSuccess
+            context.execute_v2(allocations)
+            (err,) = cudart.cudaMemcpy(
+                output,
+                outputs[0]["allocation"],
+                outputs[0]["nbytes"],
+                cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost,
+            )
+            assert err == cudart.cudaError_t.cudaSuccess
+
+            # A 4-D output such as [N, C, 1, 1] is viewed as [N, C] for scoring.
+            scores = output.squeeze(axis=(2, 3)) if output.ndim == 4 else output
+
+            batch_top1, batch_top5 = eval_batch(scores, batch_label)
+            acc_top1 += batch_top1
+            acc_top5 += batch_top5
+
+            _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample),
+                              acc_5='{:.4f}'.format(acc_top5/total_sample))
+            _tqdm.update(1)
+
+    return acc_top1, acc_top5, total_sample
+
+
+def main(config):
+    """Run the FPS or accuracy check selected by ``config.test_mode``.
+
+    Loads the engine from ``config.engine_file``, optionally warms it up, runs
+    the selected test, prints the result and terminates the process with exit
+    status 0 when the target is met and 1 otherwise.
+
+    Args:
+        config: Namespace produced by ``parse_config``.
+
+    Raises:
+        ValueError: If ``config.test_mode`` is neither "FPS" nor "ACC", or if
+            ``config.loop_count`` is not positive in FPS mode.
+        SystemExit: Always, once a test has finished.
+    """
+    logger = tensorrt.Logger(tensorrt.Logger.ERROR)
+
+    # Load Engine && I/O bindings
+    engine, context = create_engine_context(config.engine_file, logger)
+    inputs, outputs, allocations = get_io_bindings(engine)
+
+    try:
+        # Warm up
+        if config.warm_up > 0:
+            print("\nWarm Start.")
+            for _ in range(config.warm_up):
+                context.execute_v2(allocations)
+            print("Warm Done.")
+
+        # Inference
+        if config.test_mode == "FPS":
+            fps = _run_fps_test(context, allocations, config)
+            print("FPS : ", fps)
+            print(f"Performance Check : Test {fps} >= target {config.fps_target}")
+            passed = fps >= config.fps_target
+        elif config.test_mode == "ACC":
+            acc_top1, acc_top5, total_sample = _run_accuracy_test(
+                context, inputs, outputs, allocations, config
+            )
+            print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}")
+            print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}")
+            acc1 = acc_top1/total_sample
+            print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}")
+            passed = acc1 >= config.acc_target
+        else:
+            # An unknown mode used to fall through and exit successfully.
+            raise ValueError(f"Unsupported test mode `{config.test_mode}`, expected FPS or ACC.")
+    finally:
+        # Release every device buffer in both modes; only warn on failure so
+        # that an error raised above is not masked by the cleanup.
+        for allocation in allocations:
+            (err,) = cudart.cudaFree(allocation)
+            if err != cudart.cudaError_t.cudaSuccess:
+                print(f"Warning: cudaFree failed with {err}")
+
+    print("pass!" if passed else "failed!")
+    raise SystemExit(0 if passed else 1)
+
+def parse_config():
+    """Parse the command-line options of the inference script.
+
+    Returns:
+        argparse.Namespace with ``test_mode``, ``engine_file``,
+        ``datasets_dir``, ``warm_up``, ``bsz``, ``imgsz``, ``use_async``,
+        ``device``, ``fps_target``, ``acc_target`` and ``loop_count``.
+    """
+    parser = argparse.ArgumentParser()
+    # The script only implements the FPS and ACC modes, so other values are
+    # rejected here instead of being silently ignored.
+    parser.add_argument(
+        "--test_mode",
+        type=str,
+        default="FPS",
+        choices=["FPS", "ACC"],
+        help="test mode: FPS (performance) or ACC (accuracy)",
+    )
+    parser.add_argument(
+        "--engine_file",
+        type=str,
+        help="engine file path"
+    )
+    parser.add_argument(
+        "--datasets_dir",
+        type=str,
+        default="",
+        help="ImageNet dir",
+    )
+    parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times")
+    parser.add_argument("--bsz", type=int, default=32, help="test batch size")
+    parser.add_argument(
+        "--imgsz",
+        "--img",
+        "--img-size",
+        type=int,
+        default=224,
+        help="inference size h,w",
+    )
+    parser.add_argument("--use_async", action="store_true")
+    parser.add_argument(
+        "--device", type=int, default=0, help="cuda device, i.e. 0 or 0,1,2,3,4"
+    )
+    parser.add_argument("--fps_target", type=float, default=-1.0)
+    parser.add_argument("--acc_target", type=float, default=-1.0)
+    parser.add_argument("--loop_count", type=int, default=-1)
+
+    config = parser.parse_args()
+    return config
+
+# Script entry point: parse the CLI options and run the selected test.
+if __name__ == "__main__":
+    config = parse_config()
+    main(config)
diff --git a/models/cv/classification/convnext_base/ixrt/modify_batchsize.py b/models/cv/classification/convnext_base/ixrt/modify_batchsize.py
new file mode 100644
index 0000000000000000000000000000000000000000..689b7a972dcbfec77c185592ede16bb4f04fa4fd
--- /dev/null
+++ b/models/cv/classification/convnext_base/ixrt/modify_batchsize.py
@@ -0,0 +1,56 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import onnx
+import argparse
+
+def change_input_dim(model, bsz):
+    """Set the first dimension of every graph input of ``model`` in place.
+
+    Args:
+        model: ONNX model whose ``graph.input`` entries are tensors with at
+            least one dimension (not checked here).
+        bsz: Requested batch size. An int or a digit-only string sets a fixed
+            ``dim_value``; any other string sets a symbolic ``dim_param``;
+            every other value (e.g. ``None``) falls back to a batch size of 1.
+    """
+    for graph_input in model.graph.input:
+        dim1 = graph_input.type.tensor_type.shape.dim[0]
+        # The numeric check has to come first: testing for ``str`` first made
+        # the digit-string case unreachable and turned "32" into a symbol.
+        if isinstance(bsz, int) or (isinstance(bsz, str) and bsz.isdigit()):
+            # set given batch size
+            dim1.dim_value = int(bsz)
+        elif isinstance(bsz, str):
+            # set dynamic (symbolic) batch size
+            dim1.dim_param = bsz
+        else:
+            # set batch size of 1
+            dim1.dim_value = 1
+
+def parse_args():
+    """Parse the command-line options of the batch-size rewriting script.
+
+    Returns:
+        argparse.Namespace with ``batch_size`` (int), ``origin_model`` and
+        ``output_model``.
+    """
+    cli = argparse.ArgumentParser()
+    for flag, kind in (
+        ("--batch_size", int),
+        ("--origin_model", str),
+        ("--output_model", str),
+    ):
+        cli.add_argument(flag, type=kind)
+    return cli.parse_args()
+
+# Guarded so that importing this module does not parse argv or touch files;
+# running it as a script behaves as before.
+if __name__ == "__main__":
+    args = parse_args()
+    model = onnx.load(args.origin_model)
+    change_input_dim(model, args.batch_size)
+    onnx.save(model, args.output_model)
+
+
+
+
+
diff --git a/models/cv/classification/convnext_base/ixrt/scripts/infer_convnext_base_fp16_accuracy.sh b/models/cv/classification/convnext_base/ixrt/scripts/infer_convnext_base_fp16_accuracy.sh
new file mode 100644
index 0000000000000000000000000000000000000000..a43c1a2067ef1527376279bc10ccace08dfa9b8f
--- /dev/null
+++ b/models/cv/classification/convnext_base/ixrt/scripts/infer_convnext_base_fp16_accuracy.sh
@@ -0,0 +1,119 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+EXIT_STATUS=0
+# Set EXIT_STATUS to 1 when the command that ran immediately before this
+# call finished with a non-zero status.
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then
+        EXIT_STATUS=1
+    fi
+}
+
+# Run parameters
+BSZ=32
+TGT=-1
+WARM_UP=0
+LOOP_COUNT=-1
+RUN_MODE=ACC
+PRECISION=float16
+
+# Update arguments
+# `--bs <value>` overrides BSZ and `--tgt <value>` overrides TGT; the value is
+# the word that follows the flag on the command line.
+index=0
+options=$@
+arguments=($options)
+for argument in $options
+do
+    index=`expr $index + 1`
+    case $argument in
+        --bs) BSZ=${arguments[index]};;
+        --tgt) TGT=${arguments[index]};;
+    esac
+done
+
+# All paths are relative to PROJ_DIR (current directory by default);
+# DATASETS_DIR is taken from the environment.
+PROJ_DIR=${PROJ_DIR:-"."}
+DATASETS_DIR="${DATASETS_DIR}"
+CHECKPOINTS_DIR="${PROJ_DIR}"
+RUN_DIR="${PROJ_DIR}"
+CONFIG_DIR="${PROJ_DIR}/config/CONVNEXT_CONFIG"
+# The config file defines IMGSIZE, MODEL_NAME and ORIGINE_MODEL used below.
+source ${CONFIG_DIR}
+ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo CONFIG_DIR : ${CONFIG_DIR}
+echo ====================== Model Info ======================
+echo Model Name : ${MODEL_NAME}
+echo Onnx Path : ${ORIGINE_MODEL}
+
+step=0
+SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
+
+# Simplify Model (skipped when the simplified file already exists)
+let step++
+echo;
+echo [STEP ${step}] : Simplify Model
+if [ -f ${SIM_MODEL} ];then
+    echo " "Simplify Model, ${SIM_MODEL} has been existed
+else
+    python3 ${RUN_DIR}/simplify_model.py \
+        --origin_model $ORIGINE_MODEL \
+        --output_model ${SIM_MODEL}
+    echo " "Generate ${SIM_MODEL}
+fi
+
+# Change Batchsize (skipped when the model for this batch size already exists)
+let step++
+echo;
+echo [STEP ${step}] : Change Batchsize
+FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx
+if [ -f $FINAL_MODEL ];then
+    echo " "Change Batchsize Skip, $FINAL_MODEL has been existed
+else
+    python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
+        --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL}
+    echo " "Generate ${FINAL_MODEL}
+fi
+
+# Build Engine (skipped when the engine file already exists)
+let step++
+echo;
+echo [STEP ${step}] : Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
+if [ -f $ENGINE_FILE ];then
+    echo " "Build Engine Skip, $ENGINE_FILE has been existed
+else
+    python3 ${RUN_DIR}/build_engine.py \
+        --precision ${PRECISION} \
+        --model ${FINAL_MODEL} \
+        --engine ${ENGINE_FILE}
+    echo " "Generate Engine ${ENGINE_FILE}
+fi
+
+# Inference: only this step's exit status is recorded via check_status.
+let step++
+echo;
+echo [STEP ${step}] : Inference
+python3 ${RUN_DIR}/inference.py \
+    --engine_file=${ENGINE_FILE} \
+    --datasets_dir=${DATASETS_DIR} \
+    --imgsz=${IMGSIZE} \
+    --warm_up=${WARM_UP} \
+    --loop_count ${LOOP_COUNT} \
+    --test_mode ${RUN_MODE} \
+    --acc_target ${TGT} \
+    --bsz ${BSZ}; check_status
+
+exit ${EXIT_STATUS}
diff --git a/models/cv/classification/convnext_base/ixrt/scripts/infer_convnext_base_fp16_performance.sh b/models/cv/classification/convnext_base/ixrt/scripts/infer_convnext_base_fp16_performance.sh
new file mode 100644
index 0000000000000000000000000000000000000000..3e5bca55207bccc0e09377816ba66e83d2909d86
--- /dev/null
+++ b/models/cv/classification/convnext_base/ixrt/scripts/infer_convnext_base_fp16_performance.sh
@@ -0,0 +1,119 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+EXIT_STATUS=0
+# Set EXIT_STATUS to 1 when the command that ran immediately before this
+# call finished with a non-zero status.
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then
+        EXIT_STATUS=1
+    fi
+}
+
+# Run parameters
+BSZ=32
+TGT=-1
+WARM_UP=3
+LOOP_COUNT=20
+RUN_MODE=FPS
+PRECISION=float16
+
+# Update arguments
+# `--bs <value>` overrides BSZ and `--tgt <value>` overrides TGT; the value is
+# the word that follows the flag on the command line.
+index=0
+options=$@
+arguments=($options)
+for argument in $options
+do
+    index=`expr $index + 1`
+    case $argument in
+        --bs) BSZ=${arguments[index]};;
+        --tgt) TGT=${arguments[index]};;
+    esac
+done
+
+# All paths are relative to PROJ_DIR (current directory by default);
+# DATASETS_DIR is taken from the environment.
+PROJ_DIR=${PROJ_DIR:-"."}
+DATASETS_DIR="${DATASETS_DIR}"
+CHECKPOINTS_DIR="${PROJ_DIR}"
+RUN_DIR="${PROJ_DIR}"
+CONFIG_DIR="${PROJ_DIR}/config/CONVNEXT_CONFIG"
+# The config file defines IMGSIZE, MODEL_NAME and ORIGINE_MODEL used below.
+source ${CONFIG_DIR}
+ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo CONFIG_DIR : ${CONFIG_DIR}
+echo ====================== Model Info ======================
+echo Model Name : ${MODEL_NAME}
+echo Onnx Path : ${ORIGINE_MODEL}
+
+step=0
+SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
+
+# Simplify Model (skipped when the simplified file already exists)
+let step++
+echo;
+echo [STEP ${step}] : Simplify Model
+if [ -f ${SIM_MODEL} ];then
+    echo " "Simplify Model, ${SIM_MODEL} has been existed
+else
+    python3 ${RUN_DIR}/simplify_model.py \
+        --origin_model $ORIGINE_MODEL \
+        --output_model ${SIM_MODEL}
+    echo " "Generate ${SIM_MODEL}
+fi
+
+# Change Batchsize (skipped when the model for this batch size already exists)
+let step++
+echo;
+echo [STEP ${step}] : Change Batchsize
+FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx
+if [ -f $FINAL_MODEL ];then
+    echo " "Change Batchsize Skip, $FINAL_MODEL has been existed
+else
+    python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
+        --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL}
+    echo " "Generate ${FINAL_MODEL}
+fi
+
+# Build Engine (skipped when the engine file already exists)
+let step++
+echo;
+echo [STEP ${step}] : Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
+if [ -f $ENGINE_FILE ];then
+    echo " "Build Engine Skip, $ENGINE_FILE has been existed
+else
+    python3 ${RUN_DIR}/build_engine.py \
+        --precision ${PRECISION} \
+        --model ${FINAL_MODEL} \
+        --engine ${ENGINE_FILE}
+    echo " "Generate Engine ${ENGINE_FILE}
+fi
+
+# Inference: only this step's exit status is recorded via check_status.
+let step++
+echo;
+echo [STEP ${step}] : Inference
+python3 ${RUN_DIR}/inference.py \
+    --engine_file=${ENGINE_FILE} \
+    --datasets_dir=${DATASETS_DIR} \
+    --imgsz=${IMGSIZE} \
+    --warm_up=${WARM_UP} \
+    --loop_count ${LOOP_COUNT} \
+    --test_mode ${RUN_MODE} \
+    --fps_target ${TGT} \
+    --bsz ${BSZ}; check_status
+
+exit ${EXIT_STATUS}
diff --git a/models/cv/classification/convnext_base/ixrt/simplify_model.py b/models/cv/classification/convnext_base/ixrt/simplify_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..9948a9fa083ff99ff88e556e96614b02cccaa965
--- /dev/null
+++ b/models/cv/classification/convnext_base/ixrt/simplify_model.py
@@ -0,0 +1,40 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import onnx
+import argparse
+from onnxsim import simplify
+
+# Simplify
+def simplify_model(args):
+    """Simplify an ONNX model, run shape inference on it and save the result.
+
+    Args:
+        args: Namespace with ``origin_model`` (input path) and
+            ``output_model`` (output path).
+
+    Raises:
+        RuntimeError: If onnxsim reports that the simplified model failed its
+            validation check.
+    """
+    onnx_model = onnx.load(args.origin_model)
+    model_simp, check = simplify(onnx_model)
+    # ``check`` is onnxsim's validation result; saving an unvalidated model
+    # would only move the failure to a later pipeline step.
+    if not check:
+        raise RuntimeError(f"Simplified ONNX model `{args.origin_model}` could not be validated.")
+    model_simp = onnx.shape_inference.infer_shapes(model_simp)
+    onnx.save(model_simp, args.output_model)
+    print(" Simplify onnx Done.")
+
+def parse_args():
+    """Parse the command-line options of the model simplification script.
+
+    Returns:
+        argparse.Namespace with ``origin_model``, ``output_model`` and the
+        boolean ``reshape`` flag.
+    """
+    cli = argparse.ArgumentParser()
+    for path_flag in ("--origin_model", "--output_model"):
+        cli.add_argument(path_flag, type=str)
+    cli.add_argument("--reshape", action="store_true")
+    return cli.parse_args()
+
+# Guarded so that importing this module does not parse argv or touch files;
+# running it as a script behaves as before.
+if __name__ == "__main__":
+    args = parse_args()
+    simplify_model(args)
+
+
+
+
diff --git a/models/cv/classification/deit_tiny/ixrt/README.md b/models/cv/classification/deit_tiny/ixrt/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..7a58584d058a99a036b9b201b0821cb03de7015b
--- /dev/null
+++ b/models/cv/classification/deit_tiny/ixrt/README.md
@@ -0,0 +1,72 @@
+# DeiT-tiny
+
+## Description
+
+DeiT Tiny is a lightweight vision transformer designed for data-efficient learning. It achieves rapid training and high accuracy on small datasets through innovative attention distillation methods, while maintaining the simplicity and efficiency of the model.
+
+## Setup
+
+### Install
+
+```bash
+# Install libGL
+## CentOS
+yum install -y mesa-libGL
+## Ubuntu
+apt install -y libgl1-mesa-dev
+
+pip3 install tqdm
+pip3 install onnx
+pip3 install onnxsim
+pip3 install tabulate
+pip3 install ppq
+pip3 install cuda-python
+```
+
+### Download
+
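+Pretrained model: <https://download.openmmlab.com/mmclassification/v0/deit/deit-tiny_pt-4xb256_in1k_20220218-13b382a0.pth>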
+
+Dataset: <https://www.image-net.org/download.php> to download the validation dataset.
+
+### Model Conversion
+
+```bash
+# git clone mmpretrain
+git clone -b v0.24.0 https://github.com/open-mmlab/mmpretrain.git
+
+# export onnx model
+python3 export.py --cfg mmpretrain/configs/deit/deit-tiny_pt-4xb256_in1k.py --weight deit-tiny_pt-4xb256_in1k_20220218-13b382a0.pth --output deit_tiny.onnx
+
+# Use onnxsim optimize onnx model
+onnxsim deit_tiny.onnx deit_tiny_opt.onnx
+
+```
+
+## Inference
+
+```bash
+export DATASETS_DIR=/Path/to/imagenet_val/
+```
+
+### FP16
+
+```bash
+
+# Accuracy
+bash scripts/infer_deit_tiny_fp16_accuracy.sh
+# Performance
+bash scripts/infer_deit_tiny_fp16_performance.sh
+
+```
+
+## Results
+
+| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) |
+| --------- | --------- | --------- | -------- | -------- | -------- |
+| DeiT-tiny | 32 | FP16 | 1446.690 | 74.34 | 92.21 |
+
+## Reference
+
+Deit_tiny: <https://github.com/open-mmlab/mmpretrain/tree/main/configs/deit>
\ No newline at end of file
diff --git a/models/cv/classification/deit_tiny/ixrt/build_engine.py b/models/cv/classification/deit_tiny/ixrt/build_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..038c15d5af7a51da6386aa0f994e830178330fb3
--- /dev/null
+++ b/models/cv/classification/deit_tiny/ixrt/build_engine.py
@@ -0,0 +1,52 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import os
+import cv2
+import argparse
+import numpy as np
+
+import torch
+import tensorrt
+
+def main(config):
+    """Build a serialized engine from an ONNX model and write it to disk.
+
+    Args:
+        config: parsed arguments providing ``model`` (ONNX path),
+            ``precision`` and ``engine`` (output path).
+
+    Raises:
+        RuntimeError: if the builder does not return a serialized network.
+    """
+    IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING)
+    builder = tensorrt.Builder(IXRT_LOGGER)
+    EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
+    network = builder.create_network(EXPLICIT_BATCH)
+    build_config = builder.create_builder_config()
+    parser = tensorrt.OnnxParser(network, IXRT_LOGGER)
+    parser.parse_from_file(config.model)
+
+    # NOTE(review): every precision other than "int8" (including "float32")
+    # selects the FP16 flag -- confirm this fallback is intended.
+    precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16
+    build_config.set_flag(precision)
+
+    plan = builder.build_serialized_network(network, build_config)
+    # Check the result before touching the output path: opening the file
+    # first would leave an empty engine file behind on a failed build, and
+    # the runner scripts skip the build step whenever that file exists.
+    if plan is None:
+        raise RuntimeError(f"Failed to build engine from `{config.model}`.")
+    engine_file_path = config.engine
+    with open(engine_file_path, "wb") as f:
+        f.write(plan)
+
+def parse_args():
+    """Parse the command-line options of the engine builder."""
+    arg_parser = argparse.ArgumentParser()
+    arg_parser.add_argument("--model", type=str)
+    arg_parser.add_argument(
+        "--precision",
+        type=str,
+        choices=["float16", "int8", "float32"],
+        default="int8",
+        help="The precision of datatype",
+    )
+    arg_parser.add_argument("--engine", type=str, default=None)
+    return arg_parser.parse_args()
+
+
+if __name__ == "__main__":
+    args = parse_args()
+    main(args)
\ No newline at end of file
diff --git a/models/cv/classification/deit_tiny/ixrt/calibration_dataset.py b/models/cv/classification/deit_tiny/ixrt/calibration_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..ec931c656abf5b2309dc9938490df46e4e8cdb19
--- /dev/null
+++ b/models/cv/classification/deit_tiny/ixrt/calibration_dataset.py
@@ -0,0 +1,112 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import os
+
+import torch
+import torchvision.datasets
+from torch.utils.data import DataLoader
+from torchvision import models
+from torchvision import transforms as T
+
+
+class CalibrationImageNet(torchvision.datasets.ImageFolder):
+    """ImageFolder whose targets are read from ``val_map.txt`` in the dataset root."""
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        label_file = os.path.join(self.root, "val_map.txt")
+        if not os.path.exists(label_file):
+            raise FileNotFoundError(f"Not found label file `{label_file}`.")
+
+        self.img2label_map = self.make_img2label_map(label_file)
+
+    def make_img2label_map(self, path):
+        """Return a dict mapping image file name to integer label.
+
+        Each useful line of ``path`` is ``<image name>\\t<label>``; lines
+        that do not split into exactly two tab-separated fields, or whose
+        image name is empty, are ignored.
+        """
+        with open(path) as f:
+            raw_lines = f.readlines()
+
+        mapping = {}
+        for raw in raw_lines:
+            fields = raw.strip().split("\t")
+            if len(fields) != 2:
+                continue
+            name = fields[0].strip()
+            if not name:
+                continue
+            mapping[name] = int(fields[1].strip())
+        return mapping
+
+    def __getitem__(self, index):
+        """Return ``(transformed image, label)`` for the sample at ``index``."""
+        # The folder-derived target is discarded; the label is looked up by
+        # file name instead, and ``target_transform`` is not applied.
+        path, _ = self.samples[index]
+        image = self.loader(path)
+        if self.transform is not None:
+            image = self.transform(image)
+        label = self.img2label_map[os.path.basename(path)]
+
+        return image, label
+
+
+def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0):
+    """Create the calibration and verification dataloaders.
+
+    Args:
+        data_path: dataset root passed to ``CalibrationImageNet``.
+        num_samples: number of leading samples used for calibration;
+            ``None`` uses the whole dataset.
+        img_sz: side length of the center crop.
+        batch_size: batch size of both dataloaders.
+        workers: number of dataloader worker processes.
+
+    Returns:
+        ``(calibration_dataloader, verify_dataloader)``: the first iterates
+        the (possibly truncated) dataset in shuffled order, the second
+        iterates the full dataset in order.
+    """
+    dataset = CalibrationImageNet(
+        data_path,
+        transform=T.Compose(
+            [
+                T.Resize(256),
+                T.CenterCrop(img_sz),
+                T.ToTensor(),
+                T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+            ]
+        ),
+    )
+
+    calibration_dataset = dataset
+    if num_samples is not None:
+        # Clamp to the dataset size: indices past the end would only fail
+        # later, with an IndexError in the middle of iteration.
+        subset_size = min(num_samples, len(dataset))
+        calibration_dataset = torch.utils.data.Subset(
+            dataset, indices=range(subset_size)
+        )
+
+    calibration_dataloader = DataLoader(
+        calibration_dataset,
+        shuffle=True,
+        batch_size=batch_size,
+        drop_last=False,
+        num_workers=workers,
+    )
+
+    verify_dataloader = DataLoader(
+        dataset,
+        shuffle=False,
+        batch_size=batch_size,
+        drop_last=False,
+        num_workers=workers,
+    )
+
+    return calibration_dataloader, verify_dataloader
+
+
+def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000):
+    """Return the calibration dataloader for ``dataset_dir``.
+
+    A negative ``step`` selects the whole dataset; otherwise the loader is
+    limited to ``step * batch_size`` samples, capped at ``total_sample``.
+    """
+    num_samples = None if step < 0 else min(total_sample, step * batch_size)
+    loaders = create_dataloaders(
+        dataset_dir,
+        img_sz=img_sz,
+        batch_size=batch_size,
+        workers=workers,
+        num_samples=num_samples,
+    )
+    # Only the calibration loader is used; the verification loader is dropped.
+    return loaders[0]
diff --git a/models/cv/classification/deit_tiny/ixrt/common.py b/models/cv/classification/deit_tiny/ixrt/common.py
new file mode 100644
index 0000000000000000000000000000000000000000..fd6a84d85fcc4ede8f64ed0e0a2dd7022527a956
--- /dev/null
+++ b/models/cv/classification/deit_tiny/ixrt/common.py
@@ -0,0 +1,81 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import os
+import cv2
+import glob
+import torch
+import tensorrt
+import numpy as np
+from cuda import cuda, cudart
+
+def eval_batch(batch_score, batch_label):
+    """Count top-1 and top-5 hits for one batch.
+
+    Args:
+        batch_score: numpy array of per-class scores, one row per sample.
+        batch_label: labels of the samples; may be shorter than
+            ``batch_score``, in which case only the leading rows are scored.
+
+    Returns:
+        ``(top1, top5)`` as Python ints.
+    """
+    # from_numpy + to() converts without the copy-construct warning that
+    # torch.tensor(<tensor>) emits.
+    scores = torch.from_numpy(batch_score).to(torch.float32)
+    _, indices = scores.topk(5)
+
+    labels = torch.as_tensor(batch_label).view(-1, 1)
+    # hits[i, k] is True when the k-th best class of sample i is its label.
+    hits = indices[: labels.shape[0]] == labels
+    top1 = int(hits[:, 0].sum())
+    top5 = int(hits.any(dim=1).sum())
+    return top1, top5
+
+def create_engine_context(engine_path, logger):
+    """Deserialize the engine stored at ``engine_path`` and create an execution context.
+
+    Returns:
+        ``(engine, context)``; each creation step is guarded by an assert.
+    """
+    with open(engine_path, "rb") as engine_file:
+        runtime = tensorrt.Runtime(logger)
+        assert runtime
+        serialized = engine_file.read()
+        engine = runtime.deserialize_cuda_engine(serialized)
+        assert engine
+        context = engine.create_execution_context()
+        assert context
+
+    return engine, context
+
+def get_io_bindings(engine):
+    """Allocate a device buffer for every binding of ``engine``.
+
+    Returns:
+        ``(inputs, outputs, allocations)``: two lists of binding
+        descriptors (dicts with ``index``, ``name``, ``dtype``, ``shape``,
+        ``allocation`` and ``nbytes``) and the list of all allocations in
+        binding order.
+    """
+    # Setup I/O bindings
+    inputs = []
+    outputs = []
+    allocations = []
+
+    for i in range(engine.num_bindings):
+        name = engine.get_binding_name(i)
+        np_dtype = np.dtype(tensorrt.nptype(engine.get_binding_dtype(i)))
+        shape = engine.get_binding_shape(i)
+        # Buffer size in bytes: element size times every dimension.
+        size = np_dtype.itemsize
+        for s in shape:
+            size *= s
+        err, allocation = cudart.cudaMalloc(size)
+        assert err == cudart.cudaError_t.cudaSuccess
+        binding = {
+            "index": i,
+            "name": name,
+            "dtype": np_dtype,
+            "shape": list(shape),
+            "allocation": allocation,
+            "nbytes": size,
+        }
+        print(f"binding {i}, name : {name} dtype : {np_dtype} shape : {list(shape)}")
+        allocations.append(allocation)
+        if engine.binding_is_input(i):
+            inputs.append(binding)
+        else:
+            outputs.append(binding)
+    return inputs, outputs, allocations
\ No newline at end of file
diff --git a/models/cv/classification/deit_tiny/ixrt/config/DEIT_TINY_CONFIG b/models/cv/classification/deit_tiny/ixrt/config/DEIT_TINY_CONFIG
new file mode 100644
index 0000000000000000000000000000000000000000..2464e4f5679a0a2beaab9f17d94390ee7769cf47
--- /dev/null
+++ b/models/cv/classification/deit_tiny/ixrt/config/DEIT_TINY_CONFIG
@@ -0,0 +1,33 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+# IMGSIZE : model input height/width
+# MODEL_NAME : basename of the generated onnx/engine files
+# ORIGINE_MODEL : file name of the original onnx model
+IMGSIZE=224
+MODEL_NAME=deit_tiny
+ORIGINE_MODEL=deit_tiny_opt.onnx
+
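+# QUANT CONFIG (only takes effect when PRECISION is int8)
+ # QUANT_OBSERVER : quantization strategy, one of [hist_percentile, percentile, minmax, entropy, ema]
+ # QUANT_BATCHSIZE : batch size of the dataloader used for quantization; ideally the same as the batch size in the onnx model, because some ops (e.g. Reshape) may otherwise infer wrong shapes
+ # QUANT_STEP : number of quantization steps
+ # QUANT_SEED : random seed, so that quantization results are reproducible
+ # QUANT_EXIST_ONNX : fill in if a quantized model from another source already exists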
+QUANT_OBSERVER=minmax
+QUANT_BATCHSIZE=1
+QUANT_STEP=32
+QUANT_SEED=42
+DISABLE_QUANT_LIST=
+QUANT_EXIST_ONNX=
diff --git a/models/cv/classification/deit_tiny/ixrt/export.py b/models/cv/classification/deit_tiny/ixrt/export.py
new file mode 100644
index 0000000000000000000000000000000000000000..0078670ffbf4bbcce3358d4a2cedc42ce61176f5
--- /dev/null
+++ b/models/cv/classification/deit_tiny/ixrt/export.py
@@ -0,0 +1,78 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import argparse
+
+import torch
+from mmcls.apis import init_model
+
+class Model(torch.nn.Module):
+    """Wrap a model built by ``init_model`` so ``forward`` returns the output of ``head.layers.head``."""
+
+    def __init__(self, config_file, checkpoint_file):
+        super().__init__()
+        # The wrapped model is created on the CPU.
+        self.model = init_model(config_file, checkpoint_file, device="cpu")
+
+    def forward(self, x):
+        """Apply backbone, ``head.pre_logits`` and ``head.layers.head`` to ``x`` in turn."""
+        wrapped = self.model
+        pre_logits = wrapped.head.pre_logits(wrapped.backbone(x))
+        return wrapped.head.layers.head(pre_logits)
+
+def parse_args():
+    """Parse the required ``--weight``, ``--cfg`` and ``--output`` options."""
+    arg_parser = argparse.ArgumentParser()
+    required_options = (
+        ("--weight", "pytorch model weight."),
+        ("--cfg", "model config file."),
+        ("--output", "export onnx model path."),
+    )
+    for flag, description in required_options:
+        arg_parser.add_argument(flag, type=str, required=True, help=description)
+
+    return arg_parser.parse_args()
+
+def main():
+    """Export the wrapped model to ONNX with a dynamic first axis."""
+    args = parse_args()
+    model = Model(args.cfg, args.weight).eval()
+
+    # Axis 0 of both tensors is exported under the symbolic name '-1'.
+    torch.onnx.export(
+        model,
+        torch.randn(1, 3, 224, 224),
+        args.output,
+        input_names=['input'],
+        output_names=['output'],
+        dynamic_axes={'input': {0: '-1'}, 'output': {0: '-1'}},
+        opset_version=13,
+    )
+
+    print("Export onnx model successfully! ")
+
+
+if __name__ == '__main__':
+    main()
+
diff --git a/models/cv/classification/deit_tiny/ixrt/inference.py b/models/cv/classification/deit_tiny/ixrt/inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..22f1644ced656c96602e15e468166d6df4fec92c
--- /dev/null
+++ b/models/cv/classification/deit_tiny/ixrt/inference.py
@@ -0,0 +1,171 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import argparse
+import json
+import os
+import re
+import time
+from tqdm import tqdm
+
+import cv2
+import numpy as np
+from cuda import cuda, cudart
+import torch
+import tensorrt
+
+from calibration_dataset import getdataloader
+from common import eval_batch, create_engine_context, get_io_bindings
+
+def main(config):
+    """Run an FPS benchmark or an accuracy evaluation on a serialized engine.
+
+    Args:
+        config: namespace produced by ``parse_config()``.
+
+    In "FPS" and "ACC" mode the process exits with status 0 when the
+    measured value reaches the configured target and with status 1
+    otherwise. Any other ``test_mode`` falls through and returns normally.
+    """
+    # The dataloader is created in every mode, although only the "ACC"
+    # branch iterates it.
+    dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz)
+
+    host_mem = tensorrt.IHostMemory
+    logger = tensorrt.Logger(tensorrt.Logger.ERROR)
+
+    # Load Engine && I/O bindings
+    engine, context = create_engine_context(config.engine_file, logger)
+    inputs, outputs, allocations = get_io_bindings(engine)
+
+    # Warm up
+    if config.warm_up > 0:
+        print("\nWarm Start.")
+        for i in range(config.warm_up):
+            context.execute_v2(allocations)
+        print("Warm Done.")
+
+    # Inference
+    if config.test_mode == "FPS":
+        # Synchronize before and after the timed loop so the wall-clock
+        # interval covers the executions issued inside it.
+        torch.cuda.synchronize()
+        start_time = time.time()
+
+        for i in range(config.loop_count):
+            context.execute_v2(allocations)
+
+        torch.cuda.synchronize()
+        end_time = time.time()
+        forward_time = end_time - start_time
+
+        # Samples processed: loop_count * bsz, capped at 50000.
+        num_samples = 50000
+        if config.loop_count * config.bsz < num_samples:
+            num_samples = config.loop_count * config.bsz
+        fps = num_samples / forward_time
+
+        print("FPS : ", fps)
+        print(f"Performance Check : Test {fps} >= target {config.fps_target}")
+        if fps >= config.fps_target:
+            print("pass!")
+            exit()
+        else:
+            print("failed!")
+            exit(1)
+
+    elif config.test_mode == "ACC":
+
+        ## Prepare the output data
+        output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"])
+        print(f"output shape : {output.shape} output type : {output.dtype}")
+
+        total_sample = 0
+        acc_top1, acc_top5 = 0, 0
+
+        with tqdm(total= len(dataloader)) as _tqdm:
+            for idx, (batch_data, batch_label) in enumerate(dataloader):
+                # Cast to the dtype of the input binding and make the array
+                # contiguous before the host-to-device copy.
+                batch_data = batch_data.numpy().astype(inputs[0]["dtype"])
+                batch_data = np.ascontiguousarray(batch_data)
+                total_sample += batch_data.shape[0]
+                (err,) = cudart.cudaMemcpy(
+                    inputs[0]["allocation"],
+                    batch_data,
+                    batch_data.nbytes,
+                    cudart.cudaMemcpyKind.cudaMemcpyHostToDevice,
+                )
+                assert err == cudart.cudaError_t.cudaSuccess
+                # cuda.memcpy_htod(inputs[0]["allocation"], batch_data)
+                context.execute_v2(allocations)
+                # Copy the whole output buffer back to the host array.
+                (err,) = cudart.cudaMemcpy(
+                    output,
+                    outputs[0]["allocation"],
+                    outputs[0]["nbytes"],
+                    cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost,
+                )
+                assert err == cudart.cudaError_t.cudaSuccess
+                # cuda.memcpy_dtoh(output, outputs[0]["allocation"])
+
+                # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model
+                if len(output.shape) == 4:
+                    output = output.squeeze(axis=(2,3))
+
+                batch_top1, batch_top5 = eval_batch(output, batch_label)
+                acc_top1 += batch_top1
+                acc_top5 += batch_top5
+
+                _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample),
+                                  acc_5='{:.4f}'.format(acc_top5/total_sample))
+                _tqdm.update(1)
+        # Release the first input and first output device buffers.
+        err, = cudart.cudaFree(inputs[0]["allocation"])
+        assert err == cudart.cudaError_t.cudaSuccess
+        err, = cudart.cudaFree(outputs[0]["allocation"])
+        assert err == cudart.cudaError_t.cudaSuccess
+        print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}")
+        print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}")
+        acc1 = acc_top1/total_sample
+        print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}")
+        if acc1 >= config.acc_target:
+            print("pass!")
+            exit()
+        else:
+            print("failed!")
+            exit(1)
+
+def parse_config():
+    """Parse the command-line options of the inference script.
+
+    Returns:
+        argparse.Namespace holding the test mode, engine path, dataset
+        directory, warm-up and loop counts, batch and image size, and the
+        FPS/accuracy targets.
+    """
+    parser = argparse.ArgumentParser()
+    # main() handles the modes "FPS" and "ACC"; the help text used to
+    # advertise "MAP", which main() does not implement.
+    parser.add_argument("--test_mode", type=str, default="FPS", help="FPS ACC")
+    parser.add_argument(
+        "--engine_file",
+        type=str,
+        help="engine file path"
+    )
+    parser.add_argument(
+        "--datasets_dir",
+        type=str,
+        default="",
+        help="ImageNet dir",
+    )
+    parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times")
+    parser.add_argument("--bsz", type=int, default=32, help="test batch size")
+    parser.add_argument(
+        "--imgsz",
+        "--img",
+        "--img-size",
+        type=int,
+        default=224,
+        help="inference size h,w",
+    )
+    parser.add_argument("--use_async", action="store_true")
+    parser.add_argument(
+        "--device", type=int, default=0, help="cuda device, i.e. 0 or 0,1,2,3,4"
+    )
+    parser.add_argument("--fps_target", type=float, default=-1.0)
+    parser.add_argument("--acc_target", type=float, default=-1.0)
+    parser.add_argument("--loop_count", type=int, default=-1)
+
+    config = parser.parse_args()
+    return config
+
+if __name__ == "__main__":
+    config = parse_config()
+    main(config)
diff --git a/models/cv/classification/deit_tiny/ixrt/modify_batchsize.py b/models/cv/classification/deit_tiny/ixrt/modify_batchsize.py
new file mode 100644
index 0000000000000000000000000000000000000000..689b7a972dcbfec77c185592ede16bb4f04fa4fd
--- /dev/null
+++ b/models/cv/classification/deit_tiny/ixrt/modify_batchsize.py
@@ -0,0 +1,56 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import onnx
+import argparse
+
+def change_input_dim(model, bsz):
+    """Set the first dimension of every graph input of ``model`` in place.
+
+    Args:
+        model: ONNX model whose ``graph.input`` entries are tensors with at
+            least one dimension (not checked here).
+        bsz: an int or a digit string sets a fixed batch size; any other
+            string sets a symbolic (dynamic) batch dimension of that name;
+            anything else falls back to a batch size of 1.
+    """
+    # The following code changes the first dimension of every input to be bsz.
+    # Modify as appropriate ... note that this requires all inputs to
+    # have the same batch size.
+    for graph_input in model.graph.input:
+        dim1 = graph_input.type.tensor_type.shape.dim[0]
+        # Numeric values must be tested before the generic string case,
+        # otherwise a digit string such as "8" becomes a symbolic name.
+        if isinstance(bsz, int) or (isinstance(bsz, str) and bsz.isdigit()):
+            # set given batch size
+            dim1.dim_value = int(bsz)
+        elif isinstance(bsz, str):
+            # set dynamic batch size
+            dim1.dim_param = bsz
+        else:
+            # set batch size of 1
+            dim1.dim_value = 1
+
+def parse_args():
+    """Parse ``--batch_size`` (int), ``--origin_model`` and ``--output_model``."""
+    arg_parser = argparse.ArgumentParser()
+    for flag, value_type in (
+        ("--batch_size", int),
+        ("--origin_model", str),
+        ("--output_model", str),
+    ):
+        arg_parser.add_argument(flag, type=value_type)
+    return arg_parser.parse_args()
+
+# Script body (runs at import time): load the ONNX model, rewrite the first
+# dimension of its inputs and save the result to --output_model.
+args = parse_args()
+model = onnx.load(args.origin_model)
+change_input_dim(model, args.batch_size)
+onnx.save(model, args.output_model)
+
+
+
+
+
diff --git a/models/cv/classification/deit_tiny/ixrt/scripts/infer_deit_tiny_fp16_accuracy.sh b/models/cv/classification/deit_tiny/ixrt/scripts/infer_deit_tiny_fp16_accuracy.sh
new file mode 100644
index 0000000000000000000000000000000000000000..0379dbe2f67a065abc0826cda83686aabb1cb34e
--- /dev/null
+++ b/models/cv/classification/deit_tiny/ixrt/scripts/infer_deit_tiny_fp16_accuracy.sh
@@ -0,0 +1,119 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+EXIT_STATUS=0
+# Set EXIT_STATUS to 1 when the command run just before this call exited
+# with a non-zero status (used as `cmd; check_status`).
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then
+        EXIT_STATUS=1
+    fi
+}
+
+# Run parameters
+BSZ=32
+TGT=-1
+WARM_UP=0
+LOOP_COUNT=-1
+RUN_MODE=ACC
+PRECISION=float16
+
+# Update arguments: the word following --bs / --tgt overrides BSZ / TGT
+index=0
+options=$@
+arguments=($options)
+for argument in $options
+do
+    index=`expr $index + 1`
+    case $argument in
+      --bs) BSZ=${arguments[index]};;
+      --tgt) TGT=${arguments[index]};;
+    esac
+done
+
+# All paths are relative to PROJ_DIR (default: current directory); the sourced
+# config file provides IMGSIZE, MODEL_NAME and ORIGINE_MODEL.
+PROJ_DIR=${PROJ_DIR:-"."}
+DATASETS_DIR="${DATASETS_DIR}"
+CHECKPOINTS_DIR="${PROJ_DIR}"
+RUN_DIR="${PROJ_DIR}"
+CONFIG_DIR="${PROJ_DIR}/config/DEIT_TINY_CONFIG"
+source ${CONFIG_DIR}
+ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo CONFIG_DIR : ${CONFIG_DIR}
+echo ====================== Model Info ======================
+echo Model Name : ${MODEL_NAME}
+echo Onnx Path : ${ORIGINE_MODEL}
+
+step=0
+SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
+
+# Simplify Model (skipped when the simplified model already exists)
+let step++
+echo;
+echo [STEP ${step}] : Simplify Model
+if [ -f ${SIM_MODEL} ];then
+    echo "  "Simplify Model, ${SIM_MODEL} has been existed
+else
+    python3 ${RUN_DIR}/simplify_model.py \
+        --origin_model $ORIGINE_MODEL \
+        --output_model ${SIM_MODEL}
+    echo "  "Generate ${SIM_MODEL}
+fi
+
+# Change Batchsize (skipped when the batch-size-specific model already exists)
+let step++
+echo;
+echo [STEP ${step}] : Change Batchsize
+FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx
+if [ -f $FINAL_MODEL ];then
+    echo "  "Change Batchsize Skip, $FINAL_MODEL has been existed
+else
+    python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
+        --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL}
+    echo "  "Generate ${FINAL_MODEL}
+fi
+
+# Build Engine (skipped when the engine file already exists)
+let step++
+echo;
+echo [STEP ${step}] : Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
+if [ -f $ENGINE_FILE ];then
+    echo "  "Build Engine Skip, $ENGINE_FILE has been existed
+else
+    python3 ${RUN_DIR}/build_engine.py \
+        --precision ${PRECISION} \
+        --model ${FINAL_MODEL} \
+        --engine ${ENGINE_FILE}
+    echo "  "Generate Engine ${ENGINE_FILE}
+fi
+
+# Inference: only this step's exit status is recorded via check_status
+let step++
+echo;
+echo [STEP ${step}] : Inference
+python3 ${RUN_DIR}/inference.py \
+    --engine_file=${ENGINE_FILE} \
+    --datasets_dir=${DATASETS_DIR} \
+    --imgsz=${IMGSIZE} \
+    --warm_up=${WARM_UP} \
+    --loop_count ${LOOP_COUNT} \
+    --test_mode ${RUN_MODE} \
+    --acc_target ${TGT} \
+    --bsz ${BSZ}; check_status
+
+exit ${EXIT_STATUS}
diff --git a/models/cv/classification/deit_tiny/ixrt/scripts/infer_deit_tiny_fp16_performance.sh b/models/cv/classification/deit_tiny/ixrt/scripts/infer_deit_tiny_fp16_performance.sh
new file mode 100644
index 0000000000000000000000000000000000000000..18b48851daed8821def8500cf55b6387c32c3add
--- /dev/null
+++ b/models/cv/classification/deit_tiny/ixrt/scripts/infer_deit_tiny_fp16_performance.sh
@@ -0,0 +1,119 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+EXIT_STATUS=0
+# Set EXIT_STATUS to 1 when the command run just before this call exited
+# with a non-zero status (used as `cmd; check_status`).
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then
+        EXIT_STATUS=1
+    fi
+}
+
+# Run parameters
+BSZ=32
+TGT=-1
+WARM_UP=3
+LOOP_COUNT=20
+RUN_MODE=FPS
+PRECISION=float16
+
+# Update arguments: the word following --bs / --tgt overrides BSZ / TGT
+index=0
+options=$@
+arguments=($options)
+for argument in $options
+do
+    index=`expr $index + 1`
+    case $argument in
+      --bs) BSZ=${arguments[index]};;
+      --tgt) TGT=${arguments[index]};;
+    esac
+done
+
+# All paths are relative to PROJ_DIR (default: current directory); the sourced
+# config file provides IMGSIZE, MODEL_NAME and ORIGINE_MODEL.
+PROJ_DIR=${PROJ_DIR:-"."}
+DATASETS_DIR="${DATASETS_DIR}"
+CHECKPOINTS_DIR="${PROJ_DIR}"
+RUN_DIR="${PROJ_DIR}"
+CONFIG_DIR="${PROJ_DIR}/config/DEIT_TINY_CONFIG"
+source ${CONFIG_DIR}
+ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo CONFIG_DIR : ${CONFIG_DIR}
+echo ====================== Model Info ======================
+echo Model Name : ${MODEL_NAME}
+echo Onnx Path : ${ORIGINE_MODEL}
+
+step=0
+SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
+
+# Simplify Model (skipped when the simplified model already exists)
+let step++
+echo;
+echo [STEP ${step}] : Simplify Model
+if [ -f ${SIM_MODEL} ];then
+    echo "  "Simplify Model, ${SIM_MODEL} has been existed
+else
+    python3 ${RUN_DIR}/simplify_model.py \
+        --origin_model $ORIGINE_MODEL \
+        --output_model ${SIM_MODEL}
+    echo "  "Generate ${SIM_MODEL}
+fi
+
+# Change Batchsize (skipped when the batch-size-specific model already exists)
+let step++
+echo;
+echo [STEP ${step}] : Change Batchsize
+FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx
+if [ -f $FINAL_MODEL ];then
+    echo "  "Change Batchsize Skip, $FINAL_MODEL has been existed
+else
+    python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
+        --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL}
+    echo "  "Generate ${FINAL_MODEL}
+fi
+
+# Build Engine (skipped when the engine file already exists)
+let step++
+echo;
+echo [STEP ${step}] : Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
+if [ -f $ENGINE_FILE ];then
+    echo "  "Build Engine Skip, $ENGINE_FILE has been existed
+else
+    python3 ${RUN_DIR}/build_engine.py \
+        --precision ${PRECISION} \
+        --model ${FINAL_MODEL} \
+        --engine ${ENGINE_FILE}
+    echo "  "Generate Engine ${ENGINE_FILE}
+fi
+
+# Inference: only this step's exit status is recorded via check_status
+let step++
+echo;
+echo [STEP ${step}] : Inference
+python3 ${RUN_DIR}/inference.py \
+    --engine_file=${ENGINE_FILE} \
+    --datasets_dir=${DATASETS_DIR} \
+    --imgsz=${IMGSIZE} \
+    --warm_up=${WARM_UP} \
+    --loop_count ${LOOP_COUNT} \
+    --test_mode ${RUN_MODE} \
+    --fps_target ${TGT} \
+    --bsz ${BSZ}; check_status
+
+exit ${EXIT_STATUS}
diff --git a/models/cv/classification/deit_tiny/ixrt/simplify_model.py b/models/cv/classification/deit_tiny/ixrt/simplify_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..9948a9fa083ff99ff88e556e96614b02cccaa965
--- /dev/null
+++ b/models/cv/classification/deit_tiny/ixrt/simplify_model.py
@@ -0,0 +1,40 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import onnx
+import argparse
+from onnxsim import simplify
+
+# Simplify
+def simplify_model(args):
+    """Simplify an ONNX model, re-run shape inference and save the result.
+
+    Args:
+        args: namespace providing ``origin_model`` (input path) and
+            ``output_model`` (output path).
+    """
+    onnx_model = onnx.load(args.origin_model)
+    model_simp, check = simplify(onnx_model)
+    if not check:
+        # simplify() returns a validation flag next to the model; report a
+        # failed check instead of discarding it. Saving still proceeds so
+        # the script stays best-effort.
+        print("  Warning: simplified onnx model could not be validated.")
+    model_simp = onnx.shape_inference.infer_shapes(model_simp)
+    onnx.save(model_simp, args.output_model)
+    print("  Simplify onnx Done.")
+
+def parse_args():
+    """Parse ``--origin_model``, ``--output_model`` and the ``--reshape`` switch."""
+    arg_parser = argparse.ArgumentParser()
+    for flag in ("--origin_model", "--output_model"):
+        arg_parser.add_argument(flag, type=str)
+    arg_parser.add_argument("--reshape", action="store_true")
+    return arg_parser.parse_args()
+
+# Script body (runs at import time): parse the options and simplify the model.
+args = parse_args()
+simplify_model(args)
+
+
+
+
diff --git a/models/cv/classification/densenet201/ixrt/README.md b/models/cv/classification/densenet201/ixrt/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..9475535534e4da7c0dc4710d51addbcaaec866d3
--- /dev/null
+++ b/models/cv/classification/densenet201/ixrt/README.md
@@ -0,0 +1,58 @@
+# DenseNet201
+
+## Description
+
+DenseNet201 is a deep convolutional neural network that stands out for its unique dense connection architecture, where each layer integrates features from all previous layers, effectively reusing features and reducing the number of parameters. This design not only enhances the network's information flow and parameter efficiency but also increases the model's regularization effect, helping to prevent overfitting. DenseNet201 consists of multiple dense blocks and transition layers, capable of capturing rich feature representations while maintaining computational efficiency, making it suitable for complex image recognition tasks.
+
+## Setup
+
+### Install
+
+```bash
+# Install libGL
+## CentOS
+yum install -y mesa-libGL
+## Ubuntu
+apt install -y libgl1-mesa-dev
+
+pip3 install tqdm
+pip3 install onnx
+pip3 install onnxsim
+pip3 install tabulate
+pip3 install ppq
+pip3 install tqdm
+pip3 install cuda-python
+```
+
+### Download
+
+Pretrained model: `densenet201-c1103571.pth` (the checkpoint passed to `export.py` under Model Conversion).
+
+Dataset: <https://www.image-net.org/download.php> to download the validation dataset.
+
+### Model Conversion
+
+```bash
+python3 export.py --weight densenet201-c1103571.pth --output densenet201.onnx
+```
+
+## Inference
+
+```bash
+export DATASETS_DIR=/Path/to/imagenet_val/
+```
+
+### FP16
+
+```bash
+# Accuracy
+bash scripts/infer_densenet201_fp16_accuracy.sh
+# Performance
+bash scripts/infer_densenet201_fp16_performance.sh
+```
+
+## Results
+
+| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) |
+| ----------- | --------- | --------- | -------- | -------- | -------- |
+| DenseNet201 | 32 | FP16 | 788.946 | 76.88 | 93.37 |
diff --git a/models/cv/classification/densenet201/ixrt/build_engine.py b/models/cv/classification/densenet201/ixrt/build_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..038c15d5af7a51da6386aa0f994e830178330fb3
--- /dev/null
+++ b/models/cv/classification/densenet201/ixrt/build_engine.py
@@ -0,0 +1,52 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import os
+import cv2
+import argparse
+import numpy as np
+
+import torch
+import tensorrt
+
+def main(config):
+    """Build a serialized engine from an ONNX model and write it to disk.
+
+    Args:
+        config: parsed arguments providing ``model`` (ONNX path),
+            ``precision`` and ``engine`` (output path).
+
+    Raises:
+        RuntimeError: if the builder does not return a serialized network.
+    """
+    IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING)
+    builder = tensorrt.Builder(IXRT_LOGGER)
+    EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
+    network = builder.create_network(EXPLICIT_BATCH)
+    build_config = builder.create_builder_config()
+    parser = tensorrt.OnnxParser(network, IXRT_LOGGER)
+    parser.parse_from_file(config.model)
+
+    # NOTE(review): every precision other than "int8" (including "float32")
+    # selects the FP16 flag -- confirm this fallback is intended.
+    precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16
+    build_config.set_flag(precision)
+
+    plan = builder.build_serialized_network(network, build_config)
+    # Check the result before touching the output path: opening the file
+    # first would leave an empty engine file behind on a failed build.
+    if plan is None:
+        raise RuntimeError(f"Failed to build engine from `{config.model}`.")
+    engine_file_path = config.engine
+    with open(engine_file_path, "wb") as f:
+        f.write(plan)
+
+def parse_args():
+    """Parse the command-line options of the engine builder."""
+    arg_parser = argparse.ArgumentParser()
+    arg_parser.add_argument("--model", type=str)
+    arg_parser.add_argument(
+        "--precision",
+        type=str,
+        choices=["float16", "int8", "float32"],
+        default="int8",
+        help="The precision of datatype",
+    )
+    arg_parser.add_argument("--engine", type=str, default=None)
+    return arg_parser.parse_args()
+
+
+if __name__ == "__main__":
+    args = parse_args()
+    main(args)
\ No newline at end of file
diff --git a/models/cv/classification/densenet201/ixrt/calibration_dataset.py b/models/cv/classification/densenet201/ixrt/calibration_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..ec931c656abf5b2309dc9938490df46e4e8cdb19
--- /dev/null
+++ b/models/cv/classification/densenet201/ixrt/calibration_dataset.py
@@ -0,0 +1,112 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import os
+
+import torch
+import torchvision.datasets
+from torch.utils.data import DataLoader
+from torchvision import models
+from torchvision import transforms as T
+
+
+class CalibrationImageNet(torchvision.datasets.ImageFolder):
+ def __init__(self, *args, **kwargs):
+ super(CalibrationImageNet, self).__init__(*args, **kwargs)
+ img2label_path = os.path.join(self.root, "val_map.txt")
+ if not os.path.exists(img2label_path):
+ raise FileNotFoundError(f"Not found label file `{img2label_path}`.")
+
+ self.img2label_map = self.make_img2label_map(img2label_path)
+
+ def make_img2label_map(self, path):
+ with open(path) as f:
+ lines = f.readlines()
+
+ img2lable_map = dict()
+ for line in lines:
+ line = line.lstrip().rstrip().split("\t")
+ if len(line) != 2:
+ continue
+ img_name, label = line
+ img_name = img_name.strip()
+ if img_name in [None, ""]:
+ continue
+ label = int(label.strip())
+ img2lable_map[img_name] = label
+ return img2lable_map
+
+ def __getitem__(self, index):
+ path, target = self.samples[index]
+ sample = self.loader(path)
+ if self.transform is not None:
+ sample = self.transform(sample)
+ # if self.target_transform is not None:
+ # target = self.target_transform(target)
+ img_name = os.path.basename(path)
+ target = self.img2label_map[img_name]
+
+ return sample, target
+
+
+def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0):
+ dataset = CalibrationImageNet(
+ data_path,
+ transform=T.Compose(
+ [
+ T.Resize(256),
+ T.CenterCrop(img_sz),
+ T.ToTensor(),
+ T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+ ]
+ ),
+ )
+
+ calibration_dataset = dataset
+ if num_samples is not None:
+ calibration_dataset = torch.utils.data.Subset(
+ dataset, indices=range(num_samples)
+ )
+
+ calibration_dataloader = DataLoader(
+ calibration_dataset,
+ shuffle=True,
+ batch_size=batch_size,
+ drop_last=False,
+ num_workers=workers,
+ )
+
+ verify_dataloader = DataLoader(
+ dataset,
+ shuffle=False,
+ batch_size=batch_size,
+ drop_last=False,
+ num_workers=workers,
+ )
+
+ return calibration_dataloader, verify_dataloader
+
+
+def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000):
+ num_samples = min(total_sample, step * batch_size)
+ if step < 0:
+ num_samples = None
+ calibration_dataloader, _ = create_dataloaders(
+ dataset_dir,
+ img_sz=img_sz,
+ batch_size=batch_size,
+ workers=workers,
+ num_samples=num_samples,
+ )
+ return calibration_dataloader
diff --git a/models/cv/classification/densenet201/ixrt/common.py b/models/cv/classification/densenet201/ixrt/common.py
new file mode 100644
index 0000000000000000000000000000000000000000..fd6a84d85fcc4ede8f64ed0e0a2dd7022527a956
--- /dev/null
+++ b/models/cv/classification/densenet201/ixrt/common.py
@@ -0,0 +1,81 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import os
+import cv2
+import glob
+import torch
+import tensorrt
+import numpy as np
+from cuda import cuda, cudart
+
+def eval_batch(batch_score, batch_label):
+ batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32)
+ values, indices = batch_score.topk(5)
+ top1, top5 = 0, 0
+ for idx, label in enumerate(batch_label):
+
+ if label == indices[idx][0]:
+ top1 += 1
+ if label in indices[idx]:
+ top5 += 1
+ return top1, top5
+
+def create_engine_context(engine_path, logger):
+ with open(engine_path, "rb") as f:
+ runtime = tensorrt.Runtime(logger)
+ assert runtime
+ engine = runtime.deserialize_cuda_engine(f.read())
+ assert engine
+ context = engine.create_execution_context()
+ assert context
+
+ return engine, context
+
+def get_io_bindings(engine):
+ # Setup I/O bindings
+ inputs = []
+ outputs = []
+ allocations = []
+
+ for i in range(engine.num_bindings):
+ is_input = False
+ if engine.binding_is_input(i):
+ is_input = True
+ name = engine.get_binding_name(i)
+ dtype = engine.get_binding_dtype(i)
+ shape = engine.get_binding_shape(i)
+ if is_input:
+ batch_size = shape[0]
+ size = np.dtype(tensorrt.nptype(dtype)).itemsize
+ for s in shape:
+ size *= s
+ err, allocation = cudart.cudaMalloc(size)
+ assert err == cudart.cudaError_t.cudaSuccess
+ binding = {
+ "index": i,
+ "name": name,
+ "dtype": np.dtype(tensorrt.nptype(dtype)),
+ "shape": list(shape),
+ "allocation": allocation,
+ "nbytes": size,
+ }
+ print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}")
+ allocations.append(allocation)
+ if engine.binding_is_input(i):
+ inputs.append(binding)
+ else:
+ outputs.append(binding)
+ return inputs, outputs, allocations
\ No newline at end of file
diff --git a/models/cv/classification/densenet201/ixrt/config/DENSENET201_CONFIG b/models/cv/classification/densenet201/ixrt/config/DENSENET201_CONFIG
new file mode 100644
index 0000000000000000000000000000000000000000..df472521a3b2507c9f0547316b4a0f075736e885
--- /dev/null
+++ b/models/cv/classification/densenet201/ixrt/config/DENSENET201_CONFIG
@@ -0,0 +1,33 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+# IMGSIZE : height/width of the model input
+# MODEL_NAME : basename used for the generated onnx/engine files
+# ORIGINE_MODEL : file name of the original onnx model
+IMGSIZE=224
+MODEL_NAME=densenet201
+ORIGINE_MODEL=densenet201.onnx
+
+# QUANT CONFIG (only takes effect when PRECISION is int8)
+    # QUANT_OBSERVER : quantization strategy, one of [hist_percentile, percentile, minmax, entropy, ema]
+    # QUANT_BATCHSIZE : batch size of the dataloader used during quantization; preferably the same as the
+    #                   batch size in the onnx model, otherwise some ops (e.g. Reshape) may infer wrong shapes
+    # QUANT_STEP : number of quantization steps
+    # QUANT_SEED : random seed, keeps quantization results reproducible
+    # QUANT_EXIST_ONNX : fill in if a quantized model from another source is available
+QUANT_OBSERVER=minmax
+QUANT_BATCHSIZE=1
+QUANT_STEP=32
+QUANT_SEED=42
+DISABLE_QUANT_LIST=
+QUANT_EXIST_ONNX=
diff --git a/models/cv/classification/densenet201/ixrt/export.py b/models/cv/classification/densenet201/ixrt/export.py
new file mode 100644
index 0000000000000000000000000000000000000000..66019547bf1101889f3fd699581741a114890959
--- /dev/null
+++ b/models/cv/classification/densenet201/ixrt/export.py
@@ -0,0 +1,74 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import torch
+import torchvision
+import argparse
+import re
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--weight",
+ type=str,
+ required=True,
+ help="pytorch model weight.")
+
+ parser.add_argument("--output",
+ type=str,
+ required=True,
+ help="export onnx model path.")
+
+ args = parser.parse_args()
+ return args
+
+def main():
+ args = parse_args()
+
+ model = torchvision.models.densenet201(weights=False)
+
+ state_dict = torch.load(args.weight)
+
+ pattern = re.compile(r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$'
+ )
+ for key in list(state_dict.keys()):
+ res = pattern.match(key)
+ if res:
+ new_key = res.group(1) + res.group(2)
+ state_dict[new_key] = state_dict[key]
+ del state_dict[key]
+
+ model.load_state_dict(state_dict)
+ model.eval()
+
+ input_names = ['input']
+ output_names = ['output']
+ dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}}
+ dummy_input = torch.randn(1, 3, 224, 224)
+
+ torch.onnx.export(
+ model,
+ dummy_input,
+ args.output,
+ input_names = input_names,
+ dynamic_axes = dynamic_axes,
+ output_names = output_names,
+ opset_version=13
+ )
+
+ print("Export onnx model successfully! ")
+
+# Script entry point: run the export when executed directly.
+if __name__ == "__main__":
+    main()
diff --git a/models/cv/classification/densenet201/ixrt/inference.py b/models/cv/classification/densenet201/ixrt/inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..22f1644ced656c96602e15e468166d6df4fec92c
--- /dev/null
+++ b/models/cv/classification/densenet201/ixrt/inference.py
@@ -0,0 +1,171 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import argparse
+import json
+import os
+import re
+import time
+from tqdm import tqdm
+
+import cv2
+import numpy as np
+from cuda import cuda, cudart
+import torch
+import tensorrt
+
+from calibration_dataset import getdataloader
+from common import eval_batch, create_engine_context, get_io_bindings
+
+def main(config):
+ dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz)
+
+ host_mem = tensorrt.IHostMemory
+ logger = tensorrt.Logger(tensorrt.Logger.ERROR)
+
+ # Load Engine && I/O bindings
+ engine, context = create_engine_context(config.engine_file, logger)
+ inputs, outputs, allocations = get_io_bindings(engine)
+
+ # Warm up
+ if config.warm_up > 0:
+ print("\nWarm Start.")
+ for i in range(config.warm_up):
+ context.execute_v2(allocations)
+ print("Warm Done.")
+
+ # Inference
+ if config.test_mode == "FPS":
+ torch.cuda.synchronize()
+ start_time = time.time()
+
+ for i in range(config.loop_count):
+ context.execute_v2(allocations)
+
+ torch.cuda.synchronize()
+ end_time = time.time()
+ forward_time = end_time - start_time
+
+ num_samples = 50000
+ if config.loop_count * config.bsz < num_samples:
+ num_samples = config.loop_count * config.bsz
+ fps = num_samples / forward_time
+
+ print("FPS : ", fps)
+ print(f"Performance Check : Test {fps} >= target {config.fps_target}")
+ if fps >= config.fps_target:
+ print("pass!")
+ exit()
+ else:
+ print("failed!")
+ exit(1)
+
+ elif config.test_mode == "ACC":
+
+ ## Prepare the output data
+ output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"])
+ print(f"output shape : {output.shape} output type : {output.dtype}")
+
+ total_sample = 0
+ acc_top1, acc_top5 = 0, 0
+
+ with tqdm(total= len(dataloader)) as _tqdm:
+ for idx, (batch_data, batch_label) in enumerate(dataloader):
+ batch_data = batch_data.numpy().astype(inputs[0]["dtype"])
+ batch_data = np.ascontiguousarray(batch_data)
+ total_sample += batch_data.shape[0]
+ (err,) = cudart.cudaMemcpy(
+ inputs[0]["allocation"],
+ batch_data,
+ batch_data.nbytes,
+ cudart.cudaMemcpyKind.cudaMemcpyHostToDevice,
+ )
+ assert err == cudart.cudaError_t.cudaSuccess
+ # cuda.memcpy_htod(inputs[0]["allocation"], batch_data)
+ context.execute_v2(allocations)
+ (err,) = cudart.cudaMemcpy(
+ output,
+ outputs[0]["allocation"],
+ outputs[0]["nbytes"],
+ cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost,
+ )
+ assert err == cudart.cudaError_t.cudaSuccess
+ # cuda.memcpy_dtoh(output, outputs[0]["allocation"])
+
+ # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model
+ if len(output.shape) == 4:
+ output = output.squeeze(axis=(2,3))
+
+ batch_top1, batch_top5 = eval_batch(output, batch_label)
+ acc_top1 += batch_top1
+ acc_top5 += batch_top5
+
+ _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample),
+ acc_5='{:.4f}'.format(acc_top5/total_sample))
+ _tqdm.update(1)
+ err, = cudart.cudaFree(inputs[0]["allocation"])
+ assert err == cudart.cudaError_t.cudaSuccess
+ err, = cudart.cudaFree(outputs[0]["allocation"])
+ assert err == cudart.cudaError_t.cudaSuccess
+ print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}")
+ print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}")
+ acc1 = acc_top1/total_sample
+ print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}")
+ if acc1 >= config.acc_target:
+ print("pass!")
+ exit()
+ else:
+ print("failed!")
+ exit(1)
+
+def parse_config():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP")
+ parser.add_argument(
+ "--engine_file",
+ type=str,
+ help="engine file path"
+ )
+ parser.add_argument(
+ "--datasets_dir",
+ type=str,
+ default="",
+ help="ImageNet dir",
+ )
+ parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times")
+ parser.add_argument("--bsz", type=int, default=32, help="test batch size")
+ parser.add_argument(
+ "--imgsz",
+ "--img",
+ "--img-size",
+ type=int,
+ default=224,
+ help="inference size h,w",
+ )
+ parser.add_argument("--use_async", action="store_true")
+ parser.add_argument(
+ "--device", type=int, default=0, help="cuda device, i.e. 0 or 0,1,2,3,4"
+ )
+ parser.add_argument("--fps_target", type=float, default=-1.0)
+ parser.add_argument("--acc_target", type=float, default=-1.0)
+ parser.add_argument("--loop_count", type=int, default=-1)
+
+ config = parser.parse_args()
+ return config
+
+# Script entry point: parse the command line and run the selected test mode.
+if __name__ == "__main__":
+    config = parse_config()
+    main(config)
diff --git a/models/cv/classification/densenet201/ixrt/modify_batchsize.py b/models/cv/classification/densenet201/ixrt/modify_batchsize.py
new file mode 100644
index 0000000000000000000000000000000000000000..689b7a972dcbfec77c185592ede16bb4f04fa4fd
--- /dev/null
+++ b/models/cv/classification/densenet201/ixrt/modify_batchsize.py
@@ -0,0 +1,56 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import onnx
+import argparse
+
+def change_input_dim(model, bsz):
+ batch_size = bsz
+
+ # The following code changes the first dimension of every input to be batch_size
+ # Modify as appropriate ... note that this requires all inputs to
+ # have the same batch_size
+ inputs = model.graph.input
+ for input in inputs:
+ # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim.
+ # Add checks as needed.
+ dim1 = input.type.tensor_type.shape.dim[0]
+ # update dim to be a symbolic value
+ if isinstance(batch_size, str):
+ # set dynamic batch size
+ dim1.dim_param = batch_size
+ elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int):
+ # set given batch size
+ dim1.dim_value = int(batch_size)
+ else:
+ # set batch size of 1
+ dim1.dim_value = 1
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--batch_size", type=int)
+ parser.add_argument("--origin_model", type=str)
+ parser.add_argument("--output_model", type=str)
+ args = parser.parse_args()
+ return args
+
+# Script body (runs at import time): load the model, rewrite the batch
+# dimension of its inputs and save the result to the output path.
+args = parse_args()
+model = onnx.load(args.origin_model)
+change_input_dim(model, args.batch_size)
+onnx.save(model, args.output_model)
+
+
+
+
+
diff --git a/models/cv/classification/densenet201/ixrt/scripts/infer_densenet201_fp16_accuracy.sh b/models/cv/classification/densenet201/ixrt/scripts/infer_densenet201_fp16_accuracy.sh
new file mode 100644
index 0000000000000000000000000000000000000000..30024cbc93039df2675d7cdfe25b373d8ca89de4
--- /dev/null
+++ b/models/cv/classification/densenet201/ixrt/scripts/infer_densenet201_fp16_accuracy.sh
@@ -0,0 +1,119 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+# Accuracy pipeline: simplify the ONNX model, fix its batch size, build the
+# engine (each step is skipped when its output file already exists) and run
+# inference.py in ACC mode. The script exits with EXIT_STATUS.
+EXIT_STATUS=0
+# Record a failure of the command that ran immediately before the call.
+# Must be invoked as `cmd; check_status` so PIPESTATUS still refers to cmd.
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then
+        EXIT_STATUS=1
+    fi
+}
+
+# Run parameters
+BSZ=32
+TGT=-1
+WARM_UP=0
+# A negative loop count makes inference.py's dataloader use the whole dataset.
+LOOP_COUNT=-1
+RUN_MODE=ACC
+PRECISION=float16
+
+# Update arguments
+# Supported options: `--bs <batch size>` and `--tgt <accuracy target>`.
+# index is incremented before the case, so arguments[index] is the word that
+# follows the current option.
+index=0
+options=$@
+arguments=($options)
+for argument in $options
+do
+    index=`expr $index + 1`
+    case $argument in
+        --bs) BSZ=${arguments[index]};;
+        --tgt) TGT=${arguments[index]};;
+    esac
+done
+
+# Paths: everything lives under PROJ_DIR (default: current directory);
+# DATASETS_DIR is taken from the environment.
+PROJ_DIR=${PROJ_DIR:-"."}
+DATASETS_DIR="${DATASETS_DIR}"
+CHECKPOINTS_DIR="${PROJ_DIR}"
+RUN_DIR="${PROJ_DIR}"
+CONFIG_DIR="${PROJ_DIR}/config/DENSENET201_CONFIG"
+# The config file defines IMGSIZE, MODEL_NAME, ORIGINE_MODEL and the QUANT_* values.
+source ${CONFIG_DIR}
+ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo CONFIG_DIR : ${CONFIG_DIR}
+echo ====================== Model Info ======================
+echo Model Name : ${MODEL_NAME}
+echo Onnx Path : ${ORIGINE_MODEL}
+
+step=0
+SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
+
+# Simplify Model
+# NOTE(review): the exit status of this step is not checked; a failure only
+# shows up when a later step cannot find its input file.
+let step++
+echo;
+echo [STEP ${step}] : Simplify Model
+if [ -f ${SIM_MODEL} ];then
+    echo " "Simplify Model, ${SIM_MODEL} has been existed
+else
+    python3 ${RUN_DIR}/simplify_model.py \
+        --origin_model $ORIGINE_MODEL \
+        --output_model ${SIM_MODEL}
+    echo " "Generate ${SIM_MODEL}
+fi
+
+# Change Batchsize
+# NOTE(review): exit status not checked here either.
+let step++
+echo;
+echo [STEP ${step}] : Change Batchsize
+FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx
+if [ -f $FINAL_MODEL ];then
+    echo " "Change Batchsize Skip, $FINAL_MODEL has been existed
+else
+    python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
+        --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL}
+    echo " "Generate ${FINAL_MODEL}
+fi
+
+# Build Engine
+# NOTE(review): exit status not checked here either.
+let step++
+echo;
+echo [STEP ${step}] : Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
+if [ -f $ENGINE_FILE ];then
+    echo " "Build Engine Skip, $ENGINE_FILE has been existed
+else
+    python3 ${RUN_DIR}/build_engine.py \
+        --precision ${PRECISION} \
+        --model ${FINAL_MODEL} \
+        --engine ${ENGINE_FILE}
+    echo " "Generate Engine ${ENGINE_FILE}
+fi
+
+# Inference
+# Only this step feeds EXIT_STATUS; TGT is passed as the accuracy target.
+let step++
+echo;
+echo [STEP ${step}] : Inference
+python3 ${RUN_DIR}/inference.py \
+    --engine_file=${ENGINE_FILE} \
+    --datasets_dir=${DATASETS_DIR} \
+    --imgsz=${IMGSIZE} \
+    --warm_up=${WARM_UP} \
+    --loop_count ${LOOP_COUNT} \
+    --test_mode ${RUN_MODE} \
+    --acc_target ${TGT} \
+    --bsz ${BSZ}; check_status
+
+exit ${EXIT_STATUS}
diff --git a/models/cv/classification/densenet201/ixrt/scripts/infer_densenet201_fp16_performance.sh b/models/cv/classification/densenet201/ixrt/scripts/infer_densenet201_fp16_performance.sh
new file mode 100644
index 0000000000000000000000000000000000000000..42b81e12d6c9e0b3be289ae1562cd39538335fd2
--- /dev/null
+++ b/models/cv/classification/densenet201/ixrt/scripts/infer_densenet201_fp16_performance.sh
@@ -0,0 +1,119 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+# Performance pipeline: simplify the ONNX model, fix its batch size, build the
+# engine (each step is skipped when its output file already exists) and run
+# inference.py in FPS mode. The script exits with EXIT_STATUS.
+EXIT_STATUS=0
+# Record a failure of the command that ran immediately before the call.
+# Must be invoked as `cmd; check_status` so PIPESTATUS still refers to cmd.
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then
+        EXIT_STATUS=1
+    fi
+}
+
+# Run parameters
+BSZ=32
+TGT=-1
+WARM_UP=3
+LOOP_COUNT=20
+RUN_MODE=FPS
+PRECISION=float16
+
+# Update arguments
+# Supported options: `--bs <batch size>` and `--tgt <FPS target>`.
+# index is incremented before the case, so arguments[index] is the word that
+# follows the current option.
+index=0
+options=$@
+arguments=($options)
+for argument in $options
+do
+    index=`expr $index + 1`
+    case $argument in
+        --bs) BSZ=${arguments[index]};;
+        --tgt) TGT=${arguments[index]};;
+    esac
+done
+
+# Paths: everything lives under PROJ_DIR (default: current directory);
+# DATASETS_DIR is taken from the environment.
+PROJ_DIR=${PROJ_DIR:-"."}
+DATASETS_DIR="${DATASETS_DIR}"
+CHECKPOINTS_DIR="${PROJ_DIR}"
+RUN_DIR="${PROJ_DIR}"
+CONFIG_DIR="${PROJ_DIR}/config/DENSENET201_CONFIG"
+# The config file defines IMGSIZE, MODEL_NAME, ORIGINE_MODEL and the QUANT_* values.
+source ${CONFIG_DIR}
+ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo CONFIG_DIR : ${CONFIG_DIR}
+echo ====================== Model Info ======================
+echo Model Name : ${MODEL_NAME}
+echo Onnx Path : ${ORIGINE_MODEL}
+
+step=0
+SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
+
+# Simplify Model
+# NOTE(review): the exit status of this step is not checked; a failure only
+# shows up when a later step cannot find its input file.
+let step++
+echo;
+echo [STEP ${step}] : Simplify Model
+if [ -f ${SIM_MODEL} ];then
+    echo " "Simplify Model, ${SIM_MODEL} has been existed
+else
+    python3 ${RUN_DIR}/simplify_model.py \
+        --origin_model $ORIGINE_MODEL \
+        --output_model ${SIM_MODEL}
+    echo " "Generate ${SIM_MODEL}
+fi
+
+# Change Batchsize
+# NOTE(review): exit status not checked here either.
+let step++
+echo;
+echo [STEP ${step}] : Change Batchsize
+FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx
+if [ -f $FINAL_MODEL ];then
+    echo " "Change Batchsize Skip, $FINAL_MODEL has been existed
+else
+    python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
+        --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL}
+    echo " "Generate ${FINAL_MODEL}
+fi
+
+# Build Engine
+# NOTE(review): exit status not checked here either.
+let step++
+echo;
+echo [STEP ${step}] : Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
+if [ -f $ENGINE_FILE ];then
+    echo " "Build Engine Skip, $ENGINE_FILE has been existed
+else
+    python3 ${RUN_DIR}/build_engine.py \
+        --precision ${PRECISION} \
+        --model ${FINAL_MODEL} \
+        --engine ${ENGINE_FILE}
+    echo " "Generate Engine ${ENGINE_FILE}
+fi
+
+# Inference
+# Only this step feeds EXIT_STATUS; TGT is passed as the FPS target.
+# inference.py builds its dataloader in FPS mode too, so DATASETS_DIR must
+# still point at a valid dataset.
+let step++
+echo;
+echo [STEP ${step}] : Inference
+python3 ${RUN_DIR}/inference.py \
+    --engine_file=${ENGINE_FILE} \
+    --datasets_dir=${DATASETS_DIR} \
+    --imgsz=${IMGSIZE} \
+    --warm_up=${WARM_UP} \
+    --loop_count ${LOOP_COUNT} \
+    --test_mode ${RUN_MODE} \
+    --fps_target ${TGT} \
+    --bsz ${BSZ}; check_status
+
+exit ${EXIT_STATUS}
diff --git a/models/cv/classification/densenet201/ixrt/simplify_model.py b/models/cv/classification/densenet201/ixrt/simplify_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..9948a9fa083ff99ff88e556e96614b02cccaa965
--- /dev/null
+++ b/models/cv/classification/densenet201/ixrt/simplify_model.py
@@ -0,0 +1,40 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import onnx
+import argparse
+from onnxsim import simplify
+
+# Simplify
+def simplify_model(args):
+ onnx_model = onnx.load(args.origin_model)
+ model_simp, check = simplify(onnx_model)
+ model_simp = onnx.shape_inference.infer_shapes(model_simp)
+ onnx.save(model_simp, args.output_model)
+ print(" Simplify onnx Done.")
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--origin_model", type=str)
+ parser.add_argument("--output_model", type=str)
+ parser.add_argument("--reshape", action="store_true")
+ args = parser.parse_args()
+ return args
+
+# Script body (runs at import time): parse the command line and simplify.
+args = parse_args()
+simplify_model(args)
+
+
+
+
diff --git a/models/cv/classification/efficientnetv2_rw_t/ixrt/README.md b/models/cv/classification/efficientnetv2_rw_t/ixrt/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..bc6836d029c2b1b8312e5fd363d1075158dfd910
--- /dev/null
+++ b/models/cv/classification/efficientnetv2_rw_t/ixrt/README.md
@@ -0,0 +1,59 @@
+# EfficientNetv2_rw_t
+
+## Description
+
+EfficientNetV2_rw_t is an enhanced version of the EfficientNet family of convolutional neural network architectures. It builds upon the success of its predecessors by introducing novel advancements aimed at further improving performance and efficiency in various computer vision tasks.
+
+## Setup
+
+### Install
+
+```bash
+# Install libGL
+## CentOS
+yum install -y mesa-libGL
+## Ubuntu
+apt install -y libgl1-mesa-dev
+
+pip3 install tqdm
+pip3 install timm
+pip3 install onnx
+pip3 install onnxsim
+pip3 install tabulate
+pip3 install ppq
+pip3 install tqdm
+pip3 install cuda-python
+```
+
+### Download
+
+Pretrained model:
+
+Dataset: download the ImageNet validation dataset.
+
+### Model Conversion
+
+```bash
+python3 export.py --weight efficientnetv2_t_agc-3620981a.pth --output efficientnetv2_rw_t.onnx
+```
+
+## Inference
+
+```bash
+export DATASETS_DIR=/Path/to/imagenet_val/
+```
+
+### FP16
+
+```bash
+# Accuracy
+bash scripts/infer_efficientnetv2_rw_t_fp16_accuracy.sh
+# Performance
+bash scripts/infer_efficientnetv2_rw_t_fp16_performance.sh
+```
+
+## Results
+
+Model |BatchSize |Precision |FPS |Top-1(%) |Top-5(%)
+--------------------|-----------|----------|---------|---------|--------
+Efficientnetv2_rw_t | 32 | FP16 | 1525.22 | 82.336 | 96.194
diff --git a/models/cv/classification/efficientnetv2_rw_t/ixrt/build_engine.py b/models/cv/classification/efficientnetv2_rw_t/ixrt/build_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..038c15d5af7a51da6386aa0f994e830178330fb3
--- /dev/null
+++ b/models/cv/classification/efficientnetv2_rw_t/ixrt/build_engine.py
@@ -0,0 +1,52 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import os
+import cv2
+import argparse
+import numpy as np
+
+import torch
+import tensorrt
+
+def main(config):
+ IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING)
+ builder = tensorrt.Builder(IXRT_LOGGER)
+ EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
+ network = builder.create_network(EXPLICIT_BATCH)
+ build_config = builder.create_builder_config()
+ parser = tensorrt.OnnxParser(network, IXRT_LOGGER)
+ parser.parse_from_file(config.model)
+
+ precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16
+ build_config.set_flag(precision)
+
+ plan = builder.build_serialized_network(network, build_config)
+ engine_file_path = config.engine
+ with open(engine_file_path, "wb") as f:
+ f.write(plan)
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--model", type=str)
+ parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8",
+ help="The precision of datatype")
+ parser.add_argument("--engine", type=str, default=None)
+ args = parser.parse_args()
+ return args
+
+# Script entry point: parse the command line and build the engine.
+if __name__ == "__main__":
+    args = parse_args()
+    main(args)
\ No newline at end of file
diff --git a/models/cv/classification/efficientnetv2_rw_t/ixrt/calibration_dataset.py b/models/cv/classification/efficientnetv2_rw_t/ixrt/calibration_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..ec931c656abf5b2309dc9938490df46e4e8cdb19
--- /dev/null
+++ b/models/cv/classification/efficientnetv2_rw_t/ixrt/calibration_dataset.py
@@ -0,0 +1,112 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import os
+
+import torch
+import torchvision.datasets
+from torch.utils.data import DataLoader
+from torchvision import models
+from torchvision import transforms as T
+
+
+class CalibrationImageNet(torchvision.datasets.ImageFolder):
+    """ImageFolder variant whose labels are read from ``<root>/val_map.txt``.
+
+    The class index that ``ImageFolder`` derives from the directory layout
+    is discarded; each sample's label is looked up by image file name.
+    """
+
+    def __init__(self, *args, **kwargs):
+        """Initialise the image folder and load the name -> label map.
+
+        Raises:
+            FileNotFoundError: If ``val_map.txt`` is missing under the root.
+        """
+        super().__init__(*args, **kwargs)
+        label_file = os.path.join(self.root, "val_map.txt")
+        if not os.path.exists(label_file):
+            raise FileNotFoundError(f"Not found label file `{label_file}`.")
+
+        self.img2label_map = self.make_img2label_map(label_file)
+
+    def make_img2label_map(self, path):
+        """Return a dict mapping image file name to integer label.
+
+        Lines that do not split into exactly two tab-separated fields, or
+        whose image name is empty, are skipped.
+        """
+        with open(path) as f:
+            rows = f.readlines()
+
+        mapping = {}
+        for row in rows:
+            fields = row.strip().split("\t")
+            if len(fields) != 2:
+                continue
+            name = fields[0].strip()
+            if not name:
+                continue
+            mapping[name] = int(fields[1].strip())
+        return mapping
+
+    def __getitem__(self, index):
+        """Return ``(sample, label)`` for ``index``.
+
+        ``target_transform`` is not applied; the label comes straight from
+        the map built in ``__init__``.
+        """
+        path, _ = self.samples[index]
+        image = self.loader(path)
+        if self.transform is not None:
+            image = self.transform(image)
+        label = self.img2label_map[os.path.basename(path)]
+
+        return image, label
+
+
+def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0):
+    """Create the calibration and verification loaders for a dataset folder.
+
+    Args:
+        data_path: Root folder handed to ``CalibrationImageNet``.
+        num_samples: Number of leading samples used for calibration;
+            ``None`` uses the whole dataset. Values larger than the dataset
+            are clamped to its length.
+        img_sz: Side length of the centre crop.
+        batch_size: Batch size of both loaders.
+        workers: Number of DataLoader worker processes.
+
+    Returns:
+        ``(calibration_dataloader, verify_dataloader)``. The calibration
+        loader is shuffled; the verification loader walks the full dataset
+        in order.
+    """
+    dataset = CalibrationImageNet(
+        data_path,
+        transform=T.Compose(
+            [
+                T.Resize(256),
+                T.CenterCrop(img_sz),
+                T.ToTensor(),
+                T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+            ]
+        ),
+    )
+
+    calibration_dataset = dataset
+    if num_samples is not None:
+        # Subset does not validate its indices up front; an index past the
+        # end would only fail with IndexError in the middle of iteration.
+        calibration_dataset = torch.utils.data.Subset(
+            dataset, indices=range(min(num_samples, len(dataset)))
+        )
+
+    calibration_dataloader = DataLoader(
+        calibration_dataset,
+        shuffle=True,
+        batch_size=batch_size,
+        drop_last=False,
+        num_workers=workers,
+    )
+
+    verify_dataloader = DataLoader(
+        dataset,
+        shuffle=False,
+        batch_size=batch_size,
+        drop_last=False,
+        num_workers=workers,
+    )
+
+    return calibration_dataloader, verify_dataloader
+
+
+def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000):
+    """Return the calibration loader for ``step`` batches of ``batch_size``.
+
+    The sample budget is ``step * batch_size`` capped at ``total_sample``;
+    a negative ``step`` lifts the cap and uses the whole dataset.
+    """
+    capped = min(total_sample, step * batch_size)
+    sample_count = None if step < 0 else capped
+    loaders = create_dataloaders(
+        dataset_dir,
+        img_sz=img_sz,
+        batch_size=batch_size,
+        workers=workers,
+        num_samples=sample_count,
+    )
+    return loaders[0]
diff --git a/models/cv/classification/efficientnetv2_rw_t/ixrt/common.py b/models/cv/classification/efficientnetv2_rw_t/ixrt/common.py
new file mode 100644
index 0000000000000000000000000000000000000000..fd6a84d85fcc4ede8f64ed0e0a2dd7022527a956
--- /dev/null
+++ b/models/cv/classification/efficientnetv2_rw_t/ixrt/common.py
@@ -0,0 +1,81 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import os
+import cv2
+import glob
+import torch
+import tensorrt
+import numpy as np
+from cuda import cuda, cudart
+
+def eval_batch(batch_score, batch_label):
+    """Count top-1 and top-5 hits for one batch.
+
+    Args:
+        batch_score: numpy array of class scores, one row per sample.
+        batch_label: Integer class labels (tensor or sequence). It may hold
+            fewer entries than ``batch_score`` has rows (partial last batch);
+            only the first ``len(batch_label)`` rows are scored.
+
+    Returns:
+        ``(top1, top5)`` as Python ints.
+    """
+    # from_numpy + to() avoids the copy-construct warning raised by
+    # torch.tensor(<tensor>).
+    scores = torch.from_numpy(batch_score).to(torch.float32)
+    labels = torch.as_tensor(batch_label).reshape(-1, 1)
+    # Guard against models with fewer than five classes.
+    k = min(5, scores.shape[-1])
+    _, indices = scores.topk(k)
+    hits = indices[: labels.shape[0]] == labels
+    top1 = int(hits[:, 0].sum())
+    top5 = int(hits.any(dim=1).sum())
+    return top1, top5
+
+def create_engine_context(engine_path, logger):
+    """Deserialize the engine file and create an execution context for it.
+
+    Returns:
+        ``(engine, context)``. Each intermediate object is asserted to be
+        truthy before it is used.
+    """
+    with open(engine_path, "rb") as engine_file:
+        runtime = tensorrt.Runtime(logger)
+        assert runtime
+        serialized = engine_file.read()
+        engine = runtime.deserialize_cuda_engine(serialized)
+        assert engine
+        context = engine.create_execution_context()
+        assert context
+
+    return engine, context
+
+def get_io_bindings(engine):
+    """Allocate one device buffer per engine binding and describe each one.
+
+    Returns:
+        ``(inputs, outputs, allocations)``: ``inputs`` and ``outputs`` are
+        lists of dicts with the keys ``index``, ``name``, ``dtype``,
+        ``shape``, ``allocation`` and ``nbytes``; ``allocations`` lists the
+        device pointers of all bindings in binding order.
+    """
+    # Setup I/O bindings
+    inputs = []
+    outputs = []
+    allocations = []
+
+    for i in range(engine.num_bindings):
+        name = engine.get_binding_name(i)
+        np_dtype = np.dtype(tensorrt.nptype(engine.get_binding_dtype(i)))
+        shape = list(engine.get_binding_shape(i))
+        # Buffer size in bytes: item size times the product of all dimensions.
+        size = np_dtype.itemsize
+        for s in shape:
+            size *= s
+        err, allocation = cudart.cudaMalloc(size)
+        assert err == cudart.cudaError_t.cudaSuccess
+        binding = {
+            "index": i,
+            "name": name,
+            "dtype": np_dtype,
+            "shape": shape,
+            "allocation": allocation,
+            "nbytes": size,
+        }
+        print(f"binding {i}, name : {name} dtype : {np_dtype} shape : {shape}")
+        allocations.append(allocation)
+        if engine.binding_is_input(i):
+            inputs.append(binding)
+        else:
+            outputs.append(binding)
+    return inputs, outputs, allocations
\ No newline at end of file
diff --git a/models/cv/classification/efficientnetv2_rw_t/ixrt/config/EFFICIENTNETV2_CONFIG b/models/cv/classification/efficientnetv2_rw_t/ixrt/config/EFFICIENTNETV2_CONFIG
new file mode 100644
index 0000000000000000000000000000000000000000..f3343fd3eab2d2d3fbc660ec1a94e4932a69c90f
--- /dev/null
+++ b/models/cv/classification/efficientnetv2_rw_t/ixrt/config/EFFICIENTNETV2_CONFIG
@@ -0,0 +1,33 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+# IMGSIZE : height/width of the model input
+# MODEL_NAME : basename of the generated onnx/engine files
+# ORIGINE_MODEL : file name of the original onnx model
+IMGSIZE=288
+MODEL_NAME=efficientnetv2_rw_t
+ORIGINE_MODEL=efficientnetv2_rw_t.onnx
+
+# QUANT CONFIG (only takes effect when PRECISION is int8)
+ # QUANT_OBSERVER : quantization strategy, one of [hist_percentile, percentile, minmax, entropy, ema]
+ # QUANT_BATCHSIZE : batch size of the dataloader used during quantization; preferably the same as the batch size in the onnx model, otherwise some ops (e.g. Reshape) may infer wrong shapes
+ # QUANT_STEP : number of quantization steps
+ # QUANT_SEED : random seed, keeps the quantization result reproducible
+ # QUANT_EXIST_ONNX : fill in when a quantized model from another source is available
+QUANT_OBSERVER=minmax
+QUANT_BATCHSIZE=1
+QUANT_STEP=32
+QUANT_SEED=42
+DISABLE_QUANT_LIST=
+QUANT_EXIST_ONNX=
diff --git a/models/cv/classification/efficientnetv2_rw_t/ixrt/export.py b/models/cv/classification/efficientnetv2_rw_t/ixrt/export.py
new file mode 100644
index 0000000000000000000000000000000000000000..3c3f579c02f06cec8be82f7ca1a797ecbab960c3
--- /dev/null
+++ b/models/cv/classification/efficientnetv2_rw_t/ixrt/export.py
@@ -0,0 +1,58 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import timm
+import torch
+import argparse
+
+def parse_args():
+    """Parse the export options; both ``--weight`` and ``--output`` are required."""
+    cli = argparse.ArgumentParser()
+    required_options = (
+        ("--weight", "pytorch model weight."),
+        ("--output", "export onnx model path."),
+    )
+    for flag, description in required_options:
+        cli.add_argument(flag, type=str, required=True, help=description)
+
+    return cli.parse_args()
+
+def main():
+    """Export the timm ``efficientnetv2_rw_t`` checkpoint to an ONNX file.
+
+    The model is traced with a random ``[32, 3, 288, 288]`` input and the
+    batch axis of ``input`` and ``output`` is exported as dynamic.
+    """
+    cli = parse_args()
+
+    net = timm.create_model('efficientnetv2_rw_t', checkpoint_path=cli.weight)
+    net.eval()
+
+    sample = torch.randn([32, 3, 288, 288])
+    export_options = dict(
+        opset_version=13,
+        do_constant_folding=True,
+        input_names=["input"],
+        output_names=["output"],
+        dynamic_axes={"input": {0: "batch"}, "output": {0: "batch"}},
+    )
+    torch.onnx.export(net, sample, cli.output, **export_options)
+
+    print("Export onnx model successfully! ")
+
+# Run the export only when this file is executed as a script.
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/models/cv/classification/efficientnetv2_rw_t/ixrt/inference.py b/models/cv/classification/efficientnetv2_rw_t/ixrt/inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..e04f1c3aead1aae2aec278cce6a60d49b9687ee0
--- /dev/null
+++ b/models/cv/classification/efficientnetv2_rw_t/ixrt/inference.py
@@ -0,0 +1,187 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import argparse
+import json
+import os
+import re
+import time
+from tqdm import tqdm
+
+import cv2
+import numpy as np
+from cuda import cuda, cudart
+import torch
+import tensorrt
+
+from timm.data import create_dataset, create_loader
+from common import eval_batch, create_engine_context, get_io_bindings
+
+def get_dataloader(data_path, batch_size, num_workers):
+    """Build the timm evaluation loader over ``data_path``.
+
+    Images are loaded at a fixed ``(3, 288, 288)`` input size with bicubic
+    interpolation, ``crop_pct=1.0`` and the mean/std listed below; the timm
+    prefetcher is disabled.
+    """
+    loader_options = dict(
+        input_size=(3, 288, 288),
+        batch_size=batch_size,
+        interpolation='bicubic',
+        mean=(0.485, 0.456, 0.406),
+        std=(0.229, 0.224, 0.225),
+        crop_pct=1.0,
+        use_prefetcher=False,
+        num_workers=num_workers,
+    )
+    return create_loader(create_dataset(root=data_path, name=""), **loader_options)
+
+def main(config):
+    """Benchmark throughput or measure accuracy with a built engine.
+
+    ``config.test_mode`` selects the run:
+
+    * ``"FPS"`` executes the engine ``config.loop_count`` times on whatever
+      is in the device buffers and compares the measured throughput with
+      ``config.fps_target``.
+    * ``"ACC"`` feeds the dataset under ``config.datasets_dir`` through the
+      engine and compares the top-1 accuracy with ``config.acc_target``.
+
+    Both modes end the process through ``exit``: status 0 on pass, 1 on
+    failure.
+
+    Raises:
+        ValueError: If ``config.test_mode`` is neither ``"FPS"`` nor ``"ACC"``.
+    """
+    logger = tensorrt.Logger(tensorrt.Logger.ERROR)
+
+    # Load Engine && I/O bindings
+    engine, context = create_engine_context(config.engine_file, logger)
+    inputs, outputs, allocations = get_io_bindings(engine)
+
+    # Warm up
+    if config.warm_up > 0:
+        print("\nWarm Start.")
+        for _ in range(config.warm_up):
+            context.execute_v2(allocations)
+        print("Warm Done.")
+
+    # Inference
+    if config.test_mode == "FPS":
+        torch.cuda.synchronize()
+        start_time = time.time()
+
+        for _ in range(config.loop_count):
+            context.execute_v2(allocations)
+
+        torch.cuda.synchronize()
+        end_time = time.time()
+        forward_time = end_time - start_time
+
+        # Every iteration runs one full batch, so this is the number of
+        # samples actually processed (previously capped at 50000, which
+        # under-reported throughput for long runs).
+        num_samples = config.loop_count * config.bsz
+        fps = num_samples / forward_time
+
+        print("FPS : ", fps)
+        print(f"Performance Check : Test {fps} >= target {config.fps_target}")
+        if fps >= config.fps_target:
+            print("pass!")
+            exit()
+        else:
+            print("failed!")
+            exit(1)
+
+    elif config.test_mode == "ACC":
+        # The dataset is only needed for accuracy runs, so build the loader here.
+        dataloader = get_dataloader(config.datasets_dir, config.bsz, 16)
+
+        ## Prepare the output data
+        output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"])
+        print(f"output shape : {output.shape} output type : {output.dtype}")
+
+        total_sample = 0
+        acc_top1, acc_top5 = 0, 0
+
+        with tqdm(total=len(dataloader)) as _tqdm:
+            for batch_data, batch_label in dataloader:
+                batch_data = batch_data.numpy().astype(inputs[0]["dtype"])
+                batch_data = np.ascontiguousarray(batch_data)
+                total_sample += batch_data.shape[0]
+                (err,) = cudart.cudaMemcpy(
+                    inputs[0]["allocation"],
+                    batch_data,
+                    batch_data.nbytes,
+                    cudart.cudaMemcpyKind.cudaMemcpyHostToDevice,
+                )
+                assert err == cudart.cudaError_t.cudaSuccess
+                context.execute_v2(allocations)
+                (err,) = cudart.cudaMemcpy(
+                    output,
+                    outputs[0]["allocation"],
+                    outputs[0]["nbytes"],
+                    cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost,
+                )
+                assert err == cudart.cudaError_t.cudaSuccess
+
+                # Drop trailing 1x1 dims, e.g. [32,1000,1,1] -> [32,1000], so
+                # each row holds one sample's class scores (squeeze is a view
+                # of the same host buffer).
+                scores = output.squeeze(axis=(2, 3)) if len(output.shape) == 4 else output
+
+                batch_top1, batch_top5 = eval_batch(scores, batch_label)
+                acc_top1 += batch_top1
+                acc_top5 += batch_top5
+
+                _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample),
+                                  acc_5='{:.4f}'.format(acc_top5/total_sample))
+                _tqdm.update(1)
+        # Release every device buffer created by get_io_bindings.
+        for allocation in allocations:
+            (err,) = cudart.cudaFree(allocation)
+            assert err == cudart.cudaError_t.cudaSuccess
+        print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}")
+        print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}")
+        acc1 = acc_top1/total_sample
+        print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}")
+        if acc1 >= config.acc_target:
+            print("pass!")
+            exit()
+        else:
+            print("failed!")
+            exit(1)
+
+    else:
+        # A typo in --test_mode used to fall through and exit with status 0.
+        raise ValueError(f"Unsupported test_mode `{config.test_mode}`, expected FPS or ACC.")
+
+def parse_config():
+    """Parse the command-line options of the inference script.
+
+    Returns:
+        argparse.Namespace with ``test_mode``, ``engine_file``,
+        ``datasets_dir``, ``warm_up``, ``bsz``, ``imgsz``, ``use_async``,
+        ``device``, ``fps_target``, ``acc_target`` and ``loop_count``.
+    """
+    parser = argparse.ArgumentParser()
+    # The script handles the modes "FPS" and "ACC"; the help text used to
+    # advertise "MAP", which is not handled.
+    parser.add_argument("--test_mode", type=str, default="FPS", help="FPS or ACC")
+    parser.add_argument(
+        "--engine_file",
+        type=str,
+        help="engine file path"
+    )
+    parser.add_argument(
+        "--datasets_dir",
+        type=str,
+        default="",
+        help="ImageNet dir",
+    )
+    parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times")
+    parser.add_argument("--bsz", type=int, default=32, help="test batch size")
+    parser.add_argument(
+        "--imgsz",
+        "--img",
+        "--img-size",
+        type=int,
+        default=224,
+        help="inference size h,w",
+    )
+    parser.add_argument("--use_async", action="store_true")
+    parser.add_argument(
+        "--device", type=int, default=0, help="cuda device, i.e. 0 or 0,1,2,3,4"
+    )
+    parser.add_argument("--fps_target", type=float, default=-1.0)
+    parser.add_argument("--acc_target", type=float, default=-1.0)
+    parser.add_argument("--loop_count", type=int, default=-1)
+
+    config = parser.parse_args()
+    return config
+
+# Script entry point: parse the CLI options and run the selected test mode.
+if __name__ == "__main__":
+    main(parse_config())
diff --git a/models/cv/classification/efficientnetv2_rw_t/ixrt/modify_batchsize.py b/models/cv/classification/efficientnetv2_rw_t/ixrt/modify_batchsize.py
new file mode 100644
index 0000000000000000000000000000000000000000..689b7a972dcbfec77c185592ede16bb4f04fa4fd
--- /dev/null
+++ b/models/cv/classification/efficientnetv2_rw_t/ixrt/modify_batchsize.py
@@ -0,0 +1,56 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import onnx
+import argparse
+
+def change_input_dim(model, bsz):
+    """Rewrite the first dimension of every graph input in place.
+
+    Args:
+        model: ONNX model whose ``graph.input`` entries are tensors with at
+            least one dimension (not checked).
+        bsz: New batch size. An int or a digit-only string sets a fixed
+            ``dim_value``; any other string sets a symbolic ``dim_param``
+            (dynamic batch); anything else falls back to a batch size of 1.
+    """
+    batch_size = bsz
+
+    # Numeric values are tested first: a digit string is also a ``str`` and
+    # would otherwise be stored as a symbolic dimension name.
+    is_fixed = isinstance(batch_size, int) or (
+        isinstance(batch_size, str) and batch_size.isdigit()
+    )
+
+    # This requires all inputs to have the same batch_size.
+    for graph_input in model.graph.input:
+        dim1 = graph_input.type.tensor_type.shape.dim[0]
+        if is_fixed:
+            # set given batch size
+            dim1.dim_value = int(batch_size)
+        elif isinstance(batch_size, str):
+            # set dynamic batch size
+            dim1.dim_param = batch_size
+        else:
+            # set batch size of 1
+            dim1.dim_value = 1
+
+def parse_args():
+    """Parse ``--batch_size`` (int), ``--origin_model`` and ``--output_model``."""
+    cli = argparse.ArgumentParser()
+    cli.add_argument("--batch_size", type=int)
+    for flag in ("--origin_model", "--output_model"):
+        cli.add_argument(flag, type=str)
+    return cli.parse_args()
+
+# Script body (there is no __main__ guard, so this also runs on import):
+# load the model, rewrite the batch dimension of every graph input, save it.
+args = parse_args()
+model = onnx.load(args.origin_model)
+change_input_dim(model, args.batch_size)
+onnx.save(model, args.output_model)
+
+
+
+
+
diff --git a/models/cv/classification/efficientnetv2_rw_t/ixrt/scripts/infer_efficientnetv2_rw_t_fp16_accuracy.sh b/models/cv/classification/efficientnetv2_rw_t/ixrt/scripts/infer_efficientnetv2_rw_t_fp16_accuracy.sh
new file mode 100644
index 0000000000000000000000000000000000000000..86ecfac0a1285b9c03f477489eaa94fbc57951eb
--- /dev/null
+++ b/models/cv/classification/efficientnetv2_rw_t/ixrt/scripts/infer_efficientnetv2_rw_t_fp16_accuracy.sh
@@ -0,0 +1,119 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+# Exit code of the whole script; check_status flips it to 1 on failure.
+EXIT_STATUS=0
+# Record a failure when the command executed just before this call
+# returned a non-zero status.
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then
+        EXIT_STATUS=1
+    fi
+}
+
+# Run parameters
+BSZ=32
+TGT=-1
+WARM_UP=0
+LOOP_COUNT=-1
+RUN_MODE=ACC
+PRECISION=float16
+
+# Update arguments
+# index is advanced before the case statement, so ${arguments[index]} is the
+# word that follows the current option (its value).
+index=0
+options=$@
+arguments=($options)
+for argument in $options
+do
+    index=`expr $index + 1`
+    case $argument in
+      --bs) BSZ=${arguments[index]};;
+      --tgt) TGT=${arguments[index]};;
+    esac
+done
+
+# PROJ_DIR defaults to the current directory; DATASETS_DIR comes from the
+# environment.
+PROJ_DIR=${PROJ_DIR:-"."}
+DATASETS_DIR="${DATASETS_DIR}"
+CHECKPOINTS_DIR="${PROJ_DIR}"
+RUN_DIR="${PROJ_DIR}"
+CONFIG_DIR="${PROJ_DIR}/config/EFFICIENTNETV2_CONFIG"
+# The sourced config file supplies IMGSIZE, MODEL_NAME and ORIGINE_MODEL.
+source ${CONFIG_DIR}
+ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo CONFIG_DIR : ${CONFIG_DIR}
+echo ====================== Model Info ======================
+echo Model Name : ${MODEL_NAME}
+echo Onnx Path : ${ORIGINE_MODEL}
+
+step=0
+SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
+
+# Simplify Model (skipped when the simplified file already exists)
+let step++
+echo;
+echo [STEP ${step}] : Simplify Model
+if [ -f ${SIM_MODEL} ];then
+    echo " "Simplify Model, ${SIM_MODEL} has been existed
+else
+    python3 ${RUN_DIR}/simplify_model.py \
+        --origin_model $ORIGINE_MODEL \
+        --output_model ${SIM_MODEL}
+    echo " "Generate ${SIM_MODEL}
+fi
+
+# Change Batchsize (skipped when the fixed-batch model already exists)
+let step++
+echo;
+echo [STEP ${step}] : Change Batchsize
+FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx
+if [ -f $FINAL_MODEL ];then
+    echo " "Change Batchsize Skip, $FINAL_MODEL has been existed
+else
+    python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
+        --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL}
+    echo " "Generate ${FINAL_MODEL}
+fi
+
+# Build Engine (skipped when the engine file already exists)
+let step++
+echo;
+echo [STEP ${step}] : Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
+if [ -f $ENGINE_FILE ];then
+    echo " "Build Engine Skip, $ENGINE_FILE has been existed
+else
+    python3 ${RUN_DIR}/build_engine.py \
+        --precision ${PRECISION} \
+        --model ${FINAL_MODEL} \
+        --engine ${ENGINE_FILE}
+    echo " "Generate Engine ${ENGINE_FILE}
+fi
+
+# Inference
+# Only this step feeds check_status; the exit code of the preparation steps
+# above is not inspected.
+let step++
+echo;
+echo [STEP ${step}] : Inference
+python3 ${RUN_DIR}/inference.py \
+    --engine_file=${ENGINE_FILE} \
+    --datasets_dir=${DATASETS_DIR} \
+    --imgsz=${IMGSIZE} \
+    --warm_up=${WARM_UP} \
+    --loop_count ${LOOP_COUNT} \
+    --test_mode ${RUN_MODE} \
+    --acc_target ${TGT} \
+    --bsz ${BSZ}; check_status
+
+exit ${EXIT_STATUS}
diff --git a/models/cv/classification/efficientnetv2_rw_t/ixrt/scripts/infer_efficientnetv2_rw_t_fp16_performance.sh b/models/cv/classification/efficientnetv2_rw_t/ixrt/scripts/infer_efficientnetv2_rw_t_fp16_performance.sh
new file mode 100644
index 0000000000000000000000000000000000000000..597a7a64eaaf78f07d9c71e72064b94cbbbdb9ab
--- /dev/null
+++ b/models/cv/classification/efficientnetv2_rw_t/ixrt/scripts/infer_efficientnetv2_rw_t_fp16_performance.sh
@@ -0,0 +1,119 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+# Exit code of the whole script; check_status flips it to 1 on failure.
+EXIT_STATUS=0
+# Record a failure when the command executed just before this call
+# returned a non-zero status.
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then
+        EXIT_STATUS=1
+    fi
+}
+
+# Run parameters
+BSZ=32
+TGT=-1
+WARM_UP=3
+LOOP_COUNT=20
+RUN_MODE=FPS
+PRECISION=float16
+
+# Update arguments
+# index is advanced before the case statement, so ${arguments[index]} is the
+# word that follows the current option (its value).
+index=0
+options=$@
+arguments=($options)
+for argument in $options
+do
+    index=`expr $index + 1`
+    case $argument in
+      --bs) BSZ=${arguments[index]};;
+      --tgt) TGT=${arguments[index]};;
+    esac
+done
+
+# PROJ_DIR defaults to the current directory; DATASETS_DIR comes from the
+# environment.
+PROJ_DIR=${PROJ_DIR:-"."}
+DATASETS_DIR="${DATASETS_DIR}"
+CHECKPOINTS_DIR="${PROJ_DIR}"
+RUN_DIR="${PROJ_DIR}"
+CONFIG_DIR="${PROJ_DIR}/config/EFFICIENTNETV2_CONFIG"
+# The sourced config file supplies IMGSIZE, MODEL_NAME and ORIGINE_MODEL.
+source ${CONFIG_DIR}
+ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo CONFIG_DIR : ${CONFIG_DIR}
+echo ====================== Model Info ======================
+echo Model Name : ${MODEL_NAME}
+echo Onnx Path : ${ORIGINE_MODEL}
+
+step=0
+SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
+
+# Simplify Model (skipped when the simplified file already exists)
+let step++
+echo;
+echo [STEP ${step}] : Simplify Model
+if [ -f ${SIM_MODEL} ];then
+    echo " "Simplify Model, ${SIM_MODEL} has been existed
+else
+    python3 ${RUN_DIR}/simplify_model.py \
+        --origin_model $ORIGINE_MODEL \
+        --output_model ${SIM_MODEL}
+    echo " "Generate ${SIM_MODEL}
+fi
+
+# Change Batchsize (skipped when the fixed-batch model already exists)
+let step++
+echo;
+echo [STEP ${step}] : Change Batchsize
+FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx
+if [ -f $FINAL_MODEL ];then
+    echo " "Change Batchsize Skip, $FINAL_MODEL has been existed
+else
+    python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \
+        --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL}
+    echo " "Generate ${FINAL_MODEL}
+fi
+
+# Build Engine (skipped when the engine file already exists)
+let step++
+echo;
+echo [STEP ${step}] : Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
+if [ -f $ENGINE_FILE ];then
+    echo " "Build Engine Skip, $ENGINE_FILE has been existed
+else
+    python3 ${RUN_DIR}/build_engine.py \
+        --precision ${PRECISION} \
+        --model ${FINAL_MODEL} \
+        --engine ${ENGINE_FILE}
+    echo " "Generate Engine ${ENGINE_FILE}
+fi
+
+# Inference
+# Only this step feeds check_status; the exit code of the preparation steps
+# above is not inspected.
+let step++
+echo;
+echo [STEP ${step}] : Inference
+python3 ${RUN_DIR}/inference.py \
+    --engine_file=${ENGINE_FILE} \
+    --datasets_dir=${DATASETS_DIR} \
+    --imgsz=${IMGSIZE} \
+    --warm_up=${WARM_UP} \
+    --loop_count ${LOOP_COUNT} \
+    --test_mode ${RUN_MODE} \
+    --fps_target ${TGT} \
+    --bsz ${BSZ}; check_status
+
+exit ${EXIT_STATUS}
diff --git a/models/cv/classification/efficientnetv2_rw_t/ixrt/simplify_model.py b/models/cv/classification/efficientnetv2_rw_t/ixrt/simplify_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..9948a9fa083ff99ff88e556e96614b02cccaa965
--- /dev/null
+++ b/models/cv/classification/efficientnetv2_rw_t/ixrt/simplify_model.py
@@ -0,0 +1,40 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import onnx
+import argparse
+from onnxsim import simplify
+
+# Simplify
+def simplify_model(args):
+    """Simplify an ONNX model, re-run shape inference and save the result.
+
+    Args:
+        args: Namespace providing ``origin_model`` (input path) and
+            ``output_model`` (output path).
+    """
+    onnx_model = onnx.load(args.origin_model)
+    model_simp, check = simplify(onnx_model)
+    # ``check`` is the validation flag returned by onnxsim; surface a failed
+    # validation instead of dropping it silently. The model is still saved,
+    # as before.
+    if not check:
+        print(" Warning: simplified onnx model could not be validated.")
+    model_simp = onnx.shape_inference.infer_shapes(model_simp)
+    onnx.save(model_simp, args.output_model)
+    print(" Simplify onnx Done.")
+
+def parse_args():
+    """Parse ``--origin_model``, ``--output_model`` and the ``--reshape`` flag."""
+    cli = argparse.ArgumentParser()
+    for flag in ("--origin_model", "--output_model"):
+        cli.add_argument(flag, type=str)
+    cli.add_argument("--reshape", action="store_true")
+    return cli.parse_args()
+
+# Script body (there is no __main__ guard, so this also runs on import).
+args = parse_args()
+simplify_model(args)
+
+
+
+
diff --git a/models/cv/detection/foveabox/ixrt/README.md b/models/cv/detection/foveabox/ixrt/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..d86a2c3961f6ac2da5700d345380ab4415331f6b
--- /dev/null
+++ b/models/cv/detection/foveabox/ixrt/README.md
@@ -0,0 +1,67 @@
+# FoveaBox
+
+## Description
+
+FoveaBox is an advanced anchor-free object detection framework that enhances accuracy and flexibility by directly predicting the existence and bounding box coordinates of objects. Utilizing a Feature Pyramid Network (FPN), it adeptly handles targets of varying scales, particularly excelling with objects of arbitrary aspect ratios. FoveaBox also demonstrates robustness against image deformations.
+
+## Setup
+
+### Install
+
+```bash
+# Install libGL
+## CentOS
+yum install -y mesa-libGL
+## Ubuntu
+apt install -y libgl1-mesa-dev
+
+pip3 install tqdm
+pip3 install onnx
+pip3 install onnxsim
+pip3 install ultralytics
+pip3 install pycocotools
+pip3 install mmdeploy
+pip3 install mmdet
+pip3 install opencv-python==4.6.0.66
+```
+
+### Download
+
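+Pretrained model: download the `fovea_r50_fpn_4x4_1x_coco_20200219-ee4d5303.pth` checkpoint from the MMDetection FoveaBox model zoo.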
+
+Dataset: download the COCO validation dataset and point `DATASETS_DIR` at it.
+
+### Model Conversion
+
+```bash
+# export onnx model
+python3 export.py --weight fovea_r50_fpn_4x4_1x_coco_20200219-ee4d5303.pth --cfg fovea_r50_fpn_4xb4-1x_coco.py --output foveabox.onnx
+
+# Use onnxsim to optimize the ONNX model
+onnxsim foveabox.onnx foveabox_opt.onnx
+```
+
+## Inference
+
+```bash
+export DATASETS_DIR=/Path/to/coco/
+```
+
+### FP16
+
+```bash
+# Accuracy
+bash scripts/infer_foveabox_fp16_accuracy.sh
+# Performance
+bash scripts/infer_foveabox_fp16_performance.sh
+```
+
+## Results
+
+Model |BatchSize |Precision |FPS |IOU@0.5 |IOU@0.5:0.95 |
+---------|-----------|----------|----------|----------|---------------|
+FoveaBox | 32 | FP16 | 181.304 | 0.531 | 0.346 |
+
+## Reference
+
+mmdetection: <https://github.com/open-mmlab/mmdetection>
diff --git a/models/cv/detection/foveabox/ixrt/build_engine.py b/models/cv/detection/foveabox/ixrt/build_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..1bba10d0ee686cd6931161a249a6be887ea7cb24
--- /dev/null
+++ b/models/cv/detection/foveabox/ixrt/build_engine.py
@@ -0,0 +1,55 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import os
+import cv2
+import argparse
+import numpy as np
+
+import torch
+import tensorrt
+
+def main(config):
+    """Build a serialized engine from an ONNX model and write it to disk.
+
+    Args:
+        config: Parsed CLI namespace providing ``model`` (ONNX file path),
+            ``precision`` (``"int8"`` selects the INT8 builder flag, any
+            other value selects FP16) and ``engine`` (output file path).
+
+    Raises:
+        RuntimeError: If the ONNX file cannot be parsed or the builder does
+            not return a serialized network.
+    """
+    IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING)
+    builder = tensorrt.Builder(IXRT_LOGGER)
+    EXPLICIT_BATCH = 1 << int(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
+    network = builder.create_network(EXPLICIT_BATCH)
+    build_config = builder.create_builder_config()
+    parser = tensorrt.OnnxParser(network, IXRT_LOGGER)
+    # The parser reports failure through its return value; stop here instead
+    # of building an engine from an empty or partial network.
+    if not parser.parse_from_file(config.model):
+        raise RuntimeError(f"Failed to parse ONNX model `{config.model}`.")
+
+    # NOTE(review): "float32" is an accepted --precision choice but is built
+    # with the FP16 flag here, exactly like "float16" - confirm this is intended.
+    precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16
+    build_config.set_flag(precision)
+
+    plan = builder.build_serialized_network(network, build_config)
+    if plan is None:
+        raise RuntimeError(f"Failed to build an engine from `{config.model}`.")
+    engine_file_path = config.engine
+    with open(engine_file_path, "wb") as f:
+        f.write(plan)
+
+def parse_args():
+    """Parse the command-line options of the engine builder.
+
+    Returns:
+        argparse.Namespace with ``model``, ``precision`` and ``engine``.
+    """
+    cli = argparse.ArgumentParser()
+    cli.add_argument("--model", type=str)
+    cli.add_argument(
+        "--precision",
+        type=str,
+        choices=["float16", "int8", "float32"],
+        default="int8",
+        help="The precision of datatype",
+    )
+    # Output path of the serialized engine.
+    cli.add_argument("--engine", type=str, default=None)
+
+    return cli.parse_args()
+
+# Script entry point: parse the CLI options and build the engine.
+if __name__ == "__main__":
+    main(parse_args())
\ No newline at end of file
diff --git a/models/cv/detection/foveabox/ixrt/common.py b/models/cv/detection/foveabox/ixrt/common.py
new file mode 100644
index 0000000000000000000000000000000000000000..ef92a6ba6291058d20f575edb09da35ebff3a937
--- /dev/null
+++ b/models/cv/detection/foveabox/ixrt/common.py
@@ -0,0 +1,69 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import os
+import cv2
+import glob
+import torch
+import tensorrt
+import numpy as np
+from cuda import cuda, cudart
+
+def create_engine_context(engine_path, logger):
+    """Deserialize the engine file and create an execution context for it.
+
+    Returns:
+        ``(engine, context)``. Each intermediate object is asserted to be
+        truthy before it is used.
+    """
+    with open(engine_path, "rb") as engine_file:
+        runtime = tensorrt.Runtime(logger)
+        assert runtime
+        serialized = engine_file.read()
+        engine = runtime.deserialize_cuda_engine(serialized)
+        assert engine
+        context = engine.create_execution_context()
+        assert context
+
+    return engine, context
+
+def get_io_bindings(engine):
+    """Allocate one device buffer per engine binding and describe each one.
+
+    Returns:
+        ``(inputs, outputs, allocations)``: ``inputs`` and ``outputs`` are
+        lists of dicts with the keys ``index``, ``name``, ``dtype``,
+        ``shape``, ``allocation`` and ``nbytes``; ``allocations`` lists the
+        device pointers of all bindings in binding order.
+    """
+    # Setup I/O bindings
+    inputs = []
+    outputs = []
+    allocations = []
+
+    for i in range(engine.num_bindings):
+        name = engine.get_binding_name(i)
+        np_dtype = np.dtype(tensorrt.nptype(engine.get_binding_dtype(i)))
+        shape = list(engine.get_binding_shape(i))
+        # Buffer size in bytes: item size times the product of all dimensions.
+        size = np_dtype.itemsize
+        for s in shape:
+            size *= s
+        err, allocation = cudart.cudaMalloc(size)
+        assert err == cudart.cudaError_t.cudaSuccess
+        binding = {
+            "index": i,
+            "name": name,
+            "dtype": np_dtype,
+            "shape": shape,
+            "allocation": allocation,
+            "nbytes": size,
+        }
+        print(f"binding {i}, name : {name} dtype : {np_dtype} shape : {shape}")
+        allocations.append(allocation)
+        if engine.binding_is_input(i):
+            inputs.append(binding)
+        else:
+            outputs.append(binding)
+    return inputs, outputs, allocations
\ No newline at end of file
diff --git a/models/cv/detection/foveabox/ixrt/config/FOVEABOX_CONFIG b/models/cv/detection/foveabox/ixrt/config/FOVEABOX_CONFIG
new file mode 100644
index 0000000000000000000000000000000000000000..635c2b0de69ed23231edfa091badcffa3c5cc446
--- /dev/null
+++ b/models/cv/detection/foveabox/ixrt/config/FOVEABOX_CONFIG
@@ -0,0 +1,31 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+# BSZ : batch size used when building the engine and when running inference
+# IMGSIZE : model input height/width
+# RUN_MODE : [FPS, MAP]
+# PRECISION : [float16, int8]
+# MODEL_NAME : basename of the generated onnx/engine files
+# ORIGINE_MODEL : original onnx file
+# COCO_GT : COCOEVAL annotation file
+# DATASET_DIR : dataset path used for quantization/inference
+# CHECKPOINTS_DIR : directory where the generated onnx/engine files are stored
+# LAYER_FUSION : run the decoder part with fused operators; 0 = no fusion, 1 = fusion
+# DECODER_FASTER : two fused implementations exist; the faster one is quicker and can feed GPU NMS directly, the other keeps its outputs consistent with onnx. 1:faster
+IMGSIZE=800
+MODEL_NAME=foveabox
+ORIGINE_MODEL=foveabox_opt.onnx
+DATA_PROCESS_TYPE=foveabox
+MODEL_INPUT_NAMES=(input)
diff --git a/models/cv/detection/foveabox/ixrt/deploy_default.py b/models/cv/detection/foveabox/ixrt/deploy_default.py
new file mode 100644
index 0000000000000000000000000000000000000000..b8d8e43dc829456f0c2e46a7acfc3128757f945d
--- /dev/null
+++ b/models/cv/detection/foveabox/ixrt/deploy_default.py
@@ -0,0 +1,41 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+# ONNX export options.
+onnx_config = {
+    'type': 'onnx',
+    'export_params': True,
+    'keep_initializers_as_inputs': False,
+    'opset_version': 11,
+    'save_file': 'end2end.onnx',
+    'input_names': ['input'],
+    'output_names': ['output'],
+    'input_shape': None,
+    'optimize': True,
+}
+
+# Codebase (mmdet object detection) options, including post-processing thresholds.
+codebase_config = {
+    'type': 'mmdet',
+    'task': 'ObjectDetection',
+    'model_type': 'end2end',
+    'post_processing': {
+        'score_threshold': 0.05,
+        'confidence_threshold': 0.005,
+        'iou_threshold': 0.5,
+        'max_output_boxes_per_class': 200,
+        'pre_top_k': 5000,
+        'keep_top_k': 100,
+        'background_label_id': -1,
+    },
+}
+
+# Inference backend selection.
+backend_config = {'type': 'onnxruntime'}
\ No newline at end of file
diff --git a/models/cv/detection/foveabox/ixrt/export.py b/models/cv/detection/foveabox/ixrt/export.py
new file mode 100644
index 0000000000000000000000000000000000000000..bceaba7801843feb5ef095f62a71a2f6e0074db4
--- /dev/null
+++ b/models/cv/detection/foveabox/ixrt/export.py
@@ -0,0 +1,74 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import argparse
+
+import torch
+from mmdeploy.utils import load_config
+from mmdeploy.apis import build_task_processor
+
+def parse_args():
+    """Parse the command-line options of the ONNX export script.
+
+    Returns:
+        An ``argparse.Namespace`` with the required string attributes
+        ``weight``, ``cfg`` and ``output``.
+    """
+    cli = argparse.ArgumentParser()
+
+    # All three options are mandatory strings; only flag and help text differ.
+    required_options = (
+        ("--weight", "pytorch model weight."),
+        ("--cfg", "model config file."),
+        ("--output", "export onnx model path."),
+    )
+    for flag, description in required_options:
+        cli.add_argument(flag, type=str, required=True, help=description)
+
+    return cli.parse_args()
+
+def main():
+    """Build the PyTorch model through mmdeploy and export it to ONNX.
+
+    The deploy config is read from ``deploy_default.py`` (relative to the
+    current working directory); model config, checkpoint and output path
+    come from the command line.
+    """
+    cli_args = parse_args()
+
+    deploy_cfg, model_cfg = load_config('deploy_default.py', cli_args.cfg)
+
+    processor = build_task_processor(model_cfg, deploy_cfg, device='cpu')
+    detector = processor.build_pytorch_model(cli_args.weight)
+
+    # Export with a 1x3x800x800 random input; axis 0 of 'input' and 'output'
+    # is declared dynamic under the symbolic name '-1'.
+    torch.onnx.export(
+        detector,
+        torch.randn(1, 3, 800, 800),
+        cli_args.output,
+        input_names=['input'],
+        output_names=['output'],
+        dynamic_axes={'input': {0: '-1'}, 'output': {0: '-1'}},
+        opset_version=13,
+    )
+
+    print("Export onnx model successfully! ")
+
+if __name__ == '__main__':
+    main()
+
diff --git a/models/cv/detection/foveabox/ixrt/fovea_r50_fpn_4xb4-1x_coco.py b/models/cv/detection/foveabox/ixrt/fovea_r50_fpn_4xb4-1x_coco.py
new file mode 100644
index 0000000000000000000000000000000000000000..5be9e50af49009813315b0f7b9fc3221134d8ee1
--- /dev/null
+++ b/models/cv/detection/foveabox/ixrt/fovea_r50_fpn_4xb4-1x_coco.py
@@ -0,0 +1,287 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+# Runtime-level settings: hooks, environment and logging.
+auto_scale_lr = {'base_batch_size': 16, 'enable': False}
+backend_args = None
+data_root = 'data/coco/'
+dataset_type = 'CocoDataset'
+default_hooks = {
+    'checkpoint': {'interval': 1, 'type': 'CheckpointHook'},
+    'logger': {'interval': 50, 'type': 'LoggerHook'},
+    'param_scheduler': {'type': 'ParamSchedulerHook'},
+    'sampler_seed': {'type': 'DistSamplerSeedHook'},
+    'timer': {'type': 'IterTimerHook'},
+    'visualization': {'type': 'DetVisualizationHook'},
+}
+default_scope = 'mmdet'
+env_cfg = {
+    'cudnn_benchmark': False,
+    'dist_cfg': {'backend': 'nccl'},
+    'mp_cfg': {'mp_start_method': 'fork', 'opencv_num_threads': 0},
+}
+load_from = None
+log_level = 'INFO'
+log_processor = {'by_epoch': True, 'type': 'LogProcessor', 'window_size': 50}
+# Detector definition: ResNet-50 backbone, FPN neck and FoveaHead bbox head.
+model = {
+    'backbone': {
+        'depth': 50,
+        'frozen_stages': 1,
+        'init_cfg': {'checkpoint': 'torchvision://resnet50', 'type': 'Pretrained'},
+        'norm_cfg': {'requires_grad': True, 'type': 'BN'},
+        'norm_eval': True,
+        'num_stages': 4,
+        'out_indices': (0, 1, 2, 3),
+        'style': 'pytorch',
+        'type': 'ResNet',
+    },
+    'bbox_head': {
+        'base_edge_list': [16, 32, 64, 128, 256],
+        'feat_channels': 256,
+        'in_channels': 256,
+        'loss_bbox': {'beta': 0.11, 'loss_weight': 1.0, 'type': 'SmoothL1Loss'},
+        'loss_cls': {
+            'alpha': 0.4,
+            'gamma': 1.5,
+            'loss_weight': 1.0,
+            'type': 'FocalLoss',
+            'use_sigmoid': True,
+        },
+        'num_classes': 80,
+        'scale_ranges': ((1, 64), (32, 128), (64, 256), (128, 512), (256, 2048)),
+        'sigma': 0.4,
+        'stacked_convs': 4,
+        'strides': [8, 16, 32, 64, 128],
+        'type': 'FoveaHead',
+        'with_deform': False,
+    },
+    'data_preprocessor': {
+        'bgr_to_rgb': True,
+        'mean': [123.675, 116.28, 103.53],
+        'pad_size_divisor': 32,
+        'std': [58.395, 57.12, 57.375],
+        'type': 'DetDataPreprocessor',
+    },
+    'neck': {
+        'add_extra_convs': 'on_input',
+        'in_channels': [256, 512, 1024, 2048],
+        'num_outs': 5,
+        'out_channels': 256,
+        'start_level': 1,
+        'type': 'FPN',
+    },
+    'test_cfg': {
+        'max_per_img': 100,
+        'nms': {'iou_threshold': 0.5, 'type': 'nms'},
+        'nms_pre': 1000,
+        'score_thr': 0.05,
+    },
+    'train_cfg': {},
+    'type': 'FOVEA',
+}
+# Optimizer and learning-rate schedule.
+optim_wrapper = {
+    'optimizer': {'lr': 0.01, 'momentum': 0.9, 'type': 'SGD', 'weight_decay': 0.0001},
+    'type': 'OptimWrapper',
+}
+param_scheduler = [
+    {'begin': 0, 'by_epoch': False, 'end': 500, 'start_factor': 0.001, 'type': 'LinearLR'},
+    {
+        'begin': 0,
+        'by_epoch': True,
+        'end': 12,
+        'gamma': 0.1,
+        'milestones': [8, 11],
+        'type': 'MultiStepLR',
+    },
+]
+resume = False
+test_cfg = {'type': 'TestLoop'}
+# Test-time data loading, evaluation metric and pipeline.
+test_dataloader = {
+    'batch_size': 32,
+    'dataset': {
+        'ann_file': 'annotations/instances_val2017.json',
+        'backend_args': None,
+        'data_prefix': {'img': 'images/val2017/'},
+        'data_root': 'data/coco/',
+        'pipeline': [
+            {'backend_args': None, 'type': 'LoadImageFromFile'},
+            {'keep_ratio': True, 'scale': (800, 800), 'type': 'Resize'},
+            {'type': 'LoadAnnotations', 'with_bbox': True},
+            {
+                'meta_keys': ('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor'),
+                'type': 'PackDetInputs',
+            },
+        ],
+        'test_mode': True,
+        'type': 'CocoDataset',
+    },
+    'drop_last': False,
+    'num_workers': 2,
+    'persistent_workers': True,
+    'sampler': {'shuffle': False, 'type': 'DefaultSampler'},
+}
+test_evaluator = {
+    'ann_file': 'data/coco/annotations/instances_val2017.json',
+    'backend_args': None,
+    'format_only': False,
+    'metric': 'bbox',
+    'type': 'CocoMetric',
+}
+test_pipeline = [
+    {'backend_args': None, 'type': 'LoadImageFromFile'},
+    {'keep_ratio': True, 'scale': (800, 800), 'type': 'Resize'},
+    {'type': 'LoadAnnotations', 'with_bbox': True},
+    {
+        'meta_keys': ('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor'),
+        'type': 'PackDetInputs',
+    },
+]
+# Training loop, data loading and pipeline.
+train_cfg = {'max_epochs': 12, 'type': 'EpochBasedTrainLoop', 'val_interval': 1}
+train_dataloader = {
+    'batch_sampler': {'type': 'AspectRatioBatchSampler'},
+    'batch_size': 4,
+    'dataset': {
+        'ann_file': 'annotations/instances_train2017.json',
+        'backend_args': None,
+        'data_prefix': {'img': 'train2017/'},
+        'data_root': 'data/coco/',
+        'filter_cfg': {'filter_empty_gt': True, 'min_size': 32},
+        'pipeline': [
+            {'backend_args': None, 'type': 'LoadImageFromFile'},
+            {'type': 'LoadAnnotations', 'with_bbox': True},
+            {'keep_ratio': True, 'scale': (800, 800), 'type': 'Resize'},
+            {'prob': 0.5, 'type': 'RandomFlip'},
+            {'type': 'PackDetInputs'},
+        ],
+        'type': 'CocoDataset',
+    },
+    'num_workers': 4,
+    'persistent_workers': True,
+    'sampler': {'shuffle': True, 'type': 'DefaultSampler'},
+}
+train_pipeline = [
+    {'backend_args': None, 'type': 'LoadImageFromFile'},
+    {'type': 'LoadAnnotations', 'with_bbox': True},
+    {'keep_ratio': True, 'scale': (800, 800), 'type': 'Resize'},
+    {'prob': 0.5, 'type': 'RandomFlip'},
+    {'type': 'PackDetInputs'},
+]
+# Validation loop, data loading and evaluation metric.
+val_cfg = {'type': 'ValLoop'}
+val_dataloader = {
+    'batch_size': 1,
+    'dataset': {
+        'ann_file': 'annotations/instances_val2017.json',
+        'backend_args': None,
+        'data_prefix': {'img': 'val2017/'},
+        'data_root': 'data/coco/',
+        'pipeline': [
+            {'backend_args': None, 'type': 'LoadImageFromFile'},
+            {'keep_ratio': True, 'scale': (800, 800), 'type': 'Resize'},
+            {'type': 'LoadAnnotations', 'with_bbox': True},
+            {
+                'meta_keys': ('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor'),
+                'type': 'PackDetInputs',
+            },
+        ],
+        'test_mode': True,
+        'type': 'CocoDataset',
+    },
+    'drop_last': False,
+    'num_workers': 2,
+    'persistent_workers': True,
+    'sampler': {'shuffle': False, 'type': 'DefaultSampler'},
+}
+val_evaluator = {
+    'ann_file': 'data/coco/annotations/instances_val2017.json',
+    'backend_args': None,
+    'format_only': False,
+    'metric': 'bbox',
+    'type': 'CocoMetric',
+}
+# Visualization backends and output directory.
+vis_backends = [{'type': 'LocalVisBackend'}]
+visualizer = {
+    'name': 'visualizer',
+    'type': 'DetLocalVisualizer',
+    'vis_backends': [{'type': 'LocalVisBackend'}],
+}
+work_dir = './'
diff --git a/models/cv/detection/foveabox/ixrt/inference.py b/models/cv/detection/foveabox/ixrt/inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..bcfe0e22d465482280149396b099fdd95e6681f7
--- /dev/null
+++ b/models/cv/detection/foveabox/ixrt/inference.py
@@ -0,0 +1,193 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import os
+import argparse
+import torch
+import torchvision
+import numpy as np
+import time
+from tqdm import tqdm
+from mmdet.registry import RUNNERS
+from mmengine.config import Config
+
+import tensorrt
+from common import create_engine_context, get_io_bindings
+from cuda import cuda, cudart
+
+def _str2bool(value):
+    """Convert a command-line token such as ``True``/``false``/``1`` to bool.
+
+    Raises:
+        argparse.ArgumentTypeError: If the token is not a recognised boolean.
+    """
+    if isinstance(value, bool):
+        return value
+    token = str(value).strip().lower()
+    if token in ("true", "t", "yes", "y", "1"):
+        return True
+    if token in ("false", "f", "no", "n", "0"):
+        return False
+    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")
+
+def parse_args():
+    """Parse the command-line options of the inference script.
+
+    Returns:
+        An ``argparse.Namespace`` with ``engine_file``, ``batchsize``,
+        ``datasets``, ``input_name`` (all required), ``warmup`` (default 3),
+        ``acc_target`` (default ``None``), ``fps_target`` (default 0.0) and
+        ``perf_only`` (default ``False``).
+    """
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument("--engine_file",
+                        type=str,
+                        required=True,
+                        help="ixrt engine path.")
+
+    parser.add_argument("--batchsize",
+                        type=int,
+                        required=True,
+                        help="inference batch size.")
+
+    parser.add_argument("--datasets",
+                        type=str,
+                        required=True,
+                        help="datasets path.")
+
+    parser.add_argument("--input_name",
+                        type=str,
+                        required=True,
+                        help="input name of the model.")
+
+    parser.add_argument("--warmup",
+                        type=int,
+                        default=3,
+                        help="number of warmup before test.")
+
+    parser.add_argument("--acc_target",
+                        type=float,
+                        default=None,
+                        help="Model inference Accuracy target.")
+
+    parser.add_argument("--fps_target",
+                        type=float,
+                        default=0.0,
+                        help="Model inference FPS target.")
+
+    # ``type=bool`` would turn every non-empty string (including "False")
+    # into True, so the value is parsed explicitly instead.
+    parser.add_argument("--perf_only",
+                        type=_str2bool,
+                        default=False,
+                        help="Run performance test only")
+
+    args = parser.parse_args()
+
+    return args
+
+def main():
+    """Benchmark the engine or evaluate it on the dataset given by ``--datasets``.
+
+    With ``--perf_only`` the engine is executed 100 times on the preallocated
+    device buffers, the resulting FPS is compared with ``--fps_target`` and
+    the process exits with status 0 (pass) or 1 (fail). Otherwise the mmdet
+    test dataloader is iterated, every batch is run through the engine, and
+    the outputs are decoded by the model's bbox head and fed to the test
+    evaluator.
+    """
+    args = parse_args()
+
+    batch_size = args.batchsize
+
+    logger = tensorrt.Logger(tensorrt.Logger.ERROR)
+
+    # Load Engine && I/O bindings
+    engine, context = create_engine_context(args.engine_file, logger)
+    inputs, outputs, allocations = get_io_bindings(engine)
+
+    # just run perf test
+    if args.perf_only:
+        # NOTE(review): this path runs no warm-up iterations; --warmup is
+        # only honoured by the accuracy path below.
+        torch.cuda.synchronize()
+        start_time = time.time()
+
+        for _ in range(100):
+            context.execute_v2(allocations)
+
+        torch.cuda.synchronize()
+        end_time = time.time()
+        forward_time = end_time - start_time
+
+        # Samples processed by the 100 iterations, capped at 50000.
+        num_samples = 50000
+        if 100 * batch_size < num_samples:
+            num_samples = 100 * batch_size
+        fps = num_samples / forward_time
+
+        print("FPS : ", fps)
+        print(f"Performance Check : Test {fps} >= target {args.fps_target}")
+        if fps >= args.fps_target:
+            print("pass!")
+            raise SystemExit(0)
+        else:
+            print("failed!")
+            raise SystemExit(1)
+    else:
+        # warm up
+        print("\nWarm Start.")
+        for _ in range(args.warmup):
+            context.execute_v2(allocations)
+        print("Warm Done.")
+
+        # runner config
+        cfg = Config.fromfile("fovea_r50_fpn_4xb4-1x_coco.py")
+
+        cfg.work_dir = "./workspace"
+        cfg['test_dataloader']['batch_size'] = batch_size
+        cfg['test_dataloader']['dataset']['data_root'] = args.datasets
+        cfg['test_dataloader']['dataset']['data_prefix']['img'] = 'images/val2017/'
+        cfg['test_evaluator']['ann_file'] = os.path.join(args.datasets, 'annotations/instances_val2017.json')
+        cfg['log_level'] = 'ERROR'
+
+        # build runner
+        runner = RUNNERS.build(cfg)
+
+        for input_data in tqdm(runner.test_dataloader):
+
+            input_data = runner.model.data_preprocessor(input_data, False)
+            image = input_data['inputs'].cpu().numpy()
+
+            # A short final batch is resized up to the engine batch size; the
+            # surplus rows are dropped again from every output below.
+            pad_batch = len(image) != batch_size
+            if pad_batch:
+                origin_size = len(image)
+                image = np.resize(image, (batch_size, *image.shape[1:]))
+
+            # Cast to the engine's input dtype and make the buffer contiguous.
+            # The cast result is what gets uploaded; it is not discarded.
+            batch_data = np.ascontiguousarray(image, dtype=inputs[0]["dtype"])
+
+            (err,) = cudart.cudaMemcpy(
+                inputs[0]["allocation"],
+                batch_data,
+                batch_data.nbytes,
+                cudart.cudaMemcpyKind.cudaMemcpyHostToDevice,
+            )
+            assert err == cudart.cudaError_t.cudaSuccess
+
+            context.execute_v2(allocations)
+
+            cls_score = []
+            box_reg = []
+
+            for out_binding in outputs:
+                output = np.zeros(out_binding["shape"], out_binding["dtype"])
+                (err,) = cudart.cudaMemcpy(
+                    output,
+                    out_binding["allocation"],
+                    out_binding["nbytes"],
+                    cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost,
+                )
+                assert err == cudart.cudaError_t.cudaSuccess
+
+                if pad_batch:
+                    output = output[:origin_size]
+
+                output = torch.from_numpy(output)
+
+                # Outputs with 80 channels are collected as class scores,
+                # outputs with 4 channels as box regressions.
+                if output.shape[1] == 80:
+                    cls_score.append(output)
+                elif output.shape[1] == 4:
+                    box_reg.append(output)
+
+            batch_img_metas = [
+                data_samples.metainfo for data_samples in input_data['data_samples']
+            ]
+
+            results_list = runner.model.bbox_head.predict_by_feat(cls_score, box_reg, batch_img_metas=batch_img_metas, rescale=True)
+
+            batch_data_samples = runner.model.add_pred_to_datasample(input_data['data_samples'], results_list)
+
+            runner.test_evaluator.process(data_samples=batch_data_samples, data_batch=input_data)
+
+        # NOTE(review): --acc_target is parsed but the metrics are not
+        # compared against it here.
+        metrics = runner.test_evaluator.evaluate(len(runner.test_dataloader.dataset))
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/models/cv/detection/foveabox/ixrt/modify_batchsize.py b/models/cv/detection/foveabox/ixrt/modify_batchsize.py
new file mode 100644
index 0000000000000000000000000000000000000000..3a88c1603bd6f457fd4965257627dc29edcda4d1
--- /dev/null
+++ b/models/cv/detection/foveabox/ixrt/modify_batchsize.py
@@ -0,0 +1,52 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import onnx
+import argparse
+
+def change_input_dim(model, bsz):
+    """Set the first dimension of every graph input to ``bsz``.
+
+    Args:
+        model: ONNX model whose ``graph.input`` entries are updated in place.
+        bsz: An ``int`` or a digit-only string sets a fixed batch size
+            (``dim_value``); any other string sets a symbolic batch dimension
+            (``dim_param``); every other value falls back to a batch size
+            of 1.
+
+    All inputs receive the same batch dimension. Inputs whose shape has no
+    dimensions are left untouched.
+    """
+    batch_size = bsz
+
+    for graph_input in model.graph.input:
+        dims = graph_input.type.tensor_type.shape.dim
+        if not dims:
+            # Nothing to rewrite for an input without a leading dimension.
+            continue
+        dim1 = dims[0]
+        # Numeric values (including numeric strings) must be tested before the
+        # generic string case, otherwise "32" would become a symbolic name.
+        if isinstance(batch_size, int) or (isinstance(batch_size, str) and batch_size.isdigit()):
+            # set given batch size
+            dim1.dim_value = int(batch_size)
+        elif isinstance(batch_size, str):
+            # set dynamic batch size
+            dim1.dim_param = batch_size
+        else:
+            # set batch size of 1
+            dim1.dim_value = 1
+
+def parse_args():
+    """Parse ``--batch_size`` (int), ``--origin_model`` and ``--output_model`` (str).
+
+    Returns:
+        An ``argparse.Namespace``; options that are not given are ``None``.
+    """
+    cli = argparse.ArgumentParser()
+    cli.add_argument("--batch_size", type=int)
+    for flag in ("--origin_model", "--output_model"):
+        cli.add_argument(flag, type=str)
+    return cli.parse_args()
+
+# Run only when executed as a script, so that importing this module does not
+# parse the command line or touch any model files.
+if __name__ == "__main__":
+    args = parse_args()
+    model = onnx.load(args.origin_model)
+    change_input_dim(model, args.batch_size)
+    onnx.save(model, args.output_model)
\ No newline at end of file
diff --git a/models/cv/detection/foveabox/ixrt/scripts/infer_foveabox_fp16_accuracy.sh b/models/cv/detection/foveabox/ixrt/scripts/infer_foveabox_fp16_accuracy.sh
new file mode 100644
index 0000000000000000000000000000000000000000..4e73f22b7f1ab492676a7e6c0ebd36058b4fff2c
--- /dev/null
+++ b/models/cv/detection/foveabox/ixrt/scripts/infer_foveabox_fp16_accuracy.sh
@@ -0,0 +1,123 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+# Exit code of the whole script; check_status switches it to 1 on failure.
+EXIT_STATUS=0
+# Call right after a command: records a failure if that command exited non-zero.
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then
+        EXIT_STATUS=1
+    fi
+}
+
+# Run parameters
+BSZ=32
+WARM_UP=-1
+TGT=-1
+LOOP_COUNT=-1
+RUN_MODE=MAP
+PRECISION=float16
+
+# Update arguments: "--bs <n>" overrides BSZ, "--tgt <x>" overrides TGT
+index=0
+options=$@
+arguments=($options)
+for argument in $options
+do
+    index=$((index + 1))
+    case $argument in
+      --bs) BSZ=${arguments[index]};;
+      --tgt) TGT=${arguments[index]};;
+    esac
+done
+
+PROJ_DIR=${PROJ_DIR:-"."}
+DATASETS_DIR="${DATASETS_DIR}"
+CHECKPOINTS_DIR="${PROJ_DIR}"
+RUN_DIR="${PROJ_DIR}"
+CONFIG_DIR="${PROJ_DIR}/config/FOVEABOX_CONFIG"
+# The config file provides MODEL_NAME and ORIGINE_MODEL (among others).
+source ${CONFIG_DIR}
+ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo CONFIG_DIR : ${CONFIG_DIR}
+echo ====================== Model Info ======================
+echo Model Name : ${MODEL_NAME}
+echo Onnx Path : ${ORIGINE_MODEL}
+
+step=0
+faster=0
+CURRENT_MODEL=${ORIGINE_MODEL}
+if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then
+    faster=1
+fi
+
+# Simplify Model (skipped when the simplified model already exists)
+let step++
+echo [STEP ${step}] : Simplify Model
+SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
+if [ -f ${SIM_MODEL} ];then
+    echo " "Simplify Model skip, ${SIM_MODEL} has been existed
+else
+    python3 ${RUN_DIR}/simplify_model.py \
+        --origin_model ${CURRENT_MODEL} \
+        --output_model ${SIM_MODEL}
+    echo " "Generate ${SIM_MODEL}
+fi
+CURRENT_MODEL=${SIM_MODEL}
+
+# Change Batchsize (skipped when the batch-size specific model already exists)
+let step++
+echo;
+echo [STEP ${step}] : Change Batchsize
+FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_bs${BSZ}.onnx
+if [ -f $FINAL_MODEL ];then
+    echo " "Change Batchsize Skip, $FINAL_MODEL has been existed
+else
+    python3 ${RUN_DIR}/modify_batchsize.py \
+        --batch_size ${BSZ} \
+        --origin_model ${CURRENT_MODEL} \
+        --output_model ${FINAL_MODEL}
+    echo " "Generate ${FINAL_MODEL}
+fi
+CURRENT_MODEL=${FINAL_MODEL}
+
+# Build Engine (skipped when the engine file already exists)
+let step++
+echo;
+echo [STEP ${step}] : Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
+if [ -f $ENGINE_FILE ];then
+    echo " "Build Engine Skip, $ENGINE_FILE has been existed
+else
+    python3 ${RUN_DIR}/build_engine.py \
+        --precision ${PRECISION} \
+        --model ${CURRENT_MODEL} \
+        --engine ${ENGINE_FILE}
+    echo " "Generate Engine ${ENGINE_FILE}
+fi
+
+# Inference (the full option name --engine_file is used instead of relying
+# on argparse prefix matching)
+let step++
+echo;
+echo [STEP ${step}] : Inference
+python3 ${RUN_DIR}/inference.py \
+    --engine_file ${ENGINE_FILE} \
+    --batchsize ${BSZ} \
+    --input_name input \
+    --datasets ${DATASETS_DIR}; check_status
+exit ${EXIT_STATUS}
\ No newline at end of file
diff --git a/models/cv/detection/foveabox/ixrt/scripts/infer_foveabox_fp16_performance.sh b/models/cv/detection/foveabox/ixrt/scripts/infer_foveabox_fp16_performance.sh
new file mode 100644
index 0000000000000000000000000000000000000000..eb96bcb158443a9605be5a641032c2661050e402
--- /dev/null
+++ b/models/cv/detection/foveabox/ixrt/scripts/infer_foveabox_fp16_performance.sh
@@ -0,0 +1,124 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+# Exit code of the whole script; check_status switches it to 1 on failure.
+EXIT_STATUS=0
+# Call right after a command: records a failure if that command exited non-zero.
+check_status()
+{
+    if ((${PIPESTATUS[0]} != 0));then
+        EXIT_STATUS=1
+    fi
+}
+
+# Run parameters
+BSZ=32
+WARM_UP=-1
+TGT=-1
+LOOP_COUNT=-1
+RUN_MODE=FPS
+PRECISION=float16
+
+# Update arguments: "--bs <n>" overrides BSZ, "--tgt <x>" overrides TGT
+index=0
+options=$@
+arguments=($options)
+for argument in $options
+do
+    index=$((index + 1))
+    case $argument in
+      --bs) BSZ=${arguments[index]};;
+      --tgt) TGT=${arguments[index]};;
+    esac
+done
+
+PROJ_DIR=${PROJ_DIR:-"."}
+DATASETS_DIR="${DATASETS_DIR}"
+CHECKPOINTS_DIR="${PROJ_DIR}"
+RUN_DIR="${PROJ_DIR}"
+CONFIG_DIR="${PROJ_DIR}/config/FOVEABOX_CONFIG"
+# The config file provides MODEL_NAME and ORIGINE_MODEL (among others).
+source ${CONFIG_DIR}
+ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}
+
+echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
+echo DATASETS_DIR : ${DATASETS_DIR}
+echo RUN_DIR : ${RUN_DIR}
+echo CONFIG_DIR : ${CONFIG_DIR}
+echo ====================== Model Info ======================
+echo Model Name : ${MODEL_NAME}
+echo Onnx Path : ${ORIGINE_MODEL}
+
+step=0
+faster=0
+CURRENT_MODEL=${ORIGINE_MODEL}
+if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then
+    faster=1
+fi
+
+# Simplify Model (skipped when the simplified model already exists)
+let step++
+echo [STEP ${step}] : Simplify Model
+SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
+if [ -f ${SIM_MODEL} ];then
+    echo " "Simplify Model skip, ${SIM_MODEL} has been existed
+else
+    python3 ${RUN_DIR}/simplify_model.py \
+        --origin_model ${CURRENT_MODEL} \
+        --output_model ${SIM_MODEL}
+    echo " "Generate ${SIM_MODEL}
+fi
+CURRENT_MODEL=${SIM_MODEL}
+
+# Change Batchsize (skipped when the batch-size specific model already exists)
+let step++
+echo;
+echo [STEP ${step}] : Change Batchsize
+FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_bs${BSZ}.onnx
+if [ -f $FINAL_MODEL ];then
+    echo " "Change Batchsize Skip, $FINAL_MODEL has been existed
+else
+    python3 ${RUN_DIR}/modify_batchsize.py \
+        --batch_size ${BSZ} \
+        --origin_model ${CURRENT_MODEL} \
+        --output_model ${FINAL_MODEL}
+    echo " "Generate ${FINAL_MODEL}
+fi
+CURRENT_MODEL=${FINAL_MODEL}
+
+# Build Engine (skipped when the engine file already exists)
+let step++
+echo;
+echo [STEP ${step}] : Build Engine
+ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
+if [ -f $ENGINE_FILE ];then
+    echo " "Build Engine Skip, $ENGINE_FILE has been existed
+else
+    python3 ${RUN_DIR}/build_engine.py \
+        --precision ${PRECISION} \
+        --model ${CURRENT_MODEL} \
+        --engine ${ENGINE_FILE}
+    echo " "Generate Engine ${ENGINE_FILE}
+fi
+
+# Inference: the full option name --engine_file is used instead of relying on
+# argparse prefix matching, and the --tgt value is forwarded as the FPS target
+# (the default of -1 always passes).
+let step++
+echo;
+echo [STEP ${step}] : Inference
+python3 ${RUN_DIR}/inference.py \
+    --engine_file ${ENGINE_FILE} \
+    --batchsize ${BSZ} \
+    --input_name input \
+    --perf_only True \
+    --fps_target ${TGT} \
+    --datasets ${DATASETS_DIR}; check_status
+exit ${EXIT_STATUS}
\ No newline at end of file
diff --git a/models/cv/detection/foveabox/ixrt/simplify_model.py b/models/cv/detection/foveabox/ixrt/simplify_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..1400fd81ddb4b3fae1b20d0fd35082a692f5d292
--- /dev/null
+++ b/models/cv/detection/foveabox/ixrt/simplify_model.py
@@ -0,0 +1,36 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import onnx
+import argparse
+from onnxsim import simplify
+
+# Simplify
+def simplify_model(args):
+    """Simplify an ONNX model with onnxsim, run shape inference and save it.
+
+    Args:
+        args: Namespace with ``origin_model`` (path of the model to load) and
+            ``output_model`` (path the simplified model is saved to).
+    """
+    onnx_model = onnx.load(args.origin_model)
+    model_simp, check = simplify(onnx_model)
+    if not check:
+        # The second return value of simplify() was previously ignored.
+        # Report a failed check, but still save the model as before.
+        print(" Warning: simplified onnx model could not be validated.")
+    model_simp = onnx.shape_inference.infer_shapes(model_simp)
+    onnx.save(model_simp, args.output_model)
+    print(" Simplify onnx Done.")
+
+def parse_args():
+    """Parse the string options ``--origin_model`` and ``--output_model``.
+
+    Returns:
+        An ``argparse.Namespace``; options that are not given are ``None``.
+    """
+    cli = argparse.ArgumentParser()
+    for flag in ("--origin_model", "--output_model"):
+        cli.add_argument(flag, type=str)
+    return cli.parse_args()
+
+# Run only when executed as a script, so that importing this module does not
+# parse the command line or rewrite any model files.
+if __name__ == "__main__":
+    args = parse_args()
+    simplify_model(args)
\ No newline at end of file
diff --git a/models/cv/detection/fsaf/ixrt/README.md b/models/cv/detection/fsaf/ixrt/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..5d75a2e6e3d0df468786d9f99a416b331c59d8c5
--- /dev/null
+++ b/models/cv/detection/fsaf/ixrt/README.md
@@ -0,0 +1,64 @@
+# FSAF
+
+## Description
+
+The FSAF (Feature Selective Anchor-Free) module is an innovative component for single-shot object detection that enhances performance through online feature selection and anchor-free branches. The FSAF module dynamically selects the most suitable feature level for each object instance, rather than relying on traditional anchor-based heuristic methods. This improvement significantly boosts the accuracy of object detection, especially for small targets and in complex scenes. Moreover, compared to existing anchor-based detectors, the FSAF module maintains high efficiency while adding negligible additional inference overhead.
+
+## Setup
+
+### Install
+
+```bash
+# Install libGL
+## CentOS
+yum install -y mesa-libGL
+## Ubuntu
+apt install -y libgl1-mesa-dev
+
+pip3 install -r requirements.txt
+```
+
+### Download
+
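+Pretrained model: <https://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_r50_fpn_1x_coco/fsaf_r50_fpn_1x_coco-94ccc51f.pth>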
+
+Dataset: <https://cocodataset.org/#download> to download the validation dataset.
+
+```bash
+wget https://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_r50_fpn_1x_coco/fsaf_r50_fpn_1x_coco-94ccc51f.pth
+```
+
+### Model Conversion
+
+```bash
+# export onnx model
+python3 export.py --weight fsaf_r50_fpn_1x_coco-94ccc51f.pth --cfg fsaf_r50_fpn_1x_coco.py --output fsaf.onnx
+
+# use onnxsim optimize onnx model
+onnxsim fsaf.onnx fsaf_opt.onnx
+```
+
+## Inference
+
+```bash
+export DATASETS_DIR=/Path/to/coco/
+```
+
+### FP16
+
+```bash
+# Accuracy
+bash scripts/infer_fsaf_fp16_accuracy.sh
+# Performance
+bash scripts/infer_fsaf_fp16_performance.sh
+```
+
+## Results
+
+Model |BatchSize |Precision |FPS |IOU@0.5 |IOU@0.5:0.95 |
+-------|-----------|----------|----------|----------|---------------|
+FSAF | 32 | FP16 | 133.85 | 0.530 | 0.345 |
+
+## Reference
+
+mmdetection: <https://github.com/open-mmlab/mmdetection.git>
diff --git a/models/cv/detection/fsaf/ixrt/build_engine.py b/models/cv/detection/fsaf/ixrt/build_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..d7ad10e5e6da847867df40aa68c5a4f8710753ea
--- /dev/null
+++ b/models/cv/detection/fsaf/ixrt/build_engine.py
@@ -0,0 +1,62 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import os
+import cv2
+import argparse
+import numpy as np
+
+import torch
+import tensorrt
+from tensorrt import Dims
+
+def main(config):
+    """Build a serialized engine from an ONNX model and write it to disk.
+
+    Args:
+        config: Namespace with ``model`` (ONNX file path), ``precision``
+            (``"int8"`` enables the INT8 builder flag, any other value the
+            FP16 flag) and ``engine`` (output file path).
+
+    Raises:
+        RuntimeError: If the ONNX parser reports a failure or the builder
+            does not return a serialized engine.
+    """
+    # NOTE(review): the input shape is fixed to batch size 32 regardless of
+    # the batch size of the ONNX model passed in; confirm against the
+    # calling scripts before using another batch size.
+    input_shape = [32, 3, 800, 800]
+
+    IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING)
+    builder = tensorrt.Builder(IXRT_LOGGER)
+    EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
+    network = builder.create_network(EXPLICIT_BATCH)
+    build_config = builder.create_builder_config()
+    profile = builder.create_optimization_profile()
+    profile.set_shape("input", Dims(input_shape), Dims(input_shape), Dims(input_shape))
+    build_config.add_optimization_profile(profile)
+    parser = tensorrt.OnnxParser(network, IXRT_LOGGER)
+    # Only an explicit False is treated as failure, so a binding that
+    # returns nothing keeps working as before.
+    if parser.parse_from_file(config.model) is False:
+        raise RuntimeError(f"Failed to parse ONNX model: {config.model}")
+
+    # NOTE(review): "float32" is an accepted choice but is built with the
+    # FP16 flag, exactly like "float16".
+    precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16
+    build_config.set_flag(precision)
+
+    for i in range(network.num_inputs):
+        input_tensor = network.get_input(i)
+        input_tensor.shape = Dims(input_shape)
+
+    plan = builder.build_serialized_network(network, build_config)
+    if plan is None:
+        # Fail before opening the output file, so that no empty engine file
+        # is left behind for later runs to mistake for a finished build.
+        raise RuntimeError(f"Failed to build engine from: {config.model}")
+
+    engine_file_path = config.engine
+    with open(engine_file_path, "wb") as f:
+        f.write(plan)
+
+def parse_args():
+    """Parse the command-line options of the engine build script.
+
+    Returns:
+        An ``argparse.Namespace`` with ``model`` (str, default ``None``),
+        ``precision`` (one of ``float16``/``int8``/``float32``, default
+        ``float16``) and ``engine`` (str, default ``None``).
+    """
+    cli = argparse.ArgumentParser()
+    cli.add_argument("--model", type=str)
+    cli.add_argument(
+        "--precision",
+        type=str,
+        default="float16",
+        choices=["float16", "int8", "float32"],
+        help="The precision of datatype",
+    )
+    cli.add_argument("--engine", type=str, default=None)
+    return cli.parse_args()
+
+if __name__ == "__main__":
+    main(parse_args())
\ No newline at end of file
diff --git a/models/cv/detection/fsaf/ixrt/common.py b/models/cv/detection/fsaf/ixrt/common.py
new file mode 100644
index 0000000000000000000000000000000000000000..ef92a6ba6291058d20f575edb09da35ebff3a937
--- /dev/null
+++ b/models/cv/detection/fsaf/ixrt/common.py
@@ -0,0 +1,69 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import os
+import cv2
+import glob
+import torch
+import tensorrt
+import numpy as np
+from cuda import cuda, cudart
+
+def create_engine_context(engine_path, logger):
+    """Load the serialized engine at ``engine_path``; return ``(engine, context)``."""
+    with open(engine_path, "rb") as engine_file:
+        runtime = tensorrt.Runtime(logger)
+        assert runtime
+        engine = runtime.deserialize_cuda_engine(engine_file.read())
+    assert engine
+    context = engine.create_execution_context()
+    assert context
+    return engine, context
+
+def get_io_bindings(engine):
+    """Allocate a device buffer for every binding of ``engine``.
+
+    Returns ``(inputs, outputs, allocations)``: the binding descriptors split
+    by direction, plus every device pointer in binding-index order. The
+    buffers come from ``cudaMalloc`` and are not freed by this function.
+    """
+    inputs = []
+    outputs = []
+    allocations = []
+
+    for i in range(engine.num_bindings):
+        is_input = engine.binding_is_input(i)
+        name = engine.get_binding_name(i)
+        np_dtype = np.dtype(tensorrt.nptype(engine.get_binding_dtype(i)))
+        shape = list(engine.get_binding_shape(i))
+        # Buffer size in bytes = item size * product of all dimensions.
+        size = np_dtype.itemsize
+        for dim in shape:
+            size *= dim
+        err, allocation = cudart.cudaMalloc(size)
+        assert err == cudart.cudaError_t.cudaSuccess
+        binding = {
+            "index": i,
+            "name": name,
+            "dtype": np_dtype,
+            "shape": shape,
+            "allocation": allocation,
+            "nbytes": size,
+        }
+        print(f"binding {i}, name : {name} dtype : {np_dtype} shape : {shape}")
+        allocations.append(allocation)
+        (inputs if is_input else outputs).append(binding)
+
+    return inputs, outputs, allocations
\ No newline at end of file
diff --git a/models/cv/detection/fsaf/ixrt/deploy_default.py b/models/cv/detection/fsaf/ixrt/deploy_default.py
new file mode 100644
index 0000000000000000000000000000000000000000..b8d8e43dc829456f0c2e46a7acfc3128757f945d
--- /dev/null
+++ b/models/cv/detection/fsaf/ixrt/deploy_default.py
@@ -0,0 +1,41 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+onnx_config = dict(  # ONNX export section of the deploy config that export.py loads via mmdeploy's load_config
+ type='onnx',
+ export_params=True,
+ keep_initializers_as_inputs=False,
+ opset_version=11,  # NOTE(review): export.py calls torch.onnx.export with opset_version=13 and its own output path — confirm whether these values are used
+ save_file='end2end.onnx',
+ input_names=['input'],
+ output_names=['output'],
+ input_shape=None,
+ optimize=True)
+# Codebase section: mmdet ObjectDetection, 'end2end' model type, with post-processing thresholds.
+codebase_config = dict(
+ type='mmdet',
+ task='ObjectDetection',
+ model_type='end2end',
+ post_processing=dict(
+ score_threshold=0.05,
+ confidence_threshold=0.005,
+ iou_threshold=0.5,
+ max_output_boxes_per_class=200,
+ pre_top_k=5000,
+ keep_top_k=100,
+ background_label_id=-1,
+ ))
+# Backend section of the deploy config.
+backend_config = dict(type='onnxruntime')
\ No newline at end of file
diff --git a/models/cv/detection/fsaf/ixrt/export.py b/models/cv/detection/fsaf/ixrt/export.py
new file mode 100644
index 0000000000000000000000000000000000000000..13573c9dff3d96be4ba59eaa8698d67fb1d50f13
--- /dev/null
+++ b/models/cv/detection/fsaf/ixrt/export.py
@@ -0,0 +1,72 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import argparse
+
+import torch
+from mmdeploy.utils import load_config
+from mmdeploy.apis import build_task_processor
+
+def parse_args():
+    """Parse the command-line options of the ONNX export script.
+
+    All three options are required strings:
+    ``--weight`` (pytorch model weight), ``--cfg`` (model config file) and
+    ``--output`` (export onnx model path).
+    """
+    parser = argparse.ArgumentParser()
+
+    # (flag, help text) for every option; all share type=str, required=True.
+    required_options = (
+        ("--weight", "pytorch model weight."),
+        ("--cfg", "model config file."),
+        ("--output", "export onnx model path."),
+    )
+    for flag, description in required_options:
+        parser.add_argument(flag, type=str, required=True, help=description)
+
+    cli_args = parser.parse_args()
+    return cli_args
+
+def main():
+    """Export the detector given by ``--cfg``/``--weight`` to the ONNX file ``--output``.
+
+    The deploy config is read from ``deploy_default.py`` relative to the
+    current working directory; the task processor is created with device='cpu'.
+    """
+    args = parse_args()
+    deploy_cfg, model_cfg = load_config('deploy_default.py', args.cfg)
+    task_processor = build_task_processor(model_cfg, deploy_cfg, device='cpu')
+    model = task_processor.build_pytorch_model(args.weight)
+
+    # Exported with one random 1x3x800x800 tensor as example input; axis 0 of
+    # 'input' is declared dynamic under the axis name '-1'.
+    example_input = torch.randn(1, 3, 800, 800)
+    export_options = dict(
+        input_names=['input'],
+        dynamic_axes={'input': {0: '-1'}},
+        opset_version=13,
+    )
+    torch.onnx.export(
+        model,
+        example_input,
+        args.output,
+        **export_options,
+    )
+
+    print("Export onnx model successfully! ")
+
+if __name__ == '__main__':
+ main()
\ No newline at end of file
diff --git a/models/cv/detection/fsaf/ixrt/fsaf_r50_fpn_1x_coco.py b/models/cv/detection/fsaf/ixrt/fsaf_r50_fpn_1x_coco.py
new file mode 100644
index 0000000000000000000000000000000000000000..d511321f40a1a3ca786e807d6f3a88f60c8bb994
--- /dev/null
+++ b/models/cv/detection/fsaf/ixrt/fsaf_r50_fpn_1x_coco.py
@@ -0,0 +1,276 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+auto_scale_lr = dict(base_batch_size=16, enable=False)
+backend_args = None
+data_root = 'data/coco/'
+dataset_type = 'CocoDataset'
+default_hooks = dict(
+ checkpoint=dict(interval=1, type='CheckpointHook'),
+ logger=dict(interval=50, type='LoggerHook'),
+ param_scheduler=dict(type='ParamSchedulerHook'),
+ sampler_seed=dict(type='DistSamplerSeedHook'),
+ timer=dict(type='IterTimerHook'),
+ visualization=dict(type='DetVisualizationHook'))
+default_scope = 'mmdet'
+env_cfg = dict(
+ cudnn_benchmark=False,
+ dist_cfg=dict(backend='nccl'),
+ mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
+load_from = None
+log_level = 'ERROR'
+log_processor = dict(by_epoch=True, type='LogProcessor', window_size=50)
+model = dict(
+ backbone=dict(
+ depth=50,
+ frozen_stages=1,
+ init_cfg=dict(checkpoint='torchvision://resnet50', type='Pretrained'),
+ norm_cfg=dict(requires_grad=True, type='BN'),
+ norm_eval=True,
+ num_stages=4,
+ out_indices=(
+ 0,
+ 1,
+ 2,
+ 3,
+ ),
+ style='pytorch',
+ type='ResNet'),
+ bbox_head=dict(
+ anchor_generator=dict(
+ octave_base_scale=1,
+ ratios=[
+ 1.0,
+ ],
+ scales_per_octave=1,
+ strides=[
+ 8,
+ 16,
+ 32,
+ 64,
+ 128,
+ ],
+ type='AnchorGenerator'),
+ bbox_coder=dict(normalizer=4.0, type='TBLRBBoxCoder'),
+ feat_channels=256,
+ in_channels=256,
+ loss_bbox=dict(
+ eps=1e-06, loss_weight=1.0, reduction='none', type='IoULoss'),
+ loss_cls=dict(
+ alpha=0.25,
+ gamma=2.0,
+ loss_weight=1.0,
+ reduction='none',
+ type='FocalLoss',
+ use_sigmoid=True),
+ num_classes=80,
+ reg_decoded_bbox=True,
+ stacked_convs=4,
+ type='FSAFHead'),
+ data_preprocessor=dict(
+ bgr_to_rgb=True,
+ mean=[
+ 123.675,
+ 116.28,
+ 103.53,
+ ],
+ pad_size_divisor=32,
+ std=[
+ 58.395,
+ 57.12,
+ 57.375,
+ ],
+ type='DetDataPreprocessor'),
+ neck=dict(
+ add_extra_convs='on_input',
+ in_channels=[
+ 256,
+ 512,
+ 1024,
+ 2048,
+ ],
+ num_outs=5,
+ out_channels=256,
+ start_level=1,
+ type='FPN'),
+ test_cfg=dict(
+ max_per_img=100,
+ min_bbox_size=0,
+ nms=dict(iou_threshold=0.5, type='nms'),
+ nms_pre=1000,
+ score_thr=0.05),
+ train_cfg=dict(
+ allowed_border=-1,
+ assigner=dict(
+ min_pos_iof=0.01,
+ neg_scale=0.2,
+ pos_scale=0.2,
+ type='CenterRegionAssigner'),
+ debug=False,
+ pos_weight=-1,
+ sampler=dict(type='PseudoSampler')),
+ type='FSAF')
+optim_wrapper = dict(
+ optimizer=dict(lr=0.02, momentum=0.9, type='SGD', weight_decay=0.0001),
+ type='OptimWrapper')
+param_scheduler = [
+ dict(
+ begin=0, by_epoch=False, end=500, start_factor=0.001, type='LinearLR'),
+ dict(
+ begin=0,
+ by_epoch=True,
+ end=12,
+ gamma=0.1,
+ milestones=[
+ 8,
+ 11,
+ ],
+ type='MultiStepLR'),
+]
+resume = False
+test_cfg = dict(type='TestLoop')
+test_dataloader = dict(
+ batch_size=32,  # inference.py overwrites this with its --batchsize argument
+ dataset=dict(
+ ann_file='annotations/instances_val2017.json',
+ backend_args=None,
+ data_prefix=dict(img='images/val2017/'),  # val_dataloader uses 'val2017/'; inference.py sets this same 'images/val2017/' value again
+ data_root='data/coco/',  # inference.py overwrites this with its --datasets argument
+ pipeline=[
+ dict(backend_args=None, type='LoadImageFromFile'),
+ dict(keep_ratio=False, scale=(  # fixed 800x800 resize (build_engine.py pins the engine input to 32x3x800x800); the separate test_pipeline list uses keep_ratio=True with (1333, 800)
+ 800,
+ 800,
+ ), type='Resize'),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(
+ meta_keys=(
+ 'img_id',
+ 'img_path',
+ 'ori_shape',
+ 'img_shape',
+ 'scale_factor',
+ ),
+ type='PackDetInputs'),
+ ],
+ test_mode=True,
+ type='CocoDataset'),
+ drop_last=False,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(shuffle=False, type='DefaultSampler'))
+test_evaluator = dict(
+ ann_file='data/coco/annotations/instances_val2017.json',
+ backend_args=None,
+ format_only=False,
+ metric='bbox',
+ type='CocoMetric')
+test_pipeline = [
+ dict(backend_args=None, type='LoadImageFromFile'),
+ dict(keep_ratio=True, scale=(
+ 1333,
+ 800,
+ ), type='Resize'),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(
+ meta_keys=(
+ 'img_id',
+ 'img_path',
+ 'ori_shape',
+ 'img_shape',
+ 'scale_factor',
+ ),
+ type='PackDetInputs'),
+]
+train_cfg = dict(max_epochs=12, type='EpochBasedTrainLoop', val_interval=1)
+train_dataloader = dict(
+ batch_sampler=dict(type='AspectRatioBatchSampler'),
+ batch_size=2,
+ dataset=dict(
+ ann_file='annotations/instances_train2017.json',
+ backend_args=None,
+ data_prefix=dict(img='train2017/'),
+ data_root='data/coco/',
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
+ pipeline=[
+ dict(backend_args=None, type='LoadImageFromFile'),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(keep_ratio=True, scale=(
+ 1333,
+ 800,
+ ), type='Resize'),
+ dict(prob=0.5, type='RandomFlip'),
+ dict(type='PackDetInputs'),
+ ],
+ type='CocoDataset'),
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(shuffle=True, type='DefaultSampler'))
+train_pipeline = [
+ dict(backend_args=None, type='LoadImageFromFile'),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(keep_ratio=True, scale=(
+ 1333,
+ 800,
+ ), type='Resize'),
+ dict(prob=0.5, type='RandomFlip'),
+ dict(type='PackDetInputs'),
+]
+val_cfg = dict(type='ValLoop')
+val_dataloader = dict(
+ batch_size=1,
+ dataset=dict(
+ ann_file='annotations/instances_val2017.json',
+ backend_args=None,
+ data_prefix=dict(img='val2017/'),
+ data_root='data/coco/',
+ pipeline=[
+ dict(backend_args=None, type='LoadImageFromFile'),
+ dict(keep_ratio=True, scale=(
+ 1333,
+ 800,
+ ), type='Resize'),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(
+ meta_keys=(
+ 'img_id',
+ 'img_path',
+ 'ori_shape',
+ 'img_shape',
+ 'scale_factor',
+ ),
+ type='PackDetInputs'),
+ ],
+ test_mode=True,
+ type='CocoDataset'),
+ drop_last=False,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(shuffle=False, type='DefaultSampler'))
+val_evaluator = dict(
+ ann_file='data/coco/annotations/instances_val2017.json',
+ backend_args=None,
+ format_only=False,
+ metric='bbox',
+ type='CocoMetric')
+vis_backends = [
+ dict(type='LocalVisBackend'),
+]
+visualizer = dict(
+ name='visualizer',
+ type='DetLocalVisualizer',
+ vis_backends=[
+ dict(type='LocalVisBackend'),
+ ])
\ No newline at end of file
diff --git a/models/cv/detection/fsaf/ixrt/inference.py b/models/cv/detection/fsaf/ixrt/inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..5d940cd5d656dea893acc1993992994a3c4de0a5
--- /dev/null
+++ b/models/cv/detection/fsaf/ixrt/inference.py
@@ -0,0 +1,187 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import os
+import time
+import argparse
+import tensorrt
+import torch
+import torchvision
+import numpy as np
+from tensorrt import Dims
+from cuda import cuda, cudart
+from tqdm import tqdm
+from mmdet.registry import RUNNERS
+from mmengine.config import Config
+
+from common import create_engine_context, get_io_bindings
+
+def parse_args():
+    """Parse the command-line options of the inference script."""
+    # argparse's type=bool maps every non-empty string (even "False") to True,
+    # so the value of --perf_only is parsed explicitly instead.
+    def str2bool(value):
+        text = str(value).strip().lower()
+        if text not in ("true", "false", "1", "0", "yes", "no"):
+            raise argparse.ArgumentTypeError(f"expected a boolean, got {value!r}")
+        return text in ("true", "1", "yes")
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--engine",
+                        type=str,
+                        required=True,
+                        help="igie engine path.")
+    parser.add_argument("--batchsize",
+                        type=int,
+                        required=True,
+                        help="inference batch size.")
+    parser.add_argument("--datasets",
+                        type=str,
+                        required=True,
+                        help="datasets path.")
+    parser.add_argument("--input_name",
+                        type=str,
+                        required=True,
+                        help="input name of the model.")
+
+    parser.add_argument("--warmup",
+                        type=int,
+                        default=3,
+                        help="number of warmup before test.")
+    parser.add_argument("--acc_target",
+                        type=float,
+                        default=None,
+                        help="Model inference Accuracy target.")
+    parser.add_argument("--fps_target",
+                        type=float,
+                        default=None,
+                        help="Model inference FPS target.")
+
+    parser.add_argument("--perf_only",
+                        type=str2bool,
+                        default=False,
+                        help="Run performance test only")
+    return parser.parse_args()
+
+def main():
+    """Benchmark the engine (``--perf_only``) or evaluate it on the COCO test dataloader."""
+    args = parse_args()
+    batch_size = args.batchsize
+    logger = tensorrt.Logger(tensorrt.Logger.ERROR)
+
+    # Load Engine && I/O bindings
+    engine, context = create_engine_context(args.engine, logger)
+    inputs, outputs, allocations = get_io_bindings(engine)
+
+    # just run perf test
+    if args.perf_only:
+        # Untimed warm-up runs (--warmup) so one-time start-up cost is not measured.
+        for _ in range(args.warmup):
+            context.execute_v2(allocations)
+        torch.cuda.synchronize()
+        start_time = time.time()
+        for _ in range(10):
+            context.execute_v2(allocations)
+        torch.cuda.synchronize()
+        forward_time = time.time() - start_time
+        num_samples = 10 * args.batchsize
+        fps = num_samples / forward_time
+        print("FPS : ", fps)
+        # Without --fps_target there is nothing to compare against (fps >= None raises TypeError).
+        if args.fps_target is None:
+            exit()
+        print(f"Performance Check : Test {fps} >= target {args.fps_target}")
+        if fps >= args.fps_target:
+            print("pass!")
+            exit()
+        else:
+            print("failed!")
+            exit(1)
+    else:
+        # runner config
+        cfg = Config.fromfile("fsaf_r50_fpn_1x_coco.py")
+
+        cfg.work_dir = "./workspace"
+        cfg['test_dataloader']['batch_size'] = batch_size
+        cfg['test_dataloader']['dataset']['data_root'] = args.datasets
+        cfg['test_dataloader']['dataset']['data_prefix']['img'] = 'images/val2017/'
+        cfg['test_evaluator']['ann_file'] = os.path.join(args.datasets, 'annotations/instances_val2017.json')
+        cfg['log_level'] = 'ERROR'
+
+        # build runner
+        runner = RUNNERS.build(cfg)
+
+        for data in tqdm(runner.test_dataloader):
+            cls_score = []
+            box_reg = []
+
+            input_data = runner.model.data_preprocessor(data, False)
+            image = input_data['inputs'].cpu()
+            image = image.numpy().astype(inputs[0]["dtype"])
+            pad_batch = len(image) != batch_size
+            # np.resize fills a short final batch by repeating its images; the extra outputs are cut off below.
+            if pad_batch:
+                origin_size = len(image)
+                image = np.resize(image, (batch_size, *image.shape[1:]))
+
+            image = np.ascontiguousarray(image)
+
+            (err,) = cudart.cudaMemcpy(
+                inputs[0]["allocation"],
+                image,
+                image.nbytes,
+                cudart.cudaMemcpyKind.cudaMemcpyHostToDevice,
+            )
+            assert err == cudart.cudaError_t.cudaSuccess
+            # Run the engine on the device buffers; the input buffer was filled just above.
+            context.execute_v2(allocations)
+
+            for out in outputs:
+                output = np.zeros(out["shape"], out["dtype"])
+                (err,) = cudart.cudaMemcpy(
+                    output,
+                    out["allocation"],
+                    out["nbytes"],
+                    cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost,
+                )
+                assert err == cudart.cudaError_t.cudaSuccess
+
+                if pad_batch:
+                    output = output[:origin_size]
+
+                output = torch.from_numpy(output)
+                # Outputs are sorted by the size of axis 1: 80 -> class scores, 4 -> box regression.
+                if output.shape[1] == 80:
+                    cls_score.append(output)
+                elif output.shape[1] == 4:
+                    box_reg.append(output)
+
+            batch_img_metas = [
+                data_samples.metainfo for data_samples in data['data_samples']
+            ]
+
+            preds = runner.model.bbox_head.predict_by_feat(
+                cls_score, box_reg, batch_img_metas=batch_img_metas, rescale=True
+            )
+
+            batch_data_samples = runner.model.add_pred_to_datasample(input_data['data_samples'], preds)
+
+            runner.test_evaluator.process(data_samples=batch_data_samples, data_batch=data)
+        # NOTE(review): --acc_target is parsed but never compared against the evaluation result.
+        runner.test_evaluator.evaluate(len(runner.test_dataloader.dataset))
+
+
+if __name__ == "__main__":
+ main()
diff --git a/models/cv/detection/fsaf/ixrt/requirements.txt b/models/cv/detection/fsaf/ixrt/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a26706ef5402ca820ed6e4ab952d876ec768b4eb
--- /dev/null
+++ b/models/cv/detection/fsaf/ixrt/requirements.txt
@@ -0,0 +1,6 @@
+onnx
+tqdm
+onnxsim
+mmdet==3.3.0
+mmdeploy==1.3.1
+mmengine==0.10.4
diff --git a/models/cv/detection/fsaf/ixrt/scripts/infer_fsaf_fp16_accuracy.sh b/models/cv/detection/fsaf/ixrt/scripts/infer_fsaf_fp16_accuracy.sh
new file mode 100644
index 0000000000000000000000000000000000000000..ed3132c67b47f419fb43822f92530b65d58a742d
--- /dev/null
+++ b/models/cv/detection/fsaf/ixrt/scripts/infer_fsaf_fp16_accuracy.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+batchsize=32
+model_path="fsaf_opt.onnx"
+datasets_path="${DATASETS_DIR:?DATASETS_DIR must point to the COCO dataset root}"
+
+# build engine; stop here if it fails so inference never runs without a fresh engine
+python3 build_engine.py \
+    --model "${model_path}" \
+    --precision float16 \
+    --engine fsaf.engine || exit 1
+
+
+# inference
+python3 inference.py \
+    --engine fsaf.engine \
+    --batchsize "${batchsize}" \
+    --input_name input \
+    --datasets "${datasets_path}"
\ No newline at end of file
diff --git a/models/cv/detection/fsaf/ixrt/scripts/infer_fsaf_fp16_performance.sh b/models/cv/detection/fsaf/ixrt/scripts/infer_fsaf_fp16_performance.sh
new file mode 100644
index 0000000000000000000000000000000000000000..65fad0c7ff07cc92f1be605db91f9e0cbf849a8c
--- /dev/null
+++ b/models/cv/detection/fsaf/ixrt/scripts/infer_fsaf_fp16_performance.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+batchsize=32
+model_path="fsaf_opt.onnx"
+datasets_path="${DATASETS_DIR:?DATASETS_DIR must point to the COCO dataset root}"
+
+# build engine; stop here if it fails so the benchmark never runs without a fresh engine
+python3 build_engine.py \
+    --model "${model_path}" \
+    --precision float16 \
+    --engine fsaf.engine || exit 1
+
+
+# inference
+python3 inference.py \
+    --engine fsaf.engine \
+    --batchsize "${batchsize}" \
+    --input_name input \
+    --datasets "${datasets_path}" \
+    --perf_only True \
+    --fps_target 130
\ No newline at end of file
diff --git a/models/cv/detection/hrnet/ixrt/README.md b/models/cv/detection/hrnet/ixrt/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..cade087c4e00dac7b92db9943932facf97ba04e4
--- /dev/null
+++ b/models/cv/detection/hrnet/ixrt/README.md
@@ -0,0 +1,60 @@
+# HRNet
+
+## Description
+
+HRNet is an advanced deep learning architecture for human pose estimation, characterized by its maintenance of high-resolution representations throughout the entire network process, thereby avoiding the low-to-high resolution recovery step typical of traditional models. The network features parallel multi-resolution subnetworks and enriches feature representation through repeated multi-scale fusion, which enhances the accuracy of keypoint detection. Additionally, HRNet offers computational efficiency and has demonstrated superior performance over previous methods on several standard datasets.
+
+## Setup
+
+### Install
+
+```bash
+# Install libGL
+## CentOS
+yum install -y mesa-libGL
+## Ubuntu
+apt install -y libgl1-mesa-dev
+
+pip3 install -r requirements.txt
+```
+
+### Download
+
+Pretrained model:
+
+Dataset: download the COCO validation dataset and point `DATASETS_DIR` (see Inference below) at its root directory.
+
+### Model Conversion
+
+```bash
+# export onnx model
+python3 export.py --weight fcos_hrnetv2p_w18_gn-head_4x4_1x_coco_20201212_100710-4ad151de.pth --cfg fcos_hrnetv2p-w18-gn-head_4xb4-1x_coco.py --output hrnet.onnx
+
+# Use onnxsim optimize onnx model
+onnxsim hrnet.onnx hrnet_opt.onnx
+```
+
+## Inference
+
+```bash
+export DATASETS_DIR=/Path/to/coco/
+```
+
+### FP16
+
+```bash
+# Accuracy
+bash scripts/infer_hrnet_fp16_accuracy.sh
+# Performance
+bash scripts/infer_hrnet_fp16_performance.sh
+```
+
+## Results
+
+Model |BatchSize |Precision |FPS |IOU@0.5 |IOU@0.5:0.95 |
+-------|-----------|----------|----------|----------|---------------|
+HRNet | 32 | FP16 | 75.199 | 0.491 | 0.327 |
+
+## Reference
+
+mmdetection:
diff --git a/models/cv/detection/hrnet/ixrt/build_engine.py b/models/cv/detection/hrnet/ixrt/build_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..61834f4fe79fe2b9bb4cfc01561b01f177098f69
--- /dev/null
+++ b/models/cv/detection/hrnet/ixrt/build_engine.py
@@ -0,0 +1,61 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import os
+import cv2
+import argparse
+import numpy as np
+
+import torch
+import tensorrt
+from tensorrt import Dims
+
+def main(config):
+    """Build an engine from the ONNX file ``config.model`` and write it to ``config.engine``; raise RuntimeError on failure."""
+    # Every network input is pinned to this one static shape (min == opt == max).
+    shape = Dims([32, 3, 800, 800])
+    IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING)
+    builder = tensorrt.Builder(IXRT_LOGGER)
+    EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
+    network = builder.create_network(EXPLICIT_BATCH)
+    build_config = builder.create_builder_config()
+    profile = builder.create_optimization_profile()
+    profile.set_shape("input", shape, shape, shape)
+    build_config.add_optimization_profile(profile)
+    parser = tensorrt.OnnxParser(network, IXRT_LOGGER)
+    if not parser.parse_from_file(config.model):
+        raise RuntimeError(f"failed to parse ONNX model: {config.model}")
+    # NOTE(review): every precision other than "int8" (including "float32") selects the FP16 flag.
+    build_config.set_flag(tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16)
+    for i in range(network.num_inputs):
+        network.get_input(i).shape = shape
+    plan = builder.build_serialized_network(network, build_config)
+    if plan is None:
+        raise RuntimeError(f"engine build failed for {config.model}")
+    with open(config.engine, "wb") as f:
+        f.write(plan)
+
+def parse_args():
+    """Parse --model (ONNX input), --precision and --engine (output file); both paths are required."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--model", type=str, required=True)
+    parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="float16",
+                        help="The precision of datatype")
+    parser.add_argument("--engine", type=str, required=True)
+    return parser.parse_args()
+
+if __name__ == "__main__":
+ args = parse_args()
+ main(args)
\ No newline at end of file
diff --git a/models/cv/detection/hrnet/ixrt/common.py b/models/cv/detection/hrnet/ixrt/common.py
new file mode 100644
index 0000000000000000000000000000000000000000..ef92a6ba6291058d20f575edb09da35ebff3a937
--- /dev/null
+++ b/models/cv/detection/hrnet/ixrt/common.py
@@ -0,0 +1,69 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import os
+import cv2
+import glob
+import torch
+import tensorrt
+import numpy as np
+from cuda import cuda, cudart
+
+def create_engine_context(engine_path, logger):
+    """Load the serialized engine at ``engine_path``; return ``(engine, context)``."""
+    with open(engine_path, "rb") as engine_file:
+        runtime = tensorrt.Runtime(logger)
+        assert runtime
+        engine = runtime.deserialize_cuda_engine(engine_file.read())
+    assert engine
+    context = engine.create_execution_context()
+    assert context
+    return engine, context
+
+def get_io_bindings(engine):
+    """Allocate a device buffer for every binding of ``engine``.
+
+    Returns ``(inputs, outputs, allocations)``: the binding descriptors split
+    by direction, plus every device pointer in binding-index order. The
+    buffers come from ``cudaMalloc`` and are not freed by this function.
+    """
+    inputs = []
+    outputs = []
+    allocations = []
+
+    for i in range(engine.num_bindings):
+        is_input = engine.binding_is_input(i)
+        name = engine.get_binding_name(i)
+        np_dtype = np.dtype(tensorrt.nptype(engine.get_binding_dtype(i)))
+        shape = list(engine.get_binding_shape(i))
+        # Buffer size in bytes = item size * product of all dimensions.
+        size = np_dtype.itemsize
+        for dim in shape:
+            size *= dim
+        err, allocation = cudart.cudaMalloc(size)
+        assert err == cudart.cudaError_t.cudaSuccess
+        binding = {
+            "index": i,
+            "name": name,
+            "dtype": np_dtype,
+            "shape": shape,
+            "allocation": allocation,
+            "nbytes": size,
+        }
+        print(f"binding {i}, name : {name} dtype : {np_dtype} shape : {shape}")
+        allocations.append(allocation)
+        (inputs if is_input else outputs).append(binding)
+
+    return inputs, outputs, allocations
\ No newline at end of file
diff --git a/models/cv/detection/hrnet/ixrt/deploy_default.py b/models/cv/detection/hrnet/ixrt/deploy_default.py
new file mode 100644
index 0000000000000000000000000000000000000000..b8d8e43dc829456f0c2e46a7acfc3128757f945d
--- /dev/null
+++ b/models/cv/detection/hrnet/ixrt/deploy_default.py
@@ -0,0 +1,41 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+onnx_config = dict(  # ONNX export section of the deploy config that export.py loads via mmdeploy's load_config
+ type='onnx',
+ export_params=True,
+ keep_initializers_as_inputs=False,
+ opset_version=11,  # NOTE(review): export.py calls torch.onnx.export with opset_version=13 and its own output path — confirm whether these values are used
+ save_file='end2end.onnx',
+ input_names=['input'],
+ output_names=['output'],
+ input_shape=None,
+ optimize=True)
+# Codebase section: mmdet ObjectDetection, 'end2end' model type, with post-processing thresholds.
+codebase_config = dict(
+ type='mmdet',
+ task='ObjectDetection',
+ model_type='end2end',
+ post_processing=dict(
+ score_threshold=0.05,
+ confidence_threshold=0.005,
+ iou_threshold=0.5,
+ max_output_boxes_per_class=200,
+ pre_top_k=5000,
+ keep_top_k=100,
+ background_label_id=-1,
+ ))
+# Backend section of the deploy config.
+backend_config = dict(type='onnxruntime')
\ No newline at end of file
diff --git a/models/cv/detection/hrnet/ixrt/export.py b/models/cv/detection/hrnet/ixrt/export.py
new file mode 100644
index 0000000000000000000000000000000000000000..bceaba7801843feb5ef095f62a71a2f6e0074db4
--- /dev/null
+++ b/models/cv/detection/hrnet/ixrt/export.py
@@ -0,0 +1,74 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import argparse
+
+import torch
+from mmdeploy.utils import load_config
+from mmdeploy.apis import build_task_processor
+
+def parse_args():
+    """Parse the command-line options of the ONNX export script.
+
+    All three options are required strings:
+    ``--weight`` (pytorch model weight), ``--cfg`` (model config file) and
+    ``--output`` (export onnx model path).
+    """
+    parser = argparse.ArgumentParser()
+
+    # (flag, help text) for every option; all share type=str, required=True.
+    required_options = (
+        ("--weight", "pytorch model weight."),
+        ("--cfg", "model config file."),
+        ("--output", "export onnx model path."),
+    )
+    for flag, description in required_options:
+        parser.add_argument(flag, type=str, required=True, help=description)
+
+    cli_args = parser.parse_args()
+    return cli_args
+
+def main():
+    """Export the detector given by ``--cfg``/``--weight`` to the ONNX file ``--output``.
+
+    The deploy config is read from ``deploy_default.py`` relative to the
+    current working directory; the task processor is created with device='cpu'.
+    """
+    args = parse_args()
+
+    deploy_cfg, model_cfg = load_config('deploy_default.py', args.cfg)
+    task_processor = build_task_processor(model_cfg, deploy_cfg, device='cpu')
+    model = task_processor.build_pytorch_model(args.weight)
+
+    # Exported with one random 1x3x800x800 tensor as example input; axis 0 of
+    # both 'input' and 'output' is declared dynamic under the axis name '-1'.
+    example_input = torch.randn(1, 3, 800, 800)
+    export_options = dict(
+        input_names=['input'],
+        output_names=['output'],
+        dynamic_axes={'input': {0: '-1'}, 'output': {0: '-1'}},
+        opset_version=13,
+    )
+    torch.onnx.export(
+        model,
+        example_input,
+        args.output,
+        **export_options,
+    )
+
+    print("Export onnx model successfully! ")
+
+if __name__ == '__main__':
+ main()
+
diff --git a/models/cv/detection/hrnet/ixrt/fcos_hrnetv2p-w18-gn-head_4xb4-1x_coco.py b/models/cv/detection/hrnet/ixrt/fcos_hrnetv2p-w18-gn-head_4xb4-1x_coco.py
new file mode 100644
index 0000000000000000000000000000000000000000..faccb7732d7063f680cf023fce4fc3ece716583d
--- /dev/null
+++ b/models/cv/detection/hrnet/ixrt/fcos_hrnetv2p-w18-gn-head_4xb4-1x_coco.py
@@ -0,0 +1,287 @@
+auto_scale_lr = dict(base_batch_size=16, enable=False)
+backend_args = None
+data_root = 'data/coco/'
+dataset_type = 'CocoDataset'
+default_hooks = dict(
+ checkpoint=dict(interval=1, type='CheckpointHook'),
+ logger=dict(interval=50, type='LoggerHook'),
+ param_scheduler=dict(type='ParamSchedulerHook'),
+ sampler_seed=dict(type='DistSamplerSeedHook'),
+ timer=dict(type='IterTimerHook'),
+ visualization=dict(type='DetVisualizationHook'))
+default_scope = 'mmdet'
+env_cfg = dict(
+ cudnn_benchmark=False,
+ dist_cfg=dict(backend='nccl'),
+ mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
+load_from = None
+log_level = 'ERROR'
+log_processor = dict(by_epoch=True, type='LogProcessor', window_size=50)
+model = dict(
+ backbone=dict(
+ extra=dict(
+ stage1=dict(
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_branches=1,
+ num_channels=(64, ),
+ num_modules=1),
+ stage2=dict(
+ block='BASIC',
+ num_blocks=(
+ 4,
+ 4,
+ ),
+ num_branches=2,
+ num_channels=(
+ 18,
+ 36,
+ ),
+ num_modules=1),
+ stage3=dict(
+ block='BASIC',
+ num_blocks=(
+ 4,
+ 4,
+ 4,
+ ),
+ num_branches=3,
+ num_channels=(
+ 18,
+ 36,
+ 72,
+ ),
+ num_modules=4),
+ stage4=dict(
+ block='BASIC',
+ num_blocks=(
+ 4,
+ 4,
+ 4,
+ 4,
+ ),
+ num_branches=4,
+ num_channels=(
+ 18,
+ 36,
+ 72,
+ 144,
+ ),
+ num_modules=3)),
+ init_cfg=dict(
+ checkpoint='open-mmlab://msra/hrnetv2_w18', type='Pretrained'),
+ type='HRNet'),
+ bbox_head=dict(
+ feat_channels=256,
+ in_channels=256,
+ loss_bbox=dict(loss_weight=1.0, type='IoULoss'),
+ loss_centerness=dict(
+ loss_weight=1.0, type='CrossEntropyLoss', use_sigmoid=True),
+ loss_cls=dict(
+ alpha=0.25,
+ gamma=2.0,
+ loss_weight=1.0,
+ type='FocalLoss',
+ use_sigmoid=True),
+ num_classes=80,
+ stacked_convs=4,
+ strides=[
+ 8,
+ 16,
+ 32,
+ 64,
+ 128,
+ ],
+ type='FCOSHead'),
+ data_preprocessor=dict(
+ bgr_to_rgb=False,
+ mean=[
+ 103.53,
+ 116.28,
+ 123.675,
+ ],
+ pad_size_divisor=32,
+ std=[
+ 57.375,
+ 57.12,
+ 58.395,
+ ],
+ type='DetDataPreprocessor'),
+ neck=dict(
+ in_channels=[
+ 18,
+ 36,
+ 72,
+ 144,
+ ],
+ num_outs=5,
+ out_channels=256,
+ stride=2,
+ type='HRFPN'),
+ test_cfg=dict(
+ max_per_img=100,
+ min_bbox_size=0,
+ nms=dict(iou_threshold=0.5, type='nms'),
+ nms_pre=1000,
+ score_thr=0.05),
+ type='FCOS')
+optim_wrapper = dict(
+ clip_grad=dict(max_norm=35, norm_type=2),
+ optimizer=dict(lr=0.01, momentum=0.9, type='SGD', weight_decay=0.0001),
+ paramwise_cfg=dict(bias_decay_mult=0.0, bias_lr_mult=2.0),
+ type='OptimWrapper')
+param_scheduler = [
+ dict(
+ begin=0,
+ by_epoch=False,
+ end=500,
+ factor=0.3333333333333333,
+ type='ConstantLR'),
+ dict(
+ begin=0,
+ by_epoch=True,
+ end=12,
+ gamma=0.1,
+ milestones=[
+ 8,
+ 11,
+ ],
+ type='MultiStepLR'),
+]
+resume = False
+test_cfg = dict(type='TestLoop')
+test_dataloader = dict(
+ batch_size=32,
+ dataset=dict(
+ ann_file='annotations/instances_val2017.json',
+ backend_args=None,
+ data_prefix=dict(img='val2017/'),
+ data_root='/home/xinchi.tian/ixrt-modelzoo/data/datasets/coco2017',
+ pipeline=[
+ dict(backend_args=None, type='LoadImageFromFile'),
+ dict(keep_ratio=True, scale=(
+ 800,
+ 800,
+ ), type='Resize'),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(
+ meta_keys=(
+ 'img_id',
+ 'img_path',
+ 'ori_shape',
+ 'img_shape',
+ 'scale_factor',
+ ),
+ type='PackDetInputs'),
+ ],
+ test_mode=True,
+ type='CocoDataset'),
+ drop_last=False,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(shuffle=False, type='DefaultSampler'))
+test_evaluator = dict(
+ ann_file=
+ '/home/xinchi.tian/ixrt-modelzoo/data/datasets/coco2017/annotations/instances_val2017.json',
+ backend_args=None,
+ format_only=False,
+ metric='bbox',
+ type='CocoMetric')
+test_pipeline = [
+ dict(backend_args=None, type='LoadImageFromFile'),
+ dict(keep_ratio=True, scale=(
+ 800,
+ 800,
+ ), type='Resize'),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(
+ meta_keys=(
+ 'img_id',
+ 'img_path',
+ 'ori_shape',
+ 'img_shape',
+ 'scale_factor',
+ ),
+ type='PackDetInputs'),
+]
+train_cfg = dict(max_epochs=12, type='EpochBasedTrainLoop', val_interval=1)
+train_dataloader = dict(
+ batch_sampler=dict(type='AspectRatioBatchSampler'),
+ batch_size=4,
+ dataset=dict(
+ ann_file='annotations/instances_train2017.json',
+ backend_args=None,
+ data_prefix=dict(img='train2017/'),
+ data_root='data/coco/',
+ filter_cfg=dict(filter_empty_gt=True, min_size=32),
+ pipeline=[
+ dict(backend_args=None, type='LoadImageFromFile'),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(keep_ratio=True, scale=(
+ 1333,
+ 800,
+ ), type='Resize'),
+ dict(prob=0.5, type='RandomFlip'),
+ dict(type='PackDetInputs'),
+ ],
+ type='CocoDataset'),
+ num_workers=4,
+ persistent_workers=True,
+ sampler=dict(shuffle=True, type='DefaultSampler'))
+train_pipeline = [
+ dict(backend_args=None, type='LoadImageFromFile'),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(keep_ratio=True, scale=(
+ 1333,
+ 800,
+ ), type='Resize'),
+ dict(prob=0.5, type='RandomFlip'),
+ dict(type='PackDetInputs'),
+]
+val_cfg = dict(type='ValLoop')
+val_dataloader = dict(
+ batch_size=1,
+ dataset=dict(
+ ann_file='annotations/instances_val2017.json',
+ backend_args=None,
+ data_prefix=dict(img='val2017/'),
+ data_root='data/coco/',
+ pipeline=[
+ dict(backend_args=None, type='LoadImageFromFile'),
+ dict(keep_ratio=True, scale=(
+ 800,
+ 800,
+ ), type='Resize'),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(
+ meta_keys=(
+ 'img_id',
+ 'img_path',
+ 'ori_shape',
+ 'img_shape',
+ 'scale_factor',
+ ),
+ type='PackDetInputs'),
+ ],
+ test_mode=True,
+ type='CocoDataset'),
+ drop_last=False,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(shuffle=False, type='DefaultSampler'))
+val_evaluator = dict(
+ ann_file='data/coco/annotations/instances_val2017.json',
+ backend_args=None,
+ format_only=False,
+ metric='bbox',
+ type='CocoMetric')
+vis_backends = [
+ dict(type='LocalVisBackend'),
+]
+visualizer = dict(
+ name='visualizer',
+ type='DetLocalVisualizer',
+ vis_backends=[
+ dict(type='LocalVisBackend'),
+ ])
+work_dir = './'
diff --git a/models/cv/detection/hrnet/ixrt/inference.py b/models/cv/detection/hrnet/ixrt/inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..f5e84d967d991eb4de2fb9eb111737d6b74f2976
--- /dev/null
+++ b/models/cv/detection/hrnet/ixrt/inference.py
@@ -0,0 +1,190 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import os
+import time
+import argparse
+import tensorrt
+import torch
+import torchvision
+import numpy as np
+from tensorrt import Dims
+from cuda import cuda, cudart
+from tqdm import tqdm
+from mmdet.registry import RUNNERS
+from mmengine.config import Config
+
+from common import create_engine_context, get_io_bindings
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--engine",
+ type=str,
+ required=True,
+ help="igie engine path.")
+
+ parser.add_argument("--batchsize",
+ type=int,
+ required=True,
+ help="inference batch size.")
+
+ parser.add_argument("--datasets",
+ type=str,
+ required=True,
+ help="datasets path.")
+
+ parser.add_argument("--input_name",
+ type=str,
+ required=True,
+ help="input name of the model.")
+
+ parser.add_argument("--warmup",
+ type=int,
+ default=3,
+ help="number of warmup before test.")
+
+ parser.add_argument("--acc_target",
+ type=float,
+ default=None,
+ help="Model inference Accuracy target.")
+
+ parser.add_argument("--fps_target",
+ type=float,
+ default=None,
+ help="Model inference FPS target.")
+
+ parser.add_argument("--perf_only",
+ type=bool,
+ default=False,
+ help="Run performance test only")
+
+ args = parser.parse_args()
+
+ return args
+
+def main():
+ args = parse_args()
+
+ batch_size = args.batchsize
+
+ host_mem = tensorrt.IHostMemory
+ logger = tensorrt.Logger(tensorrt.Logger.ERROR)
+
+ # Load Engine && I/O bindings
+ engine, context = create_engine_context(args.engine, logger)
+ inputs, outputs, allocations = get_io_bindings(engine)
+
+ if args.warmup > 0:
+ print("\nWarm Start.")
+ for i in range(args.warmup):
+ context.execute_v2(allocations)
+ print("Warm Done.")
+
+ # just run perf test
+ if args.perf_only:
+ torch.cuda.synchronize()
+ start_time = time.time()
+
+ for i in range(10):
+ context.execute_v2(allocations)
+
+ torch.cuda.synchronize()
+ end_time = time.time()
+ forward_time = end_time - start_time
+ num_samples = 10 * args.batchsize
+ fps = num_samples / forward_time
+
+ print("FPS : ", fps)
+ print(f"Performance Check : Test {fps} >= target {args.fps_target}")
+ if fps >= args.fps_target:
+ print("pass!")
+ exit()
+ else:
+ print("failed!")
+ exit(1)
+ else:
+ # Runner config
+ cfg = Config.fromfile("fcos_hrnetv2p-w18-gn-head_4xb4-1x_coco.py")
+ cfg.work_dir = "./"
+
+ cfg['test_dataloader']['batch_size'] = batch_size
+ cfg['test_dataloader']['dataset']['data_root'] = args.datasets
+ cfg['test_dataloader']['dataset']['data_prefix']['img'] = 'images/val2017/'
+ cfg['test_evaluator']['ann_file'] = os.path.join(args.datasets, 'annotations/instances_val2017.json')
+ cfg['log_level'] = 'ERROR'
+
+ runner = RUNNERS.build(cfg)
+
+ for input_data in tqdm(runner.test_dataloader):
+
+ input_data = runner.model.data_preprocessor(input_data, False)
+ image = input_data['inputs'].cpu()
+ image = image.numpy().astype(inputs[0]["dtype"])
+ pad_batch = len(image) != batch_size
+ if pad_batch:
+ origin_size = len(image)
+ image = np.resize(image, (batch_size, *image.shape[1:]))
+ image = np.ascontiguousarray(image)
+
+ (err,) = cudart.cudaMemcpy(
+ inputs[0]["allocation"],
+ image,
+ image.nbytes,
+ cudart.cudaMemcpyKind.cudaMemcpyHostToDevice,
+ )
+ assert err == cudart.cudaError_t.cudaSuccess
+
+ context.execute_v2(allocations)
+
+ cls_score = []
+ box_reg = []
+ score_factors = []
+ for i in range(len(outputs)):
+ output = np.zeros(outputs[i]["shape"], outputs[i]["dtype"])
+ (err,) = cudart.cudaMemcpy(
+ output,
+ outputs[i]["allocation"],
+ outputs[i]["nbytes"],
+ cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost,
+ )
+ assert err == cudart.cudaError_t.cudaSuccess
+
+ if pad_batch:
+ output = output[:origin_size]
+
+ output = torch.from_numpy(output)
+
+ if output.shape[1] == 80:
+ cls_score.append(output)
+ elif output.shape[1] == 4:
+ box_reg.append(output)
+ else:
+ score_factors.append(output)
+
+ batch_img_metas = [
+ data_samples.metainfo for data_samples in input_data['data_samples']
+ ]
+
+ results_list = runner.model.bbox_head.predict_by_feat(cls_score, box_reg, score_factors, batch_img_metas=batch_img_metas, rescale=True)
+
+ batch_data_samples = runner.model.add_pred_to_datasample(input_data['data_samples'], results_list)
+
+ runner.test_evaluator.process(data_samples=batch_data_samples, data_batch=input_data)
+
+ metrics = runner.test_evaluator.evaluate(len(runner.test_dataloader.dataset))
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/models/cv/detection/hrnet/ixrt/requirements.txt b/models/cv/detection/hrnet/ixrt/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..97ac9c0458744fb56d62781ffd96279f893817f3
--- /dev/null
+++ b/models/cv/detection/hrnet/ixrt/requirements.txt
@@ -0,0 +1,6 @@
+onnx
+tqdm
+onnxsim
+mmdet
+mmdeploy
+mmengine
diff --git a/models/cv/detection/hrnet/ixrt/scripts/infer_hrnet_fp16_accuracy.sh b/models/cv/detection/hrnet/ixrt/scripts/infer_hrnet_fp16_accuracy.sh
new file mode 100644
index 0000000000000000000000000000000000000000..74f55c2970c1b3a2a8902b5b3884b2cecb4dfb3b
--- /dev/null
+++ b/models/cv/detection/hrnet/ixrt/scripts/infer_hrnet_fp16_accuracy.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+# Abort on the first failing step so inference never runs against a missing
+# or stale engine when the build fails.
+set -e
+
+batchsize=32
+model_path="hrnet_opt.onnx"
+# Fail early with a clear message instead of a later argparse error.
+datasets_path="${DATASETS_DIR:?DATASETS_DIR must point to the COCO dataset root}"
+
+# build engine
+python3 build_engine.py \
+    --model "${model_path}" \
+    --precision float16 \
+    --engine hrnet.engine
+
+
+# inference
+python3 inference.py \
+    --engine hrnet.engine \
+    --batchsize "${batchsize}" \
+    --input_name input \
+    --datasets "${datasets_path}"
\ No newline at end of file
diff --git a/models/cv/detection/hrnet/ixrt/scripts/infer_hrnet_fp16_performance.sh b/models/cv/detection/hrnet/ixrt/scripts/infer_hrnet_fp16_performance.sh
new file mode 100644
index 0000000000000000000000000000000000000000..ae6a5ef1529ff76de25c62be2b20eb003f79f678
--- /dev/null
+++ b/models/cv/detection/hrnet/ixrt/scripts/infer_hrnet_fp16_performance.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+# Abort on the first failing step so the benchmark never runs against a
+# missing or stale engine when the build fails.
+set -e
+
+batchsize=32
+model_path="hrnet_opt.onnx"
+# inference.py requires --datasets even in perf-only mode; fail early with a
+# clear message instead of a later argparse error.
+datasets_path="${DATASETS_DIR:?DATASETS_DIR must be set (inference.py requires --datasets)}"
+
+# build engine
+python3 build_engine.py \
+    --model "${model_path}" \
+    --precision float16 \
+    --engine hrnet.engine
+
+
+# inference
+python3 inference.py \
+    --engine hrnet.engine \
+    --batchsize "${batchsize}" \
+    --input_name input \
+    --datasets "${datasets_path}" \
+    --perf_only True \
+    --fps_target 70
\ No newline at end of file
diff --git a/models/cv/detection/retinaface/ixrt/README.md b/models/cv/detection/retinaface/ixrt/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..8fff5439fd8e82e836e58abc7c38296f0ec27db5
--- /dev/null
+++ b/models/cv/detection/retinaface/ixrt/README.md
@@ -0,0 +1,63 @@
+# RetinaFace
+
+## Description
+
+RetinaFace is an efficient single-stage face detection model that employs a multi-task learning strategy to simultaneously predict facial locations, landmarks, and 3D facial shapes. It utilizes feature pyramids and context modules to extract multi-scale features and employs a self-supervised mesh decoder to enhance detection accuracy. RetinaFace demonstrates excellent performance on datasets like WIDER FACE, supports real-time processing, and its code and datasets are publicly available for researchers.
+
+## Setup
+
+### Install
+
+```bash
+# Install libGL
+## CentOS
+yum install -y mesa-libGL
+## Ubuntu
+apt install -y libgl1-mesa-dev
+
+pip3 install -r requirements.txt
+
+python3 setup.py build_ext --inplace
+```
+
+### Download
+
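+Pretrained model: <https://github.com/biubug6/Face-Detector-1MB-with-landmark> (the `mobilenet0.25_Final.pth` weights are fetched by the command below).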
+
+Dataset: <http://shuoyang1213.me/WIDERFACE/> to download the validation dataset.
+
+```bash
+wget https://github.com/biubug6/Face-Detector-1MB-with-landmark/raw/master/weights/mobilenet0.25_Final.pth
+```
+
+### Model Conversion
+
+```bash
+# export onnx model
+python3 torch2onnx.py --model mobilenet0.25_Final.pth --onnx_model mnetv1_retinaface.onnx
+```
+
+## Inference
+
+```bash
+export DATASETS_DIR=/Path/to/widerface/
+export GT_DIR=../igie/ground_truth
+```
+
+### FP16
+
+```bash
+# Accuracy
+bash scripts/infer_retinaface_fp16_accuracy.sh
+# Performance
+bash scripts/infer_retinaface_fp16_performance.sh
+```
+
+## Results
+
+| Model | BatchSize | Precision | FPS | Easy AP(%) | Medium AP (%) | Hard AP(%) |
+| :--------: | :-------: | :-------: | :------: | :--------: | :-----------: | :--------: |
+| RetinaFace | 32 | FP16 | 8536.367 | 80.84 | 69.34 | 37.31 |
+
+## Reference
+
+Face-Detector-1MB-with-landmark: <https://github.com/biubug6/Face-Detector-1MB-with-landmark>
diff --git a/models/cv/detection/retinaface/ixrt/__init__.py b/models/cv/detection/retinaface/ixrt/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/models/cv/detection/retinaface/ixrt/box_overlaps.c b/models/cv/detection/retinaface/ixrt/box_overlaps.c
new file mode 100644
index 0000000000000000000000000000000000000000..364ceaae27f00b8616a3798d584cd799678913e5
--- /dev/null
+++ b/models/cv/detection/retinaface/ixrt/box_overlaps.c
@@ -0,0 +1,7097 @@
+/* Generated by Cython 0.29.36 */
+
+/* BEGIN: Cython Metadata
+{
+ "distutils": {
+ "depends": [
+ "/usr/local/lib/python3.10/site-packages/numpy/core/include/numpy/arrayobject.h",
+ "/usr/local/lib/python3.10/site-packages/numpy/core/include/numpy/arrayscalars.h",
+ "/usr/local/lib/python3.10/site-packages/numpy/core/include/numpy/ndarrayobject.h",
+ "/usr/local/lib/python3.10/site-packages/numpy/core/include/numpy/ndarraytypes.h",
+ "/usr/local/lib/python3.10/site-packages/numpy/core/include/numpy/ufuncobject.h"
+ ],
+ "include_dirs": [
+ "/usr/local/lib/python3.10/site-packages/numpy/core/include"
+ ],
+ "name": "bbox",
+ "sources": [
+ "box_overlaps.pyx"
+ ]
+ },
+ "module_name": "bbox"
+}
+END: Cython Metadata */
+
+#ifndef PY_SSIZE_T_CLEAN
+#define PY_SSIZE_T_CLEAN
+#endif /* PY_SSIZE_T_CLEAN */
+#include "Python.h"
+#ifndef Py_PYTHON_H
+ #error Python headers needed to compile C extensions, please install development version of Python.
+#elif PY_VERSION_HEX < 0x02060000 || (0x03000000 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x03030000)
+ #error Cython requires Python 2.6+ or Python 3.3+.
+#else
+#define CYTHON_ABI "0_29_36"
+#define CYTHON_HEX_VERSION 0x001D24F0
+#define CYTHON_FUTURE_DIVISION 0
+#include
+#ifndef offsetof
+ #define offsetof(type, member) ( (size_t) & ((type*)0) -> member )
+#endif
+#if !defined(WIN32) && !defined(MS_WINDOWS)
+ #ifndef __stdcall
+ #define __stdcall
+ #endif
+ #ifndef __cdecl
+ #define __cdecl
+ #endif
+ #ifndef __fastcall
+ #define __fastcall
+ #endif
+#endif
+#ifndef DL_IMPORT
+ #define DL_IMPORT(t) t
+#endif
+#ifndef DL_EXPORT
+ #define DL_EXPORT(t) t
+#endif
+#define __PYX_COMMA ,
+#ifndef HAVE_LONG_LONG
+ #if PY_VERSION_HEX >= 0x02070000
+ #define HAVE_LONG_LONG
+ #endif
+#endif
+#ifndef PY_LONG_LONG
+ #define PY_LONG_LONG LONG_LONG
+#endif
+#ifndef Py_HUGE_VAL
+ #define Py_HUGE_VAL HUGE_VAL
+#endif
+#ifdef PYPY_VERSION
+ #define CYTHON_COMPILING_IN_PYPY 1
+ #define CYTHON_COMPILING_IN_PYSTON 0
+ #define CYTHON_COMPILING_IN_CPYTHON 0
+ #define CYTHON_COMPILING_IN_NOGIL 0
+ #undef CYTHON_USE_TYPE_SLOTS
+ #define CYTHON_USE_TYPE_SLOTS 0
+ #undef CYTHON_USE_PYTYPE_LOOKUP
+ #define CYTHON_USE_PYTYPE_LOOKUP 0
+ #if PY_VERSION_HEX < 0x03050000
+ #undef CYTHON_USE_ASYNC_SLOTS
+ #define CYTHON_USE_ASYNC_SLOTS 0
+ #elif !defined(CYTHON_USE_ASYNC_SLOTS)
+ #define CYTHON_USE_ASYNC_SLOTS 1
+ #endif
+ #undef CYTHON_USE_PYLIST_INTERNALS
+ #define CYTHON_USE_PYLIST_INTERNALS 0
+ #undef CYTHON_USE_UNICODE_INTERNALS
+ #define CYTHON_USE_UNICODE_INTERNALS 0
+ #undef CYTHON_USE_UNICODE_WRITER
+ #define CYTHON_USE_UNICODE_WRITER 0
+ #undef CYTHON_USE_PYLONG_INTERNALS
+ #define CYTHON_USE_PYLONG_INTERNALS 0
+ #undef CYTHON_AVOID_BORROWED_REFS
+ #define CYTHON_AVOID_BORROWED_REFS 1
+ #undef CYTHON_ASSUME_SAFE_MACROS
+ #define CYTHON_ASSUME_SAFE_MACROS 0
+ #undef CYTHON_UNPACK_METHODS
+ #define CYTHON_UNPACK_METHODS 0
+ #undef CYTHON_FAST_THREAD_STATE
+ #define CYTHON_FAST_THREAD_STATE 0
+ #undef CYTHON_FAST_PYCALL
+ #define CYTHON_FAST_PYCALL 0
+ #if PY_VERSION_HEX < 0x03090000
+ #undef CYTHON_PEP489_MULTI_PHASE_INIT
+ #define CYTHON_PEP489_MULTI_PHASE_INIT 0
+ #elif !defined(CYTHON_PEP489_MULTI_PHASE_INIT)
+ #define CYTHON_PEP489_MULTI_PHASE_INIT 1
+ #endif
+ #undef CYTHON_USE_TP_FINALIZE
+ #define CYTHON_USE_TP_FINALIZE (PY_VERSION_HEX >= 0x030400a1 && PYPY_VERSION_NUM >= 0x07030C00)
+ #undef CYTHON_USE_DICT_VERSIONS
+ #define CYTHON_USE_DICT_VERSIONS 0
+ #undef CYTHON_USE_EXC_INFO_STACK
+ #define CYTHON_USE_EXC_INFO_STACK 0
+ #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC
+ #define CYTHON_UPDATE_DESCRIPTOR_DOC 0
+ #endif
+#elif defined(PYSTON_VERSION)
+ #define CYTHON_COMPILING_IN_PYPY 0
+ #define CYTHON_COMPILING_IN_PYSTON 1
+ #define CYTHON_COMPILING_IN_CPYTHON 0
+ #define CYTHON_COMPILING_IN_NOGIL 0
+ #ifndef CYTHON_USE_TYPE_SLOTS
+ #define CYTHON_USE_TYPE_SLOTS 1
+ #endif
+ #undef CYTHON_USE_PYTYPE_LOOKUP
+ #define CYTHON_USE_PYTYPE_LOOKUP 0
+ #undef CYTHON_USE_ASYNC_SLOTS
+ #define CYTHON_USE_ASYNC_SLOTS 0
+ #undef CYTHON_USE_PYLIST_INTERNALS
+ #define CYTHON_USE_PYLIST_INTERNALS 0
+ #ifndef CYTHON_USE_UNICODE_INTERNALS
+ #define CYTHON_USE_UNICODE_INTERNALS 1
+ #endif
+ #undef CYTHON_USE_UNICODE_WRITER
+ #define CYTHON_USE_UNICODE_WRITER 0
+ #undef CYTHON_USE_PYLONG_INTERNALS
+ #define CYTHON_USE_PYLONG_INTERNALS 0
+ #ifndef CYTHON_AVOID_BORROWED_REFS
+ #define CYTHON_AVOID_BORROWED_REFS 0
+ #endif
+ #ifndef CYTHON_ASSUME_SAFE_MACROS
+ #define CYTHON_ASSUME_SAFE_MACROS 1
+ #endif
+ #ifndef CYTHON_UNPACK_METHODS
+ #define CYTHON_UNPACK_METHODS 1
+ #endif
+ #undef CYTHON_FAST_THREAD_STATE
+ #define CYTHON_FAST_THREAD_STATE 0
+ #undef CYTHON_FAST_PYCALL
+ #define CYTHON_FAST_PYCALL 0
+ #undef CYTHON_PEP489_MULTI_PHASE_INIT
+ #define CYTHON_PEP489_MULTI_PHASE_INIT 0
+ #undef CYTHON_USE_TP_FINALIZE
+ #define CYTHON_USE_TP_FINALIZE 0
+ #undef CYTHON_USE_DICT_VERSIONS
+ #define CYTHON_USE_DICT_VERSIONS 0
+ #undef CYTHON_USE_EXC_INFO_STACK
+ #define CYTHON_USE_EXC_INFO_STACK 0
+ #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC
+ #define CYTHON_UPDATE_DESCRIPTOR_DOC 0
+ #endif
+#elif defined(PY_NOGIL)
+ #define CYTHON_COMPILING_IN_PYPY 0
+ #define CYTHON_COMPILING_IN_PYSTON 0
+ #define CYTHON_COMPILING_IN_CPYTHON 0
+ #define CYTHON_COMPILING_IN_NOGIL 1
+ #ifndef CYTHON_USE_TYPE_SLOTS
+ #define CYTHON_USE_TYPE_SLOTS 1
+ #endif
+ #undef CYTHON_USE_PYTYPE_LOOKUP
+ #define CYTHON_USE_PYTYPE_LOOKUP 0
+ #ifndef CYTHON_USE_ASYNC_SLOTS
+ #define CYTHON_USE_ASYNC_SLOTS 1
+ #endif
+ #undef CYTHON_USE_PYLIST_INTERNALS
+ #define CYTHON_USE_PYLIST_INTERNALS 0
+ #ifndef CYTHON_USE_UNICODE_INTERNALS
+ #define CYTHON_USE_UNICODE_INTERNALS 1
+ #endif
+ #undef CYTHON_USE_UNICODE_WRITER
+ #define CYTHON_USE_UNICODE_WRITER 0
+ #undef CYTHON_USE_PYLONG_INTERNALS
+ #define CYTHON_USE_PYLONG_INTERNALS 0
+ #ifndef CYTHON_AVOID_BORROWED_REFS
+ #define CYTHON_AVOID_BORROWED_REFS 0
+ #endif
+ #ifndef CYTHON_ASSUME_SAFE_MACROS
+ #define CYTHON_ASSUME_SAFE_MACROS 1
+ #endif
+ #ifndef CYTHON_UNPACK_METHODS
+ #define CYTHON_UNPACK_METHODS 1
+ #endif
+ #undef CYTHON_FAST_THREAD_STATE
+ #define CYTHON_FAST_THREAD_STATE 0
+ #undef CYTHON_FAST_PYCALL
+ #define CYTHON_FAST_PYCALL 0
+ #ifndef CYTHON_PEP489_MULTI_PHASE_INIT
+ #define CYTHON_PEP489_MULTI_PHASE_INIT 1
+ #endif
+ #ifndef CYTHON_USE_TP_FINALIZE
+ #define CYTHON_USE_TP_FINALIZE 1
+ #endif
+ #undef CYTHON_USE_DICT_VERSIONS
+ #define CYTHON_USE_DICT_VERSIONS 0
+ #undef CYTHON_USE_EXC_INFO_STACK
+ #define CYTHON_USE_EXC_INFO_STACK 0
+#else
+ #define CYTHON_COMPILING_IN_PYPY 0
+ #define CYTHON_COMPILING_IN_PYSTON 0
+ #define CYTHON_COMPILING_IN_CPYTHON 1
+ #define CYTHON_COMPILING_IN_NOGIL 0
+ #ifndef CYTHON_USE_TYPE_SLOTS
+ #define CYTHON_USE_TYPE_SLOTS 1
+ #endif
+ #if PY_VERSION_HEX < 0x02070000
+ #undef CYTHON_USE_PYTYPE_LOOKUP
+ #define CYTHON_USE_PYTYPE_LOOKUP 0
+ #elif !defined(CYTHON_USE_PYTYPE_LOOKUP)
+ #define CYTHON_USE_PYTYPE_LOOKUP 1
+ #endif
+ #if PY_MAJOR_VERSION < 3
+ #undef CYTHON_USE_ASYNC_SLOTS
+ #define CYTHON_USE_ASYNC_SLOTS 0
+ #elif !defined(CYTHON_USE_ASYNC_SLOTS)
+ #define CYTHON_USE_ASYNC_SLOTS 1
+ #endif
+ #if PY_VERSION_HEX < 0x02070000
+ #undef CYTHON_USE_PYLONG_INTERNALS
+ #define CYTHON_USE_PYLONG_INTERNALS 0
+ #elif !defined(CYTHON_USE_PYLONG_INTERNALS)
+ #define CYTHON_USE_PYLONG_INTERNALS (PY_VERSION_HEX < 0x030C00A5)
+ #endif
+ #ifndef CYTHON_USE_PYLIST_INTERNALS
+ #define CYTHON_USE_PYLIST_INTERNALS 1
+ #endif
+ #ifndef CYTHON_USE_UNICODE_INTERNALS
+ #define CYTHON_USE_UNICODE_INTERNALS 1
+ #endif
+ #if PY_VERSION_HEX < 0x030300F0 || PY_VERSION_HEX >= 0x030B00A2
+ #undef CYTHON_USE_UNICODE_WRITER
+ #define CYTHON_USE_UNICODE_WRITER 0
+ #elif !defined(CYTHON_USE_UNICODE_WRITER)
+ #define CYTHON_USE_UNICODE_WRITER 1
+ #endif
+ #ifndef CYTHON_AVOID_BORROWED_REFS
+ #define CYTHON_AVOID_BORROWED_REFS 0
+ #endif
+ #ifndef CYTHON_ASSUME_SAFE_MACROS
+ #define CYTHON_ASSUME_SAFE_MACROS 1
+ #endif
+ #ifndef CYTHON_UNPACK_METHODS
+ #define CYTHON_UNPACK_METHODS 1
+ #endif
+ #if PY_VERSION_HEX >= 0x030B00A4
+ #undef CYTHON_FAST_THREAD_STATE
+ #define CYTHON_FAST_THREAD_STATE 0
+ #elif !defined(CYTHON_FAST_THREAD_STATE)
+ #define CYTHON_FAST_THREAD_STATE 1
+ #endif
+ #ifndef CYTHON_FAST_PYCALL
+ #define CYTHON_FAST_PYCALL (PY_VERSION_HEX < 0x030A0000)
+ #endif
+ #ifndef CYTHON_PEP489_MULTI_PHASE_INIT
+ #define CYTHON_PEP489_MULTI_PHASE_INIT (PY_VERSION_HEX >= 0x03050000)
+ #endif
+ #ifndef CYTHON_USE_TP_FINALIZE
+ #define CYTHON_USE_TP_FINALIZE (PY_VERSION_HEX >= 0x030400a1)
+ #endif
+ #ifndef CYTHON_USE_DICT_VERSIONS
+ #define CYTHON_USE_DICT_VERSIONS ((PY_VERSION_HEX >= 0x030600B1) && (PY_VERSION_HEX < 0x030C00A5))
+ #endif
+ #if PY_VERSION_HEX >= 0x030B00A4
+ #undef CYTHON_USE_EXC_INFO_STACK
+ #define CYTHON_USE_EXC_INFO_STACK 0
+ #elif !defined(CYTHON_USE_EXC_INFO_STACK)
+ #define CYTHON_USE_EXC_INFO_STACK (PY_VERSION_HEX >= 0x030700A3)
+ #endif
+ #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC
+ #define CYTHON_UPDATE_DESCRIPTOR_DOC 1
+ #endif
+#endif
+#if !defined(CYTHON_FAST_PYCCALL)
+#define CYTHON_FAST_PYCCALL (CYTHON_FAST_PYCALL && PY_VERSION_HEX >= 0x030600B1)
+#endif
+#if CYTHON_USE_PYLONG_INTERNALS
+ #if PY_MAJOR_VERSION < 3
+ #include "longintrepr.h"
+ #endif
+ #undef SHIFT
+ #undef BASE
+ #undef MASK
+ #ifdef SIZEOF_VOID_P
+ enum { __pyx_check_sizeof_voidp = 1 / (int)(SIZEOF_VOID_P == sizeof(void*)) };
+ #endif
+#endif
+#ifndef __has_attribute
+ #define __has_attribute(x) 0
+#endif
+#ifndef __has_cpp_attribute
+ #define __has_cpp_attribute(x) 0
+#endif
+#ifndef CYTHON_RESTRICT
+ #if defined(__GNUC__)
+ #define CYTHON_RESTRICT __restrict__
+ #elif defined(_MSC_VER) && _MSC_VER >= 1400
+ #define CYTHON_RESTRICT __restrict
+ #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+ #define CYTHON_RESTRICT restrict
+ #else
+ #define CYTHON_RESTRICT
+ #endif
+#endif
+#ifndef CYTHON_UNUSED
+# if defined(__GNUC__)
+# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
+# define CYTHON_UNUSED __attribute__ ((__unused__))
+# else
+# define CYTHON_UNUSED
+# endif
+# elif defined(__ICC) || (defined(__INTEL_COMPILER) && !defined(_MSC_VER))
+# define CYTHON_UNUSED __attribute__ ((__unused__))
+# else
+# define CYTHON_UNUSED
+# endif
+#endif
+#ifndef CYTHON_MAYBE_UNUSED_VAR
+# if defined(__cplusplus)
+ template void CYTHON_MAYBE_UNUSED_VAR( const T& ) { }
+# else
+# define CYTHON_MAYBE_UNUSED_VAR(x) (void)(x)
+# endif
+#endif
+#ifndef CYTHON_NCP_UNUSED
+# if CYTHON_COMPILING_IN_CPYTHON
+# define CYTHON_NCP_UNUSED
+# else
+# define CYTHON_NCP_UNUSED CYTHON_UNUSED
+# endif
+#endif
+#define __Pyx_void_to_None(void_result) ((void)(void_result), Py_INCREF(Py_None), Py_None)
+#ifdef _MSC_VER
+ #ifndef _MSC_STDINT_H_
+ #if _MSC_VER < 1300
+ typedef unsigned char uint8_t;
+ typedef unsigned int uint32_t;
+ #else
+ typedef unsigned __int8 uint8_t;
+ typedef unsigned __int32 uint32_t;
+ #endif
+ #endif
+#else
+ #include
+#endif
+#ifndef CYTHON_FALLTHROUGH
+ #if defined(__cplusplus) && __cplusplus >= 201103L
+ #if __has_cpp_attribute(fallthrough)
+ #define CYTHON_FALLTHROUGH [[fallthrough]]
+ #elif __has_cpp_attribute(clang::fallthrough)
+ #define CYTHON_FALLTHROUGH [[clang::fallthrough]]
+ #elif __has_cpp_attribute(gnu::fallthrough)
+ #define CYTHON_FALLTHROUGH [[gnu::fallthrough]]
+ #endif
+ #endif
+ #ifndef CYTHON_FALLTHROUGH
+ #if __has_attribute(fallthrough)
+ #define CYTHON_FALLTHROUGH __attribute__((fallthrough))
+ #else
+ #define CYTHON_FALLTHROUGH
+ #endif
+ #endif
+ #if defined(__clang__ ) && defined(__apple_build_version__)
+ #if __apple_build_version__ < 7000000
+ #undef CYTHON_FALLTHROUGH
+ #define CYTHON_FALLTHROUGH
+ #endif
+ #endif
+#endif
+
+#ifndef CYTHON_INLINE
+ #if defined(__clang__)
+ #define CYTHON_INLINE __inline__ __attribute__ ((__unused__))
+ #elif defined(__GNUC__)
+ #define CYTHON_INLINE __inline__
+ #elif defined(_MSC_VER)
+ #define CYTHON_INLINE __inline
+ #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+ #define CYTHON_INLINE inline
+ #else
+ #define CYTHON_INLINE
+ #endif
+#endif
+
+#define __PYX_BUILD_PY_SSIZE_T "n"
+#define CYTHON_FORMAT_SSIZE_T "z"
+#if PY_MAJOR_VERSION < 3
+ #define __Pyx_BUILTIN_MODULE_NAME "__builtin__"
+ #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\
+ PyCode_New(a+k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)
+ #define __Pyx_DefaultClassType PyClass_Type
+#else
+ #define __Pyx_BUILTIN_MODULE_NAME "builtins"
+ #define __Pyx_DefaultClassType PyType_Type
+#if PY_VERSION_HEX >= 0x030B00A1
+  /* Build a code object for CPython >= 3.11 (PY_VERSION_HEX >= 0x030B00A1).
+   * An empty code object is created with PyCode_NewEmpty() and the requested
+   * fields are then filled in through its Python-level replace(**kwds) method.
+   *
+   * Returns a new reference, or NULL on failure. An exception that was already
+   * pending on entry is fetched up front and restored before returning. */
+  static CYTHON_INLINE PyCodeObject* __Pyx_PyCode_New(int a, int k, int l, int s, int f,
+                                                    PyObject *code, PyObject *c, PyObject* n, PyObject *v,
+                                                    PyObject *fv, PyObject *cell, PyObject* fn,
+                                                    PyObject *name, int fline, PyObject *lnos) {
+    PyObject *kwds=NULL, *argcount=NULL, *posonlyargcount=NULL, *kwonlyargcount=NULL;
+    PyObject *nlocals=NULL, *stacksize=NULL, *flags=NULL, *replace=NULL, *call_result=NULL, *empty=NULL;
+    const char *fn_cstr=NULL;
+    const char *name_cstr=NULL;
+    PyCodeObject* co=NULL;
+    PyObject *type, *value, *traceback;
+    PyErr_Fetch(&type, &value, &traceback);
+    /* Collect every field as a keyword argument for code.replace(). */
+    if (!(kwds=PyDict_New())) goto end;
+    if (!(argcount=PyLong_FromLong(a))) goto end;
+    if (PyDict_SetItemString(kwds, "co_argcount", argcount) != 0) goto end;
+    if (!(posonlyargcount=PyLong_FromLong(0))) goto end;
+    if (PyDict_SetItemString(kwds, "co_posonlyargcount", posonlyargcount) != 0) goto end;
+    if (!(kwonlyargcount=PyLong_FromLong(k))) goto end;
+    if (PyDict_SetItemString(kwds, "co_kwonlyargcount", kwonlyargcount) != 0) goto end;
+    if (!(nlocals=PyLong_FromLong(l))) goto end;
+    if (PyDict_SetItemString(kwds, "co_nlocals", nlocals) != 0) goto end;
+    if (!(stacksize=PyLong_FromLong(s))) goto end;
+    if (PyDict_SetItemString(kwds, "co_stacksize", stacksize) != 0) goto end;
+    if (!(flags=PyLong_FromLong(f))) goto end;
+    if (PyDict_SetItemString(kwds, "co_flags", flags) != 0) goto end;
+    if (PyDict_SetItemString(kwds, "co_code", code) != 0) goto end;
+    if (PyDict_SetItemString(kwds, "co_consts", c) != 0) goto end;
+    if (PyDict_SetItemString(kwds, "co_names", n) != 0) goto end;
+    if (PyDict_SetItemString(kwds, "co_varnames", v) != 0) goto end;
+    if (PyDict_SetItemString(kwds, "co_freevars", fv) != 0) goto end;
+    if (PyDict_SetItemString(kwds, "co_cellvars", cell) != 0) goto end;
+    if (PyDict_SetItemString(kwds, "co_linetable", lnos) != 0) goto end;
+    if (!(fn_cstr=PyUnicode_AsUTF8AndSize(fn, NULL))) goto end;
+    if (!(name_cstr=PyUnicode_AsUTF8AndSize(name, NULL))) goto end;
+    if (!(co = PyCode_NewEmpty(fn_cstr, name_cstr, fline))) goto end;
+    if (!(replace = PyObject_GetAttrString((PyObject*)co, "replace"))) goto cleanup_code_too;
+    if (!(empty = PyTuple_New(0))) goto cleanup_code_too; // unfortunately __pyx_empty_tuple isn't available here
+    if (!(call_result = PyObject_Call(replace, empty, kwds))) goto cleanup_code_too;
+    /* Swap the empty placeholder for the fully populated code object. */
+    Py_XDECREF((PyObject*)co);
+    co = (PyCodeObject*)call_result;
+    call_result = NULL;
+    if (0) {
+      cleanup_code_too:
+      Py_XDECREF((PyObject*)co);
+      co = NULL;
+    }
+    end:
+    Py_XDECREF(kwds);
+    Py_XDECREF(argcount);
+    Py_XDECREF(posonlyargcount);
+    Py_XDECREF(kwonlyargcount);
+    Py_XDECREF(nlocals);
+    Py_XDECREF(stacksize);
+    /* PyDict_SetItemString() does not steal the value reference, so the
+     * flags object must be released here like the other temporaries. */
+    Py_XDECREF(flags);
+    Py_XDECREF(replace);
+    Py_XDECREF(call_result);
+    Py_XDECREF(empty);
+    if (type) {
+      PyErr_Restore(type, value, traceback);
+    }
+    return co;
+  }
+#else
+ #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\
+ PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)
+#endif
+ #define __Pyx_DefaultClassType PyType_Type
+#endif
+#if PY_VERSION_HEX >= 0x030900F0 && !CYTHON_COMPILING_IN_PYPY
+ #define __Pyx_PyObject_GC_IsFinalized(o) PyObject_GC_IsFinalized(o)
+#else
+ #define __Pyx_PyObject_GC_IsFinalized(o) _PyGC_FINALIZED(o)
+#endif
+#ifndef Py_TPFLAGS_CHECKTYPES
+ #define Py_TPFLAGS_CHECKTYPES 0
+#endif
+#ifndef Py_TPFLAGS_HAVE_INDEX
+ #define Py_TPFLAGS_HAVE_INDEX 0
+#endif
+#ifndef Py_TPFLAGS_HAVE_NEWBUFFER
+ #define Py_TPFLAGS_HAVE_NEWBUFFER 0
+#endif
+#ifndef Py_TPFLAGS_HAVE_FINALIZE
+ #define Py_TPFLAGS_HAVE_FINALIZE 0
+#endif
+#ifndef METH_STACKLESS
+ #define METH_STACKLESS 0
+#endif
+#if PY_VERSION_HEX <= 0x030700A3 || !defined(METH_FASTCALL)
+ #ifndef METH_FASTCALL
+ #define METH_FASTCALL 0x80
+ #endif
+ typedef PyObject *(*__Pyx_PyCFunctionFast) (PyObject *self, PyObject *const *args, Py_ssize_t nargs);
+ typedef PyObject *(*__Pyx_PyCFunctionFastWithKeywords) (PyObject *self, PyObject *const *args,
+ Py_ssize_t nargs, PyObject *kwnames);
+#else
+ #define __Pyx_PyCFunctionFast _PyCFunctionFast
+ #define __Pyx_PyCFunctionFastWithKeywords _PyCFunctionFastWithKeywords
+#endif
+#if CYTHON_FAST_PYCCALL
+#define __Pyx_PyFastCFunction_Check(func)\
+ ((PyCFunction_Check(func) && (METH_FASTCALL == (PyCFunction_GET_FLAGS(func) & ~(METH_CLASS | METH_STATIC | METH_COEXIST | METH_KEYWORDS | METH_STACKLESS)))))
+#else
+#define __Pyx_PyFastCFunction_Check(func) 0
+#endif
+#if CYTHON_COMPILING_IN_PYPY && !defined(PyObject_Malloc)
+  /* When compiling for PyPy and PyObject_Malloc is not provided as a macro,
+   * map the PyObject_* allocator names onto the PyMem_* allocators.
+   * Reallocation takes (pointer, new_size), so both arguments are forwarded. */
+  #define PyObject_Malloc(s)      PyMem_Malloc(s)
+  #define PyObject_Free(p)        PyMem_Free(p)
+  #define PyObject_Realloc(p, n)  PyMem_Realloc(p, n)
+#endif
+#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030400A1
+ #define PyMem_RawMalloc(n) PyMem_Malloc(n)
+ #define PyMem_RawRealloc(p, n) PyMem_Realloc(p, n)
+ #define PyMem_RawFree(p) PyMem_Free(p)
+#endif
+#if CYTHON_COMPILING_IN_PYSTON
+ #define __Pyx_PyCode_HasFreeVars(co) PyCode_HasFreeVars(co)
+ #define __Pyx_PyFrame_SetLineNumber(frame, lineno) PyFrame_SetLineNumber(frame, lineno)
+#else
+ #define __Pyx_PyCode_HasFreeVars(co) (PyCode_GetNumFree(co) > 0)
+ #define __Pyx_PyFrame_SetLineNumber(frame, lineno) (frame)->f_lineno = (lineno)
+#endif
+#if !CYTHON_FAST_THREAD_STATE || PY_VERSION_HEX < 0x02070000
+ #define __Pyx_PyThreadState_Current PyThreadState_GET()
+#elif PY_VERSION_HEX >= 0x03060000
+ #define __Pyx_PyThreadState_Current _PyThreadState_UncheckedGet()
+#elif PY_VERSION_HEX >= 0x03000000
+ #define __Pyx_PyThreadState_Current PyThreadState_GET()
+#else
+ #define __Pyx_PyThreadState_Current _PyThreadState_Current
+#endif
+#if PY_VERSION_HEX < 0x030700A2 && !defined(PyThread_tss_create) && !defined(Py_tss_NEEDS_INIT)
+#include "pythread.h"
+#define Py_tss_NEEDS_INIT 0
+typedef int Py_tss_t;
+static CYTHON_INLINE int PyThread_tss_create(Py_tss_t *key) {
+  /* Backport of PyThread_tss_create() on top of the legacy key-based TLS API.
+   * PyThread_create_key() reports failure with -1; propagate that as a
+   * non-zero result and leave *key untouched instead of claiming success.
+   * Returns 0 on success, -1 on failure. */
+  int new_key = PyThread_create_key();
+  if (new_key == -1) return -1;
+  *key = new_key;
+  return 0;
+}
+static CYTHON_INLINE Py_tss_t * PyThread_tss_alloc(void) {
+  /* Allocate storage for one key and mark it as not yet created.
+   * Returns NULL when the allocation fails, so the marker is only written
+   * through a valid pointer. */
+  Py_tss_t *key = (Py_tss_t *)PyObject_Malloc(sizeof(Py_tss_t));
+  if (key == NULL) return NULL;
+  *key = Py_tss_NEEDS_INIT;
+  return key;
+}
+static CYTHON_INLINE void PyThread_tss_free(Py_tss_t *key) {
+  /* Hand the key's storage back to the object allocator. */
+  void *storage = (void *)key;
+  PyObject_Free(storage);
+}
+static CYTHON_INLINE int PyThread_tss_is_created(Py_tss_t *key) {
+  /* A key counts as created once it no longer holds the Py_tss_NEEDS_INIT
+   * marker. Returns 1 in that case and 0 otherwise. */
+  const Py_tss_t current = *key;
+  return (current == Py_tss_NEEDS_INIT) ? 0 : 1;
+}
+static CYTHON_INLINE void PyThread_tss_delete(Py_tss_t *key) {
+  /* Release the legacy TLS key, then reset the slot to the
+   * Py_tss_NEEDS_INIT marker so it reads as "not created" again. */
+  const int legacy_key = *key;
+  PyThread_delete_key(legacy_key);
+  *key = Py_tss_NEEDS_INIT;
+}
+static CYTHON_INLINE int PyThread_tss_set(Py_tss_t *key, void *value) {
+  /* Forward to the legacy API and pass its status code back unchanged. */
+  const int status = PyThread_set_key_value(*key, value);
+  return status;
+}
+static CYTHON_INLINE void * PyThread_tss_get(Py_tss_t *key) {
+  /* Forward to the legacy API and return whatever pointer it yields. */
+  void *stored = PyThread_get_key_value(*key);
+  return stored;
+}
+#endif
+#if CYTHON_COMPILING_IN_CPYTHON || defined(_PyDict_NewPresized)
+/* Sizes up to 8 use a plain PyDict_New(); larger requests are presized.
+ * The argument is parenthesized so that an expression argument (e.g. a
+ * conditional) is compared as a whole. */
+#define __Pyx_PyDict_NewPresized(n)  (((n) <= 8) ? PyDict_New() : _PyDict_NewPresized(n))
+#else
+#define __Pyx_PyDict_NewPresized(n)  PyDict_New()
+#endif
+#if PY_MAJOR_VERSION >= 3 || CYTHON_FUTURE_DIVISION
+ #define __Pyx_PyNumber_Divide(x,y) PyNumber_TrueDivide(x,y)
+ #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceTrueDivide(x,y)
+#else
+ #define __Pyx_PyNumber_Divide(x,y) PyNumber_Divide(x,y)
+ #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceDivide(x,y)
+#endif
+#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030500A1 && CYTHON_USE_UNICODE_INTERNALS
+#define __Pyx_PyDict_GetItemStr(dict, name) _PyDict_GetItem_KnownHash(dict, name, ((PyASCIIObject *) name)->hash)
+#else
+#define __Pyx_PyDict_GetItemStr(dict, name) PyDict_GetItem(dict, name)
+#endif
+#if PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_KIND)
+ #define CYTHON_PEP393_ENABLED 1
+ #if PY_VERSION_HEX >= 0x030C0000
+ #define __Pyx_PyUnicode_READY(op) (0)
+ #else
+ #define __Pyx_PyUnicode_READY(op) (likely(PyUnicode_IS_READY(op)) ?\
+ 0 : _PyUnicode_Ready((PyObject *)(op)))
+ #endif
+ #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_LENGTH(u)
+ #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i)
+ #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) PyUnicode_MAX_CHAR_VALUE(u)
+ #define __Pyx_PyUnicode_KIND(u) PyUnicode_KIND(u)
+ #define __Pyx_PyUnicode_DATA(u) PyUnicode_DATA(u)
+ #define __Pyx_PyUnicode_READ(k, d, i) PyUnicode_READ(k, d, i)
+ #define __Pyx_PyUnicode_WRITE(k, d, i, ch) PyUnicode_WRITE(k, d, i, ch)
+ #if PY_VERSION_HEX >= 0x030C0000
+ #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_LENGTH(u))
+ #else
+ #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03090000
+ #define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : ((PyCompactUnicodeObject *)(u))->wstr_length))
+ #else
+ #define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : PyUnicode_GET_SIZE(u)))
+ #endif
+ #endif
+#else
+ #define CYTHON_PEP393_ENABLED 0
+ #define PyUnicode_1BYTE_KIND 1
+ #define PyUnicode_2BYTE_KIND 2
+ #define PyUnicode_4BYTE_KIND 4
+ #define __Pyx_PyUnicode_READY(op) (0)
+ #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_SIZE(u)
+ #define __Pyx_PyUnicode_READ_CHAR(u, i) ((Py_UCS4)(PyUnicode_AS_UNICODE(u)[i]))
+ #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) ((sizeof(Py_UNICODE) == 2) ? 65535 : 1114111)
+ #define __Pyx_PyUnicode_KIND(u) (sizeof(Py_UNICODE))
+ #define __Pyx_PyUnicode_DATA(u) ((void*)PyUnicode_AS_UNICODE(u))
+ #define __Pyx_PyUnicode_READ(k, d, i) ((void)(k), (Py_UCS4)(((Py_UNICODE*)d)[i]))
+ #define __Pyx_PyUnicode_WRITE(k, d, i, ch) (((void)(k)), ((Py_UNICODE*)d)[i] = ch)
+ #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_SIZE(u))
+#endif
+#if CYTHON_COMPILING_IN_PYPY
+ #define __Pyx_PyUnicode_Concat(a, b) PyNumber_Add(a, b)
+ #define __Pyx_PyUnicode_ConcatSafe(a, b) PyNumber_Add(a, b)
+#else
+ #define __Pyx_PyUnicode_Concat(a, b) PyUnicode_Concat(a, b)
+ #define __Pyx_PyUnicode_ConcatSafe(a, b) ((unlikely((a) == Py_None) || unlikely((b) == Py_None)) ?\
+ PyNumber_Add(a, b) : __Pyx_PyUnicode_Concat(a, b))
+#endif
+#if CYTHON_COMPILING_IN_PYPY && !defined(PyUnicode_Contains)
+ #define PyUnicode_Contains(u, s) PySequence_Contains(u, s)
+#endif
+#if CYTHON_COMPILING_IN_PYPY && !defined(PyByteArray_Check)
+ #define PyByteArray_Check(obj) PyObject_TypeCheck(obj, &PyByteArray_Type)
+#endif
+#if CYTHON_COMPILING_IN_PYPY && !defined(PyObject_Format)
+ #define PyObject_Format(obj, fmt) PyObject_CallMethod(obj, "__format__", "O", fmt)
+#endif
+#define __Pyx_PyString_FormatSafe(a, b) ((unlikely((a) == Py_None || (PyString_Check(b) && !PyString_CheckExact(b)))) ? PyNumber_Remainder(a, b) : __Pyx_PyString_Format(a, b))
+#define __Pyx_PyUnicode_FormatSafe(a, b) ((unlikely((a) == Py_None || (PyUnicode_Check(b) && !PyUnicode_CheckExact(b)))) ? PyNumber_Remainder(a, b) : PyUnicode_Format(a, b))
+#if PY_MAJOR_VERSION >= 3
+ #define __Pyx_PyString_Format(a, b) PyUnicode_Format(a, b)
+#else
+ #define __Pyx_PyString_Format(a, b) PyString_Format(a, b)
+#endif
+#if PY_MAJOR_VERSION < 3 && !defined(PyObject_ASCII)
+ #define PyObject_ASCII(o) PyObject_Repr(o)
+#endif
+#if PY_MAJOR_VERSION >= 3
+ #define PyBaseString_Type PyUnicode_Type
+ #define PyStringObject PyUnicodeObject
+ #define PyString_Type PyUnicode_Type
+ #define PyString_Check PyUnicode_Check
+ #define PyString_CheckExact PyUnicode_CheckExact
+#ifndef PyObject_Unicode
+ #define PyObject_Unicode PyObject_Str
+#endif
+#endif
+#if PY_MAJOR_VERSION >= 3
+ #define __Pyx_PyBaseString_Check(obj) PyUnicode_Check(obj)
+ #define __Pyx_PyBaseString_CheckExact(obj) PyUnicode_CheckExact(obj)
+#else
+ #define __Pyx_PyBaseString_Check(obj) (PyString_Check(obj) || PyUnicode_Check(obj))
+ #define __Pyx_PyBaseString_CheckExact(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj))
+#endif
+#ifndef PySet_CheckExact
+ #define PySet_CheckExact(obj) (Py_TYPE(obj) == &PySet_Type)
+#endif
+#if PY_VERSION_HEX >= 0x030900A4
+ #define __Pyx_SET_REFCNT(obj, refcnt) Py_SET_REFCNT(obj, refcnt)
+ #define __Pyx_SET_SIZE(obj, size) Py_SET_SIZE(obj, size)
+#else
+ #define __Pyx_SET_REFCNT(obj, refcnt) Py_REFCNT(obj) = (refcnt)
+ #define __Pyx_SET_SIZE(obj, size) Py_SIZE(obj) = (size)
+#endif
+#if CYTHON_ASSUME_SAFE_MACROS
+ #define __Pyx_PySequence_SIZE(seq) Py_SIZE(seq)
+#else
+ #define __Pyx_PySequence_SIZE(seq) PySequence_Size(seq)
+#endif
+#if PY_MAJOR_VERSION >= 3
+ #define PyIntObject PyLongObject
+ #define PyInt_Type PyLong_Type
+ #define PyInt_Check(op) PyLong_Check(op)
+ #define PyInt_CheckExact(op) PyLong_CheckExact(op)
+ #define PyInt_FromString PyLong_FromString
+ #define PyInt_FromUnicode PyLong_FromUnicode
+ #define PyInt_FromLong PyLong_FromLong
+ #define PyInt_FromSize_t PyLong_FromSize_t
+ #define PyInt_FromSsize_t PyLong_FromSsize_t
+ #define PyInt_AsLong PyLong_AsLong
+ #define PyInt_AS_LONG PyLong_AS_LONG
+ #define PyInt_AsSsize_t PyLong_AsSsize_t
+ #define PyInt_AsUnsignedLongMask PyLong_AsUnsignedLongMask
+ #define PyInt_AsUnsignedLongLongMask PyLong_AsUnsignedLongLongMask
+ #define PyNumber_Int PyNumber_Long
+#endif
+#if PY_MAJOR_VERSION >= 3
+ #define PyBoolObject PyLongObject
+#endif
+#if PY_MAJOR_VERSION >= 3 && CYTHON_COMPILING_IN_PYPY
+ #ifndef PyUnicode_InternFromString
+ #define PyUnicode_InternFromString(s) PyUnicode_FromString(s)
+ #endif
+#endif
+#if PY_VERSION_HEX < 0x030200A4
+ typedef long Py_hash_t;
+ #define __Pyx_PyInt_FromHash_t PyInt_FromLong
+ #define __Pyx_PyInt_AsHash_t __Pyx_PyIndex_AsHash_t
+#else
+ #define __Pyx_PyInt_FromHash_t PyInt_FromSsize_t
+ #define __Pyx_PyInt_AsHash_t __Pyx_PyIndex_AsSsize_t
+#endif
+#if PY_MAJOR_VERSION >= 3
+ #define __Pyx_PyMethod_New(func, self, klass) ((self) ? ((void)(klass), PyMethod_New(func, self)) : __Pyx_NewRef(func))
+#else
+ #define __Pyx_PyMethod_New(func, self, klass) PyMethod_New(func, self, klass)
+#endif
+#if CYTHON_USE_ASYNC_SLOTS
+ #if PY_VERSION_HEX >= 0x030500B1
+ #define __Pyx_PyAsyncMethodsStruct PyAsyncMethods
+ #define __Pyx_PyType_AsAsync(obj) (Py_TYPE(obj)->tp_as_async)
+ #else
+ #define __Pyx_PyType_AsAsync(obj) ((__Pyx_PyAsyncMethodsStruct*) (Py_TYPE(obj)->tp_reserved))
+ #endif
+#else
+ #define __Pyx_PyType_AsAsync(obj) NULL
+#endif
+#ifndef __Pyx_PyAsyncMethodsStruct
+ typedef struct { /* Fallback declaration used only when __Pyx_PyAsyncMethodsStruct
+   * has not already been defined as a macro by the preceding version checks. */
+ unaryfunc am_await; /* one-argument slot; presumably the await hook - confirm */
+ unaryfunc am_aiter; /* one-argument slot; presumably the async-iterator hook - confirm */
+ unaryfunc am_anext; /* one-argument slot; presumably the async-next hook - confirm */
+ } __Pyx_PyAsyncMethodsStruct;
+#endif
+
+#if defined(_WIN32) || defined(WIN32) || defined(MS_WINDOWS)
+ #if !defined(_USE_MATH_DEFINES)
+ #define _USE_MATH_DEFINES
+ #endif
+#endif
+#include <math.h>
+#ifdef NAN
+/* Preferred form: use the NAN constant supplied by <math.h>. */
+#define __PYX_NAN() ((float) NAN)
+#else
+/* Fallback when <math.h> does not define NAN: build a float whose bytes are
+ * all 0xFF. NOTE(review): this is a NaN bit pattern on IEEE-754 platforms;
+ * other float formats are not covered by this code. */
+static CYTHON_INLINE float __PYX_NAN() {
+  float value;
+  memset(&value, 0xFF, sizeof(value));
+  return value;
+}
+#endif
+#if defined(__CYGWIN__) && defined(_LDBL_EQ_DBL)
+#define __Pyx_truncl trunc
+#else
+#define __Pyx_truncl truncl
+#endif
+
+#define __PYX_MARK_ERR_POS(f_index, lineno) \
+ { __pyx_filename = __pyx_f[f_index]; (void)__pyx_filename; __pyx_lineno = lineno; (void)__pyx_lineno; __pyx_clineno = __LINE__; (void)__pyx_clineno; }
+#define __PYX_ERR(f_index, lineno, Ln_error) \
+ { __PYX_MARK_ERR_POS(f_index, lineno) goto Ln_error; }
+
+#ifndef __PYX_EXTERN_C
+ #ifdef __cplusplus
+ #define __PYX_EXTERN_C extern "C"
+ #else
+ #define __PYX_EXTERN_C extern
+ #endif
+#endif
+
+#define __PYX_HAVE__bbox
+#define __PYX_HAVE_API__bbox
+/* Early includes */
+#include <string.h>
+#include <stdio.h>
+#include "numpy/arrayobject.h"
+#include "numpy/ndarrayobject.h"
+#include "numpy/ndarraytypes.h"
+#include "numpy/arrayscalars.h"
+#include "numpy/ufuncobject.h"
+
+    /* NumPy API declarations from "numpy/__init__.pxd" */
+    
+#ifdef _OPENMP
+#include <omp.h>
+#endif /* _OPENMP */
+
+#if defined(PYREX_WITHOUT_ASSERTIONS) && !defined(CYTHON_WITHOUT_ASSERTIONS)
+#define CYTHON_WITHOUT_ASSERTIONS
+#endif
+
+typedef struct {PyObject **p; const char *s; const Py_ssize_t n; const char* encoding;
+ const char is_unicode; const char is_str; const char intern; } __Pyx_StringTabEntry; /* One string-table row:
+   * p is the address of a PyObject* slot, s a C string with size n, encoding an
+   * encoding name, followed by three char flags (is_unicode, is_str, intern).
+   * NOTE(review): presumably p receives the object built from s and n counts
+   * the terminating NUL - confirm against the table initialisation code. */
+
+#define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII 0
+#define __PYX_DEFAULT_STRING_ENCODING_IS_UTF8 0
+#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT (PY_MAJOR_VERSION >= 3 && __PYX_DEFAULT_STRING_ENCODING_IS_UTF8)
+#define __PYX_DEFAULT_STRING_ENCODING ""
+#define __Pyx_PyObject_FromString __Pyx_PyBytes_FromString
+#define __Pyx_PyObject_FromStringAndSize __Pyx_PyBytes_FromStringAndSize
+#define __Pyx_uchar_cast(c) ((unsigned char)c)
+#define __Pyx_long_cast(x) ((long)x)
+#define __Pyx_fits_Py_ssize_t(v, type, is_signed) (\
+ (sizeof(type) < sizeof(Py_ssize_t)) ||\
+ (sizeof(type) > sizeof(Py_ssize_t) &&\
+ likely(v < (type)PY_SSIZE_T_MAX ||\
+ v == (type)PY_SSIZE_T_MAX) &&\
+ (!is_signed || likely(v > (type)PY_SSIZE_T_MIN ||\
+ v == (type)PY_SSIZE_T_MIN))) ||\
+ (sizeof(type) == sizeof(Py_ssize_t) &&\
+ (is_signed || likely(v < (type)PY_SSIZE_T_MAX ||\
+ v == (type)PY_SSIZE_T_MAX))) )
+static CYTHON_INLINE int __Pyx_is_valid_index(Py_ssize_t i, Py_ssize_t limit) {
+  /* Both operands are compared as size_t, so a negative i converts to a very
+   * large unsigned value and one comparison covers both range ends. */
+  const size_t index = (size_t) i;
+  const size_t bound = (size_t) limit;
+  return index < bound;
+}
+/* __Pyx_sst_abs(value): absolute value using the narrowest available
+ * routine that is at least as wide as size_t. */
+#if defined (__cplusplus) && __cplusplus >= 201103L
+    #include <cstdlib>
+    #define __Pyx_sst_abs(value) std::abs(value)
+#elif SIZEOF_INT >= SIZEOF_SIZE_T
+    #define __Pyx_sst_abs(value) abs(value)
+#elif SIZEOF_LONG >= SIZEOF_SIZE_T
+    #define __Pyx_sst_abs(value) labs(value)
+#elif defined (_MSC_VER)
+    #define __Pyx_sst_abs(value) ((Py_ssize_t)_abs64(value))
+#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+    #define __Pyx_sst_abs(value) llabs(value)
+#elif defined (__GNUC__)
+    #define __Pyx_sst_abs(value) __builtin_llabs(value)
+#else
+    /* Last-resort expression form; the argument is parenthesized so that an
+     * expression such as a - b is negated and compared as a whole. */
+    #define __Pyx_sst_abs(value) (((value)<0) ? -(value) : (value))
+#endif
+static CYTHON_INLINE const char* __Pyx_PyObject_AsString(PyObject*);
+static CYTHON_INLINE const char* __Pyx_PyObject_AsStringAndSize(PyObject*, Py_ssize_t* length);
+#define __Pyx_PyByteArray_FromString(s) PyByteArray_FromStringAndSize((const char*)s, strlen((const char*)s))
+#define __Pyx_PyByteArray_FromStringAndSize(s, l) PyByteArray_FromStringAndSize((const char*)s, l)
+#define __Pyx_PyBytes_FromString PyBytes_FromString
+#define __Pyx_PyBytes_FromStringAndSize PyBytes_FromStringAndSize
+static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char*);
+#if PY_MAJOR_VERSION < 3
+ #define __Pyx_PyStr_FromString __Pyx_PyBytes_FromString
+ #define __Pyx_PyStr_FromStringAndSize __Pyx_PyBytes_FromStringAndSize
+#else
+ #define __Pyx_PyStr_FromString __Pyx_PyUnicode_FromString
+ #define __Pyx_PyStr_FromStringAndSize __Pyx_PyUnicode_FromStringAndSize
+#endif
+#define __Pyx_PyBytes_AsWritableString(s) ((char*) PyBytes_AS_STRING(s))
+#define __Pyx_PyBytes_AsWritableSString(s) ((signed char*) PyBytes_AS_STRING(s))
+#define __Pyx_PyBytes_AsWritableUString(s) ((unsigned char*) PyBytes_AS_STRING(s))
+#define __Pyx_PyBytes_AsString(s) ((const char*) PyBytes_AS_STRING(s))
+#define __Pyx_PyBytes_AsSString(s) ((const signed char*) PyBytes_AS_STRING(s))
+#define __Pyx_PyBytes_AsUString(s) ((const unsigned char*) PyBytes_AS_STRING(s))
+#define __Pyx_PyObject_AsWritableString(s) ((char*) __Pyx_PyObject_AsString(s))
+#define __Pyx_PyObject_AsWritableSString(s) ((signed char*) __Pyx_PyObject_AsString(s))
+#define __Pyx_PyObject_AsWritableUString(s) ((unsigned char*) __Pyx_PyObject_AsString(s))
+#define __Pyx_PyObject_AsSString(s) ((const signed char*) __Pyx_PyObject_AsString(s))
+#define __Pyx_PyObject_AsUString(s) ((const unsigned char*) __Pyx_PyObject_AsString(s))
+#define __Pyx_PyObject_FromCString(s) __Pyx_PyObject_FromString((const char*)s)
+#define __Pyx_PyBytes_FromCString(s) __Pyx_PyBytes_FromString((const char*)s)
+#define __Pyx_PyByteArray_FromCString(s) __Pyx_PyByteArray_FromString((const char*)s)
+#define __Pyx_PyStr_FromCString(s) __Pyx_PyStr_FromString((const char*)s)
+#define __Pyx_PyUnicode_FromCString(s) __Pyx_PyUnicode_FromString((const char*)s)
+static CYTHON_INLINE size_t __Pyx_Py_UNICODE_strlen(const Py_UNICODE *u) {
+  /* Count Py_UNICODE units up to, but not including, the terminating zero. */
+  size_t length = 0;
+  while (u[length] != 0) {
+    ++length;
+  }
+  return length;
+}
+#define __Pyx_PyUnicode_FromUnicode(u) PyUnicode_FromUnicode(u, __Pyx_Py_UNICODE_strlen(u))
+#define __Pyx_PyUnicode_FromUnicodeAndLength PyUnicode_FromUnicode
+#define __Pyx_PyUnicode_AsUnicode PyUnicode_AsUnicode
+#define __Pyx_NewRef(obj) (Py_INCREF(obj), obj)
+#define __Pyx_Owned_Py_None(b) __Pyx_NewRef(Py_None)
+static CYTHON_INLINE PyObject * __Pyx_PyBool_FromLong(long b);
+static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*);
+static CYTHON_INLINE int __Pyx_PyObject_IsTrueAndDecref(PyObject*);
+static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x);
+#define __Pyx_PySequence_Tuple(obj)\
+ (likely(PyTuple_CheckExact(obj)) ? __Pyx_NewRef(obj) : PySequence_Tuple(obj))
+static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject*);
+static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t);
+static CYTHON_INLINE Py_hash_t __Pyx_PyIndex_AsHash_t(PyObject*);
+#if CYTHON_ASSUME_SAFE_MACROS
+#define __pyx_PyFloat_AsDouble(x) (PyFloat_CheckExact(x) ? PyFloat_AS_DOUBLE(x) : PyFloat_AsDouble(x))
+#else
+#define __pyx_PyFloat_AsDouble(x) PyFloat_AsDouble(x)
+#endif
+#define __pyx_PyFloat_AsFloat(x) ((float) __pyx_PyFloat_AsDouble(x))
+#if PY_MAJOR_VERSION >= 3
+#define __Pyx_PyNumber_Int(x) (PyLong_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Long(x))
+#else
+#define __Pyx_PyNumber_Int(x) (PyInt_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Int(x))
+#endif
+#define __Pyx_PyNumber_Float(x) (PyFloat_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Float(x))
+#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
+static int __Pyx_sys_getdefaultencoding_not_ascii;
+static int __Pyx_init_sys_getdefaultencoding_params(void) { /* Calls sys.getdefaultencoding() and records in
+   * __Pyx_sys_getdefaultencoding_not_ascii whether the result differs from "ascii".
+   * For a non-ascii default it also verifies that the 128 ASCII characters encode
+   * to the same 128 bytes. Returns 0 on success and -1 on failure. */
+ PyObject* sys;
+ PyObject* default_encoding = NULL;
+ PyObject* ascii_chars_u = NULL;
+ PyObject* ascii_chars_b = NULL;
+ const char* default_encoding_c;
+ sys = PyImport_ImportModule("sys");
+ if (!sys) goto bad;
+ default_encoding = PyObject_CallMethod(sys, (char*) "getdefaultencoding", NULL);
+ Py_DECREF(sys); /* module reference is dropped before the call result is checked */
+ if (!default_encoding) goto bad;
+ default_encoding_c = PyBytes_AsString(default_encoding);
+ if (!default_encoding_c) goto bad;
+ if (strcmp(default_encoding_c, "ascii") == 0) {
+ __Pyx_sys_getdefaultencoding_not_ascii = 0;
+ } else {
+ char ascii_chars[128];
+ int c;
+ for (c = 0; c < 128; c++) { /* fill the table with the byte values 0..127 */
+ ascii_chars[c] = c;
+ }
+ __Pyx_sys_getdefaultencoding_not_ascii = 1;
+ ascii_chars_u = PyUnicode_DecodeASCII(ascii_chars, 128, NULL);
+ if (!ascii_chars_u) goto bad;
+ ascii_chars_b = PyUnicode_AsEncodedString(ascii_chars_u, default_encoding_c, NULL);
+ if (!ascii_chars_b || !PyBytes_Check(ascii_chars_b) || memcmp(ascii_chars, PyBytes_AS_STRING(ascii_chars_b), 128) != 0) { /* encoding failed or did not reproduce the input bytes */
+ PyErr_Format(
+ PyExc_ValueError,
+ "This module compiled with c_string_encoding=ascii, but default encoding '%.200s' is not a superset of ascii.",
+ default_encoding_c);
+ goto bad;
+ }
+ Py_DECREF(ascii_chars_u);
+ Py_DECREF(ascii_chars_b);
+ }
+ Py_DECREF(default_encoding);
+ return 0;
+bad: /* error path: release whichever references are still held (NULL-safe) */
+ Py_XDECREF(default_encoding);
+ Py_XDECREF(ascii_chars_u);
+ Py_XDECREF(ascii_chars_b);
+ return -1;
+}
+#endif
+#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT && PY_MAJOR_VERSION >= 3
+#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_DecodeUTF8(c_str, size, NULL)
+#else
+#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_Decode(c_str, size, __PYX_DEFAULT_STRING_ENCODING, NULL)
+#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT
+static char* __PYX_DEFAULT_STRING_ENCODING;
+static int __Pyx_init_sys_getdefaultencoding_params(void) {
+  /* Copy the name returned by sys.getdefaultencoding() into a heap buffer
+   * stored in __PYX_DEFAULT_STRING_ENCODING.
+   * Returns 0 on success and -1 with a Python exception set on failure. */
+  PyObject* sys;
+  PyObject* default_encoding = NULL;
+  char* default_encoding_c;
+  sys = PyImport_ImportModule("sys");
+  if (!sys) goto bad;
+  default_encoding = PyObject_CallMethod(sys, (char*) (const char*) "getdefaultencoding", NULL);
+  Py_DECREF(sys);
+  if (!default_encoding) goto bad;
+  default_encoding_c = PyBytes_AsString(default_encoding);
+  if (!default_encoding_c) goto bad;
+  __PYX_DEFAULT_STRING_ENCODING = (char*) malloc(strlen(default_encoding_c) + 1);
+  if (!__PYX_DEFAULT_STRING_ENCODING) {
+    /* malloc() does not raise a Python exception by itself; set one here so
+     * the -1 return is never reported without an accompanying error. */
+    PyErr_NoMemory();
+    goto bad;
+  }
+  strcpy(__PYX_DEFAULT_STRING_ENCODING, default_encoding_c);
+  Py_DECREF(default_encoding);
+  return 0;
+bad:
+  /* default_encoding may still be NULL here, hence the X variant. */
+  Py_XDECREF(default_encoding);
+  return -1;
+}
+#endif
+#endif
+
+
+/* Test for GCC > 2.95 */
+#if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95)))
+ #define likely(x) __builtin_expect(!!(x), 1)
+ #define unlikely(x) __builtin_expect(!!(x), 0)
+#else /* !__GNUC__ or GCC < 2.95 */
+ #define likely(x) (x)
+ #define unlikely(x) (x)
+#endif /* __GNUC__ */
+static CYTHON_INLINE void __Pyx_pretend_to_initialize(void* ptr) {
+  /* Intentionally empty apart from referencing the argument: the cast to
+   * void marks ptr as used and has no other effect. */
+  (void)ptr;
+}
+
+static PyObject *__pyx_m = NULL;
+static PyObject *__pyx_d;
+static PyObject *__pyx_b;
+static PyObject *__pyx_cython_runtime = NULL;
+static PyObject *__pyx_empty_tuple;
+static PyObject *__pyx_empty_bytes;
+static PyObject *__pyx_empty_unicode;
+static int __pyx_lineno;
+static int __pyx_clineno = 0;
+static const char * __pyx_cfilenm= __FILE__;
+static const char *__pyx_filename;
+
+/* Header.proto */
+#if !defined(CYTHON_CCOMPLEX)
+ #if defined(__cplusplus)
+ #define CYTHON_CCOMPLEX 1
+ #elif defined(_Complex_I)
+ #define CYTHON_CCOMPLEX 1
+ #else
+ #define CYTHON_CCOMPLEX 0
+ #endif
+#endif
+#if CYTHON_CCOMPLEX
+  /* Native complex support: std::complex<> in C++, _Complex types in C. */
+  #ifdef __cplusplus
+    #include <complex>
+  #else
+    #include <complex.h>
+  #endif
+#endif
+#if CYTHON_CCOMPLEX && !defined(__cplusplus) && defined(__sun__) && defined(__GNUC__)
+ #undef _Complex_I
+ #define _Complex_I 1.0fj
+#endif
+
+
+static const char *__pyx_f[] = {
+ "box_overlaps.pyx",
+ "__init__.pxd",
+ "type.pxd",
+};
+/* BufferFormatStructs.proto */
+#define IS_UNSIGNED(type) (((type) -1) > 0)
+struct __Pyx_StructField_;
+#define __PYX_BUF_FLAGS_PACKED_STRUCT (1 << 0)
+typedef struct { /* Type descriptor declared in the BufferFormatStructs section. */
+ const char* name; /* C string naming the described type */
+ struct __Pyx_StructField_* fields; /* pointer to field descriptors; presumably NULL for non-struct types - confirm */
+ size_t size; /* presumably the size of the type in bytes - confirm */
+ size_t arraysize[8]; /* fixed capacity of 8 entries; presumably per-dimension extents - confirm */
+ int ndim; /* presumably how many arraysize entries are in use - confirm */
+ char typegroup; /* single-character category code; meaning not visible in this section */
+ char is_unsigned; /* flag; presumably derived from IS_UNSIGNED(type) - confirm */
+ int flags; /* bit field; presumably holds __PYX_BUF_FLAGS_PACKED_STRUCT - confirm */
+} __Pyx_TypeInfo;
+typedef struct __Pyx_StructField_ { /* One field entry: a type descriptor, a name and an offset. */
+ __Pyx_TypeInfo* type; /* descriptor of the field's type */
+ const char* name; /* C string naming the field */
+ size_t offset; /* presumably the byte offset inside the enclosing struct - confirm */
+} __Pyx_StructField;
+typedef struct { /* Pairs a field pointer with the offset of its parent. */
+ __Pyx_StructField* field; /* field this element refers to */
+ size_t parent_offset; /* presumably the offset of the enclosing struct - confirm */
+} __Pyx_BufFmt_StackElem;
+typedef struct { /* State record declared in the BufferFormatStructs section.
+   * NOTE(review): presumably the working state of the buffer format-string
+   * checker - confirm against the code that fills it in. */
+ __Pyx_StructField root; /* embedded field entry, stored by value */
+ __Pyx_BufFmt_StackElem* head; /* pointer to a stack element; presumably the current top - confirm */
+ size_t fmt_offset; /* presumably the running offset within the format - confirm */
+ size_t new_count, enc_count; /* two counters; the new_/enc_ prefixes recur in the fields below */
+ size_t struct_alignment; /* presumably the alignment of the struct being described - confirm */
+ int is_complex; /* flag */
+ char enc_type; /* single-character type code */
+ char new_packmode; /* single-character pack-mode code */
+ char enc_packmode; /* single-character pack-mode code */
+ char is_valid_array; /* flag */
+} __Pyx_BufFmt_Context;
+
+
+/* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":689
+ * # in Cython to enable them only on the right systems.
+ *
+ * ctypedef npy_int8 int8_t # <<<<<<<<<<<<<<
+ * ctypedef npy_int16 int16_t
+ * ctypedef npy_int32 int32_t
+ */
+typedef npy_int8 __pyx_t_5numpy_int8_t;
+
+/* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":690
+ *
+ * ctypedef npy_int8 int8_t
+ * ctypedef npy_int16 int16_t # <<<<<<<<<<<<<<
+ * ctypedef npy_int32 int32_t
+ * ctypedef npy_int64 int64_t
+ */
+typedef npy_int16 __pyx_t_5numpy_int16_t;
+
+/* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":691
+ * ctypedef npy_int8 int8_t
+ * ctypedef npy_int16 int16_t
+ * ctypedef npy_int32 int32_t # <<<<<<<<<<<<<<
+ * ctypedef npy_int64 int64_t
+ * #ctypedef npy_int96 int96_t
+ */
+typedef npy_int32 __pyx_t_5numpy_int32_t;
+
+/* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":692
+ * ctypedef npy_int16 int16_t
+ * ctypedef npy_int32 int32_t
+ * ctypedef npy_int64 int64_t # <<<<<<<<<<<<<<
+ * #ctypedef npy_int96 int96_t
+ * #ctypedef npy_int128 int128_t
+ */
+typedef npy_int64 __pyx_t_5numpy_int64_t;
+
+/* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":696
+ * #ctypedef npy_int128 int128_t
+ *
+ * ctypedef npy_uint8 uint8_t # <<<<<<<<<<<<<<
+ * ctypedef npy_uint16 uint16_t
+ * ctypedef npy_uint32 uint32_t
+ */
+typedef npy_uint8 __pyx_t_5numpy_uint8_t;
+
+/* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":697
+ *
+ * ctypedef npy_uint8 uint8_t
+ * ctypedef npy_uint16 uint16_t # <<<<<<<<<<<<<<
+ * ctypedef npy_uint32 uint32_t
+ * ctypedef npy_uint64 uint64_t
+ */
+typedef npy_uint16 __pyx_t_5numpy_uint16_t;
+
+/* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":698
+ * ctypedef npy_uint8 uint8_t
+ * ctypedef npy_uint16 uint16_t
+ * ctypedef npy_uint32 uint32_t # <<<<<<<<<<<<<<
+ * ctypedef npy_uint64 uint64_t
+ * #ctypedef npy_uint96 uint96_t
+ */
+typedef npy_uint32 __pyx_t_5numpy_uint32_t;
+
+/* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":699
+ * ctypedef npy_uint16 uint16_t
+ * ctypedef npy_uint32 uint32_t
+ * ctypedef npy_uint64 uint64_t # <<<<<<<<<<<<<<
+ * #ctypedef npy_uint96 uint96_t
+ * #ctypedef npy_uint128 uint128_t
+ */
+typedef npy_uint64 __pyx_t_5numpy_uint64_t;
+
+/* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":703
+ * #ctypedef npy_uint128 uint128_t
+ *
+ * ctypedef npy_float32 float32_t # <<<<<<<<<<<<<<
+ * ctypedef npy_float64 float64_t
+ * #ctypedef npy_float80 float80_t
+ */
+typedef npy_float32 __pyx_t_5numpy_float32_t;
+
+/* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":704
+ *
+ * ctypedef npy_float32 float32_t
+ * ctypedef npy_float64 float64_t # <<<<<<<<<<<<<<
+ * #ctypedef npy_float80 float80_t
+ * #ctypedef npy_float128 float128_t
+ */
+typedef npy_float64 __pyx_t_5numpy_float64_t;
+
+/* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":713
+ * # The int types are mapped a bit surprising --
+ * # numpy.int corresponds to 'l' and numpy.long to 'q'
+ * ctypedef npy_long int_t # <<<<<<<<<<<<<<
+ * ctypedef npy_longlong long_t
+ * ctypedef npy_longlong longlong_t
+ */
+typedef npy_long __pyx_t_5numpy_int_t;
+
+/* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":714
+ * # numpy.int corresponds to 'l' and numpy.long to 'q'
+ * ctypedef npy_long int_t
+ * ctypedef npy_longlong long_t # <<<<<<<<<<<<<<
+ * ctypedef npy_longlong longlong_t
+ *
+ */
+typedef npy_longlong __pyx_t_5numpy_long_t;
+
+/* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":715
+ * ctypedef npy_long int_t
+ * ctypedef npy_longlong long_t
+ * ctypedef npy_longlong longlong_t # <<<<<<<<<<<<<<
+ *
+ * ctypedef npy_ulong uint_t
+ */
+typedef npy_longlong __pyx_t_5numpy_longlong_t;
+
+/* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":717
+ * ctypedef npy_longlong longlong_t
+ *
+ * ctypedef npy_ulong uint_t # <<<<<<<<<<<<<<
+ * ctypedef npy_ulonglong ulong_t
+ * ctypedef npy_ulonglong ulonglong_t
+ */
+typedef npy_ulong __pyx_t_5numpy_uint_t;
+
+/* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":718
+ *
+ * ctypedef npy_ulong uint_t
+ * ctypedef npy_ulonglong ulong_t # <<<<<<<<<<<<<<
+ * ctypedef npy_ulonglong ulonglong_t
+ *
+ */
+typedef npy_ulonglong __pyx_t_5numpy_ulong_t;
+
+/* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":719
+ * ctypedef npy_ulong uint_t
+ * ctypedef npy_ulonglong ulong_t
+ * ctypedef npy_ulonglong ulonglong_t # <<<<<<<<<<<<<<
+ *
+ * ctypedef npy_intp intp_t
+ */
+typedef npy_ulonglong __pyx_t_5numpy_ulonglong_t;
+
+/* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":721
+ * ctypedef npy_ulonglong ulonglong_t
+ *
+ * ctypedef npy_intp intp_t # <<<<<<<<<<<<<<
+ * ctypedef npy_uintp uintp_t
+ *
+ */
+typedef npy_intp __pyx_t_5numpy_intp_t;
+
+/* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":722
+ *
+ * ctypedef npy_intp intp_t
+ * ctypedef npy_uintp uintp_t # <<<<<<<<<<<<<<
+ *
+ * ctypedef npy_double float_t
+ */
+typedef npy_uintp __pyx_t_5numpy_uintp_t;
+
+/* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":724
+ * ctypedef npy_uintp uintp_t
+ *
+ * ctypedef npy_double float_t # <<<<<<<<<<<<<<
+ * ctypedef npy_double double_t
+ * ctypedef npy_longdouble longdouble_t
+ */
+typedef npy_double __pyx_t_5numpy_float_t;
+
+/* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":725
+ *
+ * ctypedef npy_double float_t
+ * ctypedef npy_double double_t # <<<<<<<<<<<<<<
+ * ctypedef npy_longdouble longdouble_t
+ *
+ */
+typedef npy_double __pyx_t_5numpy_double_t;
+
+/* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":726
+ * ctypedef npy_double float_t
+ * ctypedef npy_double double_t
+ * ctypedef npy_longdouble longdouble_t # <<<<<<<<<<<<<<
+ *
+ * ctypedef npy_cfloat cfloat_t
+ */
+typedef npy_longdouble __pyx_t_5numpy_longdouble_t;
+
+/* "box_overlaps.pyx":13
+ *
+ * DTYPE = np.float
+ * ctypedef np.float_t DTYPE_t # <<<<<<<<<<<<<<
+ *
+ * def bbox_overlaps(
+ */
+typedef __pyx_t_5numpy_float_t __pyx_t_4bbox_DTYPE_t;
+/* Declarations.proto */
+#if CYTHON_CCOMPLEX
+ #ifdef __cplusplus
+ typedef ::std::complex< float > __pyx_t_float_complex;
+ #else
+ typedef float _Complex __pyx_t_float_complex;
+ #endif
+#else
+ typedef struct { float real, imag; } __pyx_t_float_complex;
+#endif
+static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_parts(float, float);
+
+/* Declarations.proto */
+#if CYTHON_CCOMPLEX
+ #ifdef __cplusplus
+ typedef ::std::complex< double > __pyx_t_double_complex;
+ #else
+ typedef double _Complex __pyx_t_double_complex;
+ #endif
+#else
+ typedef struct { double real, imag; } __pyx_t_double_complex;
+#endif
+static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(double, double);
+
+
+/*--- Type declarations: module-mangled aliases for the complex ctypedefs echoed from numpy/__init__.pxd below ---*/
+
+/* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":728
+ * ctypedef npy_longdouble longdouble_t
+ *
+ * ctypedef npy_cfloat cfloat_t # <<<<<<<<<<<<<<
+ * ctypedef npy_cdouble cdouble_t
+ * ctypedef npy_clongdouble clongdouble_t
+ */
+typedef npy_cfloat __pyx_t_5numpy_cfloat_t; /* numpy.cfloat_t */
+
+/* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":729
+ *
+ * ctypedef npy_cfloat cfloat_t
+ * ctypedef npy_cdouble cdouble_t # <<<<<<<<<<<<<<
+ * ctypedef npy_clongdouble clongdouble_t
+ *
+ */
+typedef npy_cdouble __pyx_t_5numpy_cdouble_t; /* numpy.cdouble_t */
+
+/* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":730
+ * ctypedef npy_cfloat cfloat_t
+ * ctypedef npy_cdouble cdouble_t
+ * ctypedef npy_clongdouble clongdouble_t # <<<<<<<<<<<<<<
+ *
+ * ctypedef npy_cdouble complex_t
+ */
+typedef npy_clongdouble __pyx_t_5numpy_clongdouble_t; /* numpy.clongdouble_t */
+
+/* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":732
+ * ctypedef npy_clongdouble clongdouble_t
+ *
+ * ctypedef npy_cdouble complex_t # <<<<<<<<<<<<<<
+ *
+ * cdef inline object PyArray_MultiIterNew1(a):
+ */
+typedef npy_cdouble __pyx_t_5numpy_complex_t; /* numpy.complex_t: same underlying type as cdouble_t above */
+
+/* --- Runtime support code (head) --- */
+/* Refnanny.proto: reference-count bookkeeping macros. CYTHON_REFNANNY defaults to 0 here, in which case __Pyx_INCREF/__Pyx_DECREF map directly to Py_INCREF/Py_DECREF and the GOTREF/GIVEREF/context macros expand to nothing. When enabled, every operation is routed through the __Pyx_RefNanny function table together with the current __LINE__. */
+#ifndef CYTHON_REFNANNY
+ #define CYTHON_REFNANNY 0
+#endif
+#if CYTHON_REFNANNY
+ typedef struct {
+ void (*INCREF)(void*, PyObject*, int);
+ void (*DECREF)(void*, PyObject*, int);
+ void (*GOTREF)(void*, PyObject*, int);
+ void (*GIVEREF)(void*, PyObject*, int);
+ void* (*SetupContext)(const char*, int, const char*);
+ void (*FinishContext)(void**);
+ } __Pyx_RefNannyAPIStruct;
+ static __Pyx_RefNannyAPIStruct *__Pyx_RefNanny = NULL;
+ static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname);
+ #define __Pyx_RefNannyDeclarations void *__pyx_refnanny = NULL;
+#ifdef WITH_THREAD
+ #define __Pyx_RefNannySetupContext(name, acquire_gil)\
+ if (acquire_gil) {\
+ PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();\
+ __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__);\
+ PyGILState_Release(__pyx_gilstate_save);\
+ } else {\
+ __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__);\
+ }
+#else
+ #define __Pyx_RefNannySetupContext(name, acquire_gil)\
+ __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__)
+#endif
+ #define __Pyx_RefNannyFinishContext()\
+ __Pyx_RefNanny->FinishContext(&__pyx_refnanny)
+ #define __Pyx_INCREF(r) __Pyx_RefNanny->INCREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
+ #define __Pyx_DECREF(r) __Pyx_RefNanny->DECREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
+ #define __Pyx_GOTREF(r) __Pyx_RefNanny->GOTREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
+ #define __Pyx_GIVEREF(r) __Pyx_RefNanny->GIVEREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
+ #define __Pyx_XINCREF(r) do { if((r) != NULL) {__Pyx_INCREF(r); }} while(0)
+ #define __Pyx_XDECREF(r) do { if((r) != NULL) {__Pyx_DECREF(r); }} while(0)
+ #define __Pyx_XGOTREF(r) do { if((r) != NULL) {__Pyx_GOTREF(r); }} while(0)
+ #define __Pyx_XGIVEREF(r) do { if((r) != NULL) {__Pyx_GIVEREF(r);}} while(0)
+#else /* refnanny disabled: plain CPython refcounting, no bookkeeping */
+ #define __Pyx_RefNannyDeclarations
+ #define __Pyx_RefNannySetupContext(name, acquire_gil)
+ #define __Pyx_RefNannyFinishContext()
+ #define __Pyx_INCREF(r) Py_INCREF(r)
+ #define __Pyx_DECREF(r) Py_DECREF(r)
+ #define __Pyx_GOTREF(r)
+ #define __Pyx_GIVEREF(r)
+ #define __Pyx_XINCREF(r) Py_XINCREF(r)
+ #define __Pyx_XDECREF(r) Py_XDECREF(r)
+ #define __Pyx_XGOTREF(r)
+ #define __Pyx_XGIVEREF(r)
+#endif
+#define __Pyx_XDECREF_SET(r, v) do {\
+ PyObject *tmp = (PyObject *) r;\
+ r = v; __Pyx_XDECREF(tmp);\
+ } while (0) /* assigns v to r first, then releases the previous value (which may be NULL) */
+#define __Pyx_DECREF_SET(r, v) do {\
+ PyObject *tmp = (PyObject *) r;\
+ r = v; __Pyx_DECREF(tmp);\
+ } while (0) /* same as above, but the previous value goes through the non-NULL-checking DECREF */
+#define __Pyx_CLEAR(r) do { PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);} while(0) /* r is set to NULL before the old value is released */
+#define __Pyx_XCLEAR(r) do { if((r) != NULL) {PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);}} while(0) /* NULL-tolerant variant of __Pyx_CLEAR */
+
+/* PyObjectGetAttrStr.proto: attribute lookup; a dedicated inline helper when CYTHON_USE_TYPE_SLOTS is set, otherwise plain PyObject_GetAttr. */
+#if CYTHON_USE_TYPE_SLOTS
+static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name);
+#else
+#define __Pyx_PyObject_GetAttrStr(o,n) PyObject_GetAttr(o,n)
+#endif
+
+/* GetBuiltinName.proto */
+static PyObject *__Pyx_GetBuiltinName(PyObject *name);
+
+/* RaiseArgTupleInvalid.proto */
+static void __Pyx_RaiseArgtupleInvalid(const char* func_name, int exact,
+ Py_ssize_t num_min, Py_ssize_t num_max, Py_ssize_t num_found);
+
+/* RaiseDoubleKeywords.proto */
+static void __Pyx_RaiseDoubleKeywordsError(const char* func_name, PyObject* kw_name);
+
+/* ParseKeywords.proto */
+static int __Pyx_ParseOptionalKeywords(PyObject *kwds, PyObject **argnames[],\
+ PyObject *kwds2, PyObject *values[], Py_ssize_t num_pos_args,\
+ const char* function_name);
+
+/* ArgTypeTest.proto: the macro's fast path accepts an exact type match, or Py_None when none_allowed is non-zero; every other case is decided by __Pyx__ArgTypeTest. */
+#define __Pyx_ArgTypeTest(obj, type, none_allowed, name, exact)\
+ ((likely((Py_TYPE(obj) == type) | (none_allowed && (obj == Py_None)))) ? 1 :\
+ __Pyx__ArgTypeTest(obj, type, name, exact))
+static int __Pyx__ArgTypeTest(PyObject *obj, PyTypeObject *type, const char *name, int exact);
+
+/* IsLittleEndian.proto */
+static CYTHON_INLINE int __Pyx_Is_Little_Endian(void);
+
+/* BufferFormatCheck.proto */
+static const char* __Pyx_BufFmt_CheckString(__Pyx_BufFmt_Context* ctx, const char* ts);
+static void __Pyx_BufFmt_Init(__Pyx_BufFmt_Context* ctx,
+ __Pyx_BufFmt_StackElem* stack,
+ __Pyx_TypeInfo* type);
+
+/* BufferGetAndValidate.proto: for obj == Py_None or NULL the macro calls __Pyx_ZeroBuffer and evaluates to 0 without invoking __Pyx__GetBufferAndValidate. */
+#define __Pyx_GetBufferAndValidate(buf, obj, dtype, flags, nd, cast, stack)\
+ ((obj == Py_None || obj == NULL) ?\
+ (__Pyx_ZeroBuffer(buf), 0) :\
+ __Pyx__GetBufferAndValidate(buf, obj, dtype, flags, nd, cast, stack))
+static int __Pyx__GetBufferAndValidate(Py_buffer* buf, PyObject* obj,
+ __Pyx_TypeInfo* dtype, int flags, int nd, int cast, __Pyx_BufFmt_StackElem* stack);
+static void __Pyx_ZeroBuffer(Py_buffer* buf);
+static CYTHON_INLINE void __Pyx_SafeReleaseBuffer(Py_buffer* info);
+static Py_ssize_t __Pyx_minusones[] = { -1, -1, -1, -1, -1, -1, -1, -1 }; /* eight entries, matching the diminfo[8] capacity of __Pyx_LocalBuf_ND */
+static Py_ssize_t __Pyx_zeros[] = { 0, 0, 0, 0, 0, 0, 0, 0 };
+
+/* PyDictVersioning.proto: when dict versions and type slots are both available, lookups are cached against the dict's ma_version_tag; otherwise the macros collapse to an uncached lookup. */
+#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_TYPE_SLOTS
+#define __PYX_DICT_VERSION_INIT ((PY_UINT64_T) -1)
+#define __PYX_GET_DICT_VERSION(dict) (((PyDictObject*)(dict))->ma_version_tag)
+#define __PYX_UPDATE_DICT_CACHE(dict, value, cache_var, version_var)\
+ (version_var) = __PYX_GET_DICT_VERSION(dict);\
+ (cache_var) = (value);
+#define __PYX_PY_DICT_LOOKUP_IF_MODIFIED(VAR, DICT, LOOKUP) {\
+ static PY_UINT64_T __pyx_dict_version = 0;\
+ static PyObject *__pyx_dict_cached_value = NULL;\
+ if (likely(__PYX_GET_DICT_VERSION(DICT) == __pyx_dict_version)) {\
+ (VAR) = __pyx_dict_cached_value;\
+ } else {\
+ (VAR) = __pyx_dict_cached_value = (LOOKUP);\
+ __pyx_dict_version = __PYX_GET_DICT_VERSION(DICT);\
+ }\
+}
+static CYTHON_INLINE PY_UINT64_T __Pyx_get_tp_dict_version(PyObject *obj);
+static CYTHON_INLINE PY_UINT64_T __Pyx_get_object_dict_version(PyObject *obj);
+static CYTHON_INLINE int __Pyx_object_dict_version_matches(PyObject* obj, PY_UINT64_T tp_dict_version, PY_UINT64_T obj_dict_version);
+#else
+#define __PYX_GET_DICT_VERSION(dict) (0)
+#define __PYX_UPDATE_DICT_CACHE(dict, value, cache_var, version_var)
+#define __PYX_PY_DICT_LOOKUP_IF_MODIFIED(VAR, DICT, LOOKUP) (VAR) = (LOOKUP);
+#endif
+
+/* GetModuleGlobalName.proto: looks up a module-level name into `var`. The cached form keeps a per-call-site static version/value pair keyed on the version of the module dict __pyx_d; on a version match with no cached value it falls back to __Pyx_GetBuiltinName. */
+#if CYTHON_USE_DICT_VERSIONS
+#define __Pyx_GetModuleGlobalName(var, name) do {\
+ static PY_UINT64_T __pyx_dict_version = 0;\
+ static PyObject *__pyx_dict_cached_value = NULL;\
+ (var) = (likely(__pyx_dict_version == __PYX_GET_DICT_VERSION(__pyx_d))) ?\
+ (likely(__pyx_dict_cached_value) ? __Pyx_NewRef(__pyx_dict_cached_value) : __Pyx_GetBuiltinName(name)) :\
+ __Pyx__GetModuleGlobalName(name, &__pyx_dict_version, &__pyx_dict_cached_value);\
+} while(0)
+#define __Pyx_GetModuleGlobalNameUncached(var, name) do {\
+ PY_UINT64_T __pyx_dict_version;\
+ PyObject *__pyx_dict_cached_value;\
+ (var) = __Pyx__GetModuleGlobalName(name, &__pyx_dict_version, &__pyx_dict_cached_value);\
+} while(0)
+static PyObject *__Pyx__GetModuleGlobalName(PyObject *name, PY_UINT64_T *dict_version, PyObject **dict_cached_value);
+#else
+#define __Pyx_GetModuleGlobalName(var, name) (var) = __Pyx__GetModuleGlobalName(name)
+#define __Pyx_GetModuleGlobalNameUncached(var, name) (var) = __Pyx__GetModuleGlobalName(name)
+static CYTHON_INLINE PyObject *__Pyx__GetModuleGlobalName(PyObject *name);
+#endif
+
+/* PyObjectCall.proto: inline helper on CPython, plain PyObject_Call elsewhere. */
+#if CYTHON_COMPILING_IN_CPYTHON
+static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw);
+#else
+#define __Pyx_PyObject_Call(func, arg, kw) PyObject_Call(func, arg, kw)
+#endif
+
+/* ExtTypeTest.proto */
+static CYTHON_INLINE int __Pyx_TypeTest(PyObject *obj, PyTypeObject *type);
+
+/* BufferIndexError.proto */
+static void __Pyx_RaiseBufferIndexError(int axis);
+
+#define __Pyx_BufPtrStrided2d(type, buf, i0, s0, i1, s1) (type)((char*)buf + i0 * s0 + i1 * s1) /* address of element (i0, i1): offsets are added to a char* so s0/s1 are byte strides; result is cast to `type` */
+/* PyThreadStateGet.proto: with CYTHON_FAST_THREAD_STATE the thread state is held in a local __pyx_tstate and the pending exception is read from it directly; otherwise the public PyErr_Occurred() is used. */
+#if CYTHON_FAST_THREAD_STATE
+#define __Pyx_PyThreadState_declare PyThreadState *__pyx_tstate;
+#define __Pyx_PyThreadState_assign __pyx_tstate = __Pyx_PyThreadState_Current;
+#define __Pyx_PyErr_Occurred() __pyx_tstate->curexc_type
+#else
+#define __Pyx_PyThreadState_declare
+#define __Pyx_PyThreadState_assign
+#define __Pyx_PyErr_Occurred() PyErr_Occurred()
+#endif
+
+/* PyErrFetchRestore.proto: error fetch/restore helpers; the fast path operates on an explicit PyThreadState, the fallback maps each macro onto the corresponding PyErr_* call. */
+#if CYTHON_FAST_THREAD_STATE
+#define __Pyx_PyErr_Clear() __Pyx_ErrRestore(NULL, NULL, NULL)
+#define __Pyx_ErrRestoreWithState(type, value, tb) __Pyx_ErrRestoreInState(PyThreadState_GET(), type, value, tb)
+#define __Pyx_ErrFetchWithState(type, value, tb) __Pyx_ErrFetchInState(PyThreadState_GET(), type, value, tb)
+#define __Pyx_ErrRestore(type, value, tb) __Pyx_ErrRestoreInState(__pyx_tstate, type, value, tb)
+#define __Pyx_ErrFetch(type, value, tb) __Pyx_ErrFetchInState(__pyx_tstate, type, value, tb)
+static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb);
+static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb);
+#if CYTHON_COMPILING_IN_CPYTHON
+#define __Pyx_PyErr_SetNone(exc) (Py_INCREF(exc), __Pyx_ErrRestore((exc), NULL, NULL))
+#else
+#define __Pyx_PyErr_SetNone(exc) PyErr_SetNone(exc)
+#endif
+#else
+#define __Pyx_PyErr_Clear() PyErr_Clear()
+#define __Pyx_PyErr_SetNone(exc) PyErr_SetNone(exc)
+#define __Pyx_ErrRestoreWithState(type, value, tb) PyErr_Restore(type, value, tb)
+#define __Pyx_ErrFetchWithState(type, value, tb) PyErr_Fetch(type, value, tb)
+#define __Pyx_ErrRestoreInState(tstate, type, value, tb) PyErr_Restore(type, value, tb)
+#define __Pyx_ErrFetchInState(tstate, type, value, tb) PyErr_Fetch(type, value, tb)
+#define __Pyx_ErrRestore(type, value, tb) PyErr_Restore(type, value, tb)
+#define __Pyx_ErrFetch(type, value, tb) PyErr_Fetch(type, value, tb)
+#endif
+
+/* GetTopmostException.proto */
+#if CYTHON_USE_EXC_INFO_STACK
+static _PyErr_StackItem * __Pyx_PyErr_GetTopmostException(PyThreadState *tstate);
+#endif
+
+/* SaveResetException.proto: save/reset of the exception state; falls back to PyErr_GetExcInfo / PyErr_SetExcInfo without the fast thread state. */
+#if CYTHON_FAST_THREAD_STATE
+#define __Pyx_ExceptionSave(type, value, tb) __Pyx__ExceptionSave(__pyx_tstate, type, value, tb)
+static CYTHON_INLINE void __Pyx__ExceptionSave(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb);
+#define __Pyx_ExceptionReset(type, value, tb) __Pyx__ExceptionReset(__pyx_tstate, type, value, tb)
+static CYTHON_INLINE void __Pyx__ExceptionReset(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb);
+#else
+#define __Pyx_ExceptionSave(type, value, tb) PyErr_GetExcInfo(type, value, tb)
+#define __Pyx_ExceptionReset(type, value, tb) PyErr_SetExcInfo(type, value, tb)
+#endif
+
+/* PyErrExceptionMatches.proto */
+#if CYTHON_FAST_THREAD_STATE
+#define __Pyx_PyErr_ExceptionMatches(err) __Pyx_PyErr_ExceptionMatchesInState(__pyx_tstate, err)
+static CYTHON_INLINE int __Pyx_PyErr_ExceptionMatchesInState(PyThreadState* tstate, PyObject* err);
+#else
+#define __Pyx_PyErr_ExceptionMatches(err) PyErr_ExceptionMatches(err)
+#endif
+
+/* GetException.proto */
+#if CYTHON_FAST_THREAD_STATE
+#define __Pyx_GetException(type, value, tb) __Pyx__GetException(__pyx_tstate, type, value, tb)
+static int __Pyx__GetException(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb);
+#else
+static int __Pyx_GetException(PyObject **type, PyObject **value, PyObject **tb);
+#endif
+
+/* RaiseException.proto */
+static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause);
+
+/* TypeImport.proto: declares the versioned type-import helper, its size-check policy enum and a struct-alignment macro. The include guard keeps the declarations from being emitted twice. */
+#ifndef __PYX_HAVE_RT_ImportType_proto_0_29_36
+#define __PYX_HAVE_RT_ImportType_proto_0_29_36
+#if __STDC_VERSION__ >= 201112L
+#include <stdalign.h> /* provides alignof for C11; the header name had been lost, leaving a bare #include that does not preprocess */
+#endif
+#if __STDC_VERSION__ >= 201112L || __cplusplus >= 201103L
+#define __PYX_GET_STRUCT_ALIGNMENT_0_29_36(s) alignof(s)
+#else
+#define __PYX_GET_STRUCT_ALIGNMENT_0_29_36(s) sizeof(void*) /* pre-C11 / pre-C++11 fallback: pointer size stands in for the alignment */
+#endif
+enum __Pyx_ImportType_CheckSize_0_29_36 {
+ __Pyx_ImportType_CheckSize_Error_0_29_36 = 0,
+ __Pyx_ImportType_CheckSize_Warn_0_29_36 = 1,
+ __Pyx_ImportType_CheckSize_Ignore_0_29_36 = 2
+};
+static PyTypeObject *__Pyx_ImportType_0_29_36(PyObject* module, const char *module_name, const char *class_name, size_t size, size_t alignment, enum __Pyx_ImportType_CheckSize_0_29_36 check_size);
+#endif
+
+/* Import.proto */
+static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level);
+
+/* CLineInTraceback.proto: if CYTHON_CLINE_IN_TRACEBACK is defined at compile time the choice is a constant expression (c_line or 0); otherwise it is made by a function at runtime. */
+#ifdef CYTHON_CLINE_IN_TRACEBACK
+#define __Pyx_CLineForTraceback(tstate, c_line) (((CYTHON_CLINE_IN_TRACEBACK)) ? c_line : 0)
+#else
+static int __Pyx_CLineForTraceback(PyThreadState *tstate, int c_line);
+#endif
+
+/* CodeObjectCache.proto: module-wide table of (code_line, code_object) entries with find/insert helpers; starts out empty. */
+typedef struct {
+ PyCodeObject* code_object;
+ int code_line;
+} __Pyx_CodeObjectCacheEntry;
+struct __Pyx_CodeObjectCache {
+ int count;
+ int max_count;
+ __Pyx_CodeObjectCacheEntry* entries;
+};
+static struct __Pyx_CodeObjectCache __pyx_code_cache = {0,0,NULL};
+static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line);
+static PyCodeObject *__pyx_find_code_object(int code_line);
+static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object);
+
+/* AddTraceback.proto */
+static void __Pyx_AddTraceback(const char *funcname, int c_line,
+ int py_line, const char *filename);
+
+/* BufferStructDeclare.proto: bookkeeping structs for typed buffer variables -- per-dimension shape/stride/suboffset info, a Py_buffer paired with a refcount field, and a local view holding up to 8 dimensions. */
+typedef struct {
+ Py_ssize_t shape, strides, suboffsets;
+} __Pyx_Buf_DimInfo;
+typedef struct {
+ size_t refcount;
+ Py_buffer pybuffer;
+} __Pyx_Buffer;
+typedef struct {
+ __Pyx_Buffer *rcbuffer;
+ char *data;
+ __Pyx_Buf_DimInfo diminfo[8];
+} __Pyx_LocalBuf_ND;
+
+#if PY_MAJOR_VERSION < 3
+ static int __Pyx_GetBuffer(PyObject *obj, Py_buffer *view, int flags);
+ static void __Pyx_ReleaseBuffer(Py_buffer *view);
+#else /* Python 3: use the buffer protocol API directly */
+ #define __Pyx_GetBuffer PyObject_GetBuffer
+ #define __Pyx_ReleaseBuffer PyBuffer_Release
+#endif
+
+
+/* GCCDiagnostics.proto: flag is defined for GCC 4.6 and newer. */
+#if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))
+#define __Pyx_HAS_GCC_DIAGNOSTIC
+#endif
+
+/* RealImag.proto: accessors for the real/imaginary parts of the complex types; the form depends on whether the type is std::complex, a C _Complex, or the fallback struct. */
+#if CYTHON_CCOMPLEX
+ #ifdef __cplusplus
+ #define __Pyx_CREAL(z) ((z).real())
+ #define __Pyx_CIMAG(z) ((z).imag())
+ #else
+ #define __Pyx_CREAL(z) (__real__(z))
+ #define __Pyx_CIMAG(z) (__imag__(z))
+ #endif
+#else
+ #define __Pyx_CREAL(z) ((z).real)
+ #define __Pyx_CIMAG(z) ((z).imag)
+#endif
+#if defined(__cplusplus) && CYTHON_CCOMPLEX\
+ && (defined(_WIN32) || defined(__clang__) || (defined(__GNUC__) && (__GNUC__ >= 5 || __GNUC__ == 4 && __GNUC_MINOR__ >= 4 )) || __cplusplus >= 201103)
+ #define __Pyx_SET_CREAL(z,x) ((z).real(x))
+ #define __Pyx_SET_CIMAG(z,y) ((z).imag(y))
+#else /* otherwise assign through the accessor expression, which must then be an lvalue */
+ #define __Pyx_SET_CREAL(z,x) __Pyx_CREAL(z) = (x)
+ #define __Pyx_SET_CIMAG(z,y) __Pyx_CIMAG(z) = (y)
+#endif
+
+/* Arithmetic.proto (float): with native complex support the operations are macros over the built-in operators and the std:: / <complex.h> functions; with the struct fallback they are declared as inline functions instead. */
+#if CYTHON_CCOMPLEX
+ #define __Pyx_c_eq_float(a, b) ((a)==(b))
+ #define __Pyx_c_sum_float(a, b) ((a)+(b))
+ #define __Pyx_c_diff_float(a, b) ((a)-(b))
+ #define __Pyx_c_prod_float(a, b) ((a)*(b))
+ #define __Pyx_c_quot_float(a, b) ((a)/(b))
+ #define __Pyx_c_neg_float(a) (-(a))
+ #ifdef __cplusplus
+ #define __Pyx_c_is_zero_float(z) ((z)==(float)0)
+ #define __Pyx_c_conj_float(z) (::std::conj(z))
+ #if 1
+ #define __Pyx_c_abs_float(z) (::std::abs(z))
+ #define __Pyx_c_pow_float(a, b) (::std::pow(a, b))
+ #endif
+ #else
+ #define __Pyx_c_is_zero_float(z) ((z)==0)
+ #define __Pyx_c_conj_float(z) (conjf(z))
+ #if 1
+ #define __Pyx_c_abs_float(z) (cabsf(z))
+ #define __Pyx_c_pow_float(a, b) (cpowf(a, b))
+ #endif
+ #endif
+#else
+ static CYTHON_INLINE int __Pyx_c_eq_float(__pyx_t_float_complex, __pyx_t_float_complex);
+ static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_sum_float(__pyx_t_float_complex, __pyx_t_float_complex);
+ static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_diff_float(__pyx_t_float_complex, __pyx_t_float_complex);
+ static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_prod_float(__pyx_t_float_complex, __pyx_t_float_complex);
+ static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_quot_float(__pyx_t_float_complex, __pyx_t_float_complex);
+ static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_neg_float(__pyx_t_float_complex);
+ static CYTHON_INLINE int __Pyx_c_is_zero_float(__pyx_t_float_complex);
+ static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_conj_float(__pyx_t_float_complex);
+ #if 1
+ static CYTHON_INLINE float __Pyx_c_abs_float(__pyx_t_float_complex);
+ static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_pow_float(__pyx_t_float_complex, __pyx_t_float_complex);
+ #endif
+#endif
+
+/* Arithmetic.proto (double): same operation set as the float variant, using the double-precision functions (conj, cabs, cpow) in the C branch. */
+#if CYTHON_CCOMPLEX
+ #define __Pyx_c_eq_double(a, b) ((a)==(b))
+ #define __Pyx_c_sum_double(a, b) ((a)+(b))
+ #define __Pyx_c_diff_double(a, b) ((a)-(b))
+ #define __Pyx_c_prod_double(a, b) ((a)*(b))
+ #define __Pyx_c_quot_double(a, b) ((a)/(b))
+ #define __Pyx_c_neg_double(a) (-(a))
+ #ifdef __cplusplus
+ #define __Pyx_c_is_zero_double(z) ((z)==(double)0)
+ #define __Pyx_c_conj_double(z) (::std::conj(z))
+ #if 1
+ #define __Pyx_c_abs_double(z) (::std::abs(z))
+ #define __Pyx_c_pow_double(a, b) (::std::pow(a, b))
+ #endif
+ #else
+ #define __Pyx_c_is_zero_double(z) ((z)==0)
+ #define __Pyx_c_conj_double(z) (conj(z))
+ #if 1
+ #define __Pyx_c_abs_double(z) (cabs(z))
+ #define __Pyx_c_pow_double(a, b) (cpow(a, b))
+ #endif
+ #endif
+#else
+ static CYTHON_INLINE int __Pyx_c_eq_double(__pyx_t_double_complex, __pyx_t_double_complex);
+ static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_sum_double(__pyx_t_double_complex, __pyx_t_double_complex);
+ static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_diff_double(__pyx_t_double_complex, __pyx_t_double_complex);
+ static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_prod_double(__pyx_t_double_complex, __pyx_t_double_complex);
+ static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_quot_double(__pyx_t_double_complex, __pyx_t_double_complex);
+ static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_neg_double(__pyx_t_double_complex);
+ static CYTHON_INLINE int __Pyx_c_is_zero_double(__pyx_t_double_complex);
+ static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_conj_double(__pyx_t_double_complex);
+ #if 1
+ static CYTHON_INLINE double __Pyx_c_abs_double(__pyx_t_double_complex);
+ static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_pow_double(__pyx_t_double_complex, __pyx_t_double_complex);
+ #endif
+#endif
+
+/* CIntToPy.proto: C unsigned int -> Python int object */
+static CYTHON_INLINE PyObject* __Pyx_PyInt_From_unsigned_int(unsigned int value);
+
+/* CIntFromPy.proto: Python object -> C unsigned int */
+static CYTHON_INLINE unsigned int __Pyx_PyInt_As_unsigned_int(PyObject *);
+
+/* CIntToPy.proto: C long -> Python int object */
+static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value);
+
+/* CIntFromPy.proto: Python object -> C long */
+static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *);
+
+/* CIntFromPy.proto: Python object -> C int */
+static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *);
+
+/* FastTypeChecks.proto: on CPython, subtype and exception-match checks use the module's own inline helpers; elsewhere they map onto PyObject_TypeCheck / PyErr_GivenExceptionMatches. */
+#if CYTHON_COMPILING_IN_CPYTHON
+#define __Pyx_TypeCheck(obj, type) __Pyx_IsSubtype(Py_TYPE(obj), (PyTypeObject *)type)
+static CYTHON_INLINE int __Pyx_IsSubtype(PyTypeObject *a, PyTypeObject *b);
+static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches(PyObject *err, PyObject *type);
+static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches2(PyObject *err, PyObject *type1, PyObject *type2);
+#else
+#define __Pyx_TypeCheck(obj, type) PyObject_TypeCheck(obj, (PyTypeObject *)type)
+#define __Pyx_PyErr_GivenExceptionMatches(err, type) PyErr_GivenExceptionMatches(err, type)
+#define __Pyx_PyErr_GivenExceptionMatches2(err, type1, type2) (PyErr_GivenExceptionMatches(err, type1) || PyErr_GivenExceptionMatches(err, type2))
+#endif
+#define __Pyx_PyException_Check(obj) __Pyx_TypeCheck(obj, PyExc_Exception)
+
+/* CheckBinaryVersion.proto */
+static int __Pyx_check_binary_version(void);
+
+/* InitStrings.proto */
+static int __Pyx_InitStrings(__Pyx_StringTabEntry *t);
+
+
+/* Module declarations from 'cython' */
+
+/* Module declarations from 'cpython.buffer' */
+
+/* Module declarations from 'libc.string' */
+
+/* Module declarations from 'libc.stdio' */
+
+/* Module declarations from '__builtin__' */
+
+/* Module declarations from 'cpython.type' */
+static PyTypeObject *__pyx_ptype_7cpython_4type_type = 0;
+
+/* Module declarations from 'cpython' */
+
+/* Module declarations from 'cpython.object' */
+
+/* Module declarations from 'cpython.ref' */
+
+/* Module declarations from 'cpython.mem' */
+
+/* Module declarations from 'numpy' */
+
+/* Module declarations from 'numpy': type-object pointers for the cimported numpy classes; all start as 0 and are assigned outside this section. */
+static PyTypeObject *__pyx_ptype_5numpy_dtype = 0;
+static PyTypeObject *__pyx_ptype_5numpy_flatiter = 0;
+static PyTypeObject *__pyx_ptype_5numpy_broadcast = 0;
+static PyTypeObject *__pyx_ptype_5numpy_ndarray = 0; /* used by the bbox_overlaps wrapper to type-check both arguments */
+static PyTypeObject *__pyx_ptype_5numpy_generic = 0;
+static PyTypeObject *__pyx_ptype_5numpy_number = 0;
+static PyTypeObject *__pyx_ptype_5numpy_integer = 0;
+static PyTypeObject *__pyx_ptype_5numpy_signedinteger = 0;
+static PyTypeObject *__pyx_ptype_5numpy_unsignedinteger = 0;
+static PyTypeObject *__pyx_ptype_5numpy_inexact = 0;
+static PyTypeObject *__pyx_ptype_5numpy_floating = 0;
+static PyTypeObject *__pyx_ptype_5numpy_complexfloating = 0;
+static PyTypeObject *__pyx_ptype_5numpy_flexible = 0;
+static PyTypeObject *__pyx_ptype_5numpy_character = 0;
+static PyTypeObject *__pyx_ptype_5numpy_ufunc = 0;
+
+/* Module declarations from 'bbox' */
+static __Pyx_TypeInfo __Pyx_TypeInfo_nn___pyx_t_4bbox_DTYPE_t = { "DTYPE_t", NULL, sizeof(__pyx_t_4bbox_DTYPE_t), { 0 }, 0, 'R', 0, 0 }; /* element-type descriptor passed to __Pyx_GetBufferAndValidate for the boxes / query_boxes buffers */
+#define __Pyx_MODULE_NAME "bbox"
+extern int __pyx_module_is_main_bbox;
+int __pyx_module_is_main_bbox = 0;
+
+/* Implementation of 'bbox': C string constants (__pyx_k_*) and the PyObject* slots for the corresponding Python string objects (__pyx_n_s_* / __pyx_kp_s_*), plus cached builtins, tuples and the code object. The slots are only declared here; they are populated elsewhere in the file. */
+static PyObject *__pyx_builtin_range;
+static PyObject *__pyx_builtin_ImportError;
+static const char __pyx_k_K[] = "K";
+static const char __pyx_k_N[] = "N";
+static const char __pyx_k_k[] = "k";
+static const char __pyx_k_n[] = "n";
+static const char __pyx_k_ih[] = "ih";
+static const char __pyx_k_iw[] = "iw";
+static const char __pyx_k_np[] = "np";
+static const char __pyx_k_ua[] = "ua";
+static const char __pyx_k_bbox[] = "bbox";
+static const char __pyx_k_main[] = "__main__";
+static const char __pyx_k_name[] = "__name__";
+static const char __pyx_k_test[] = "__test__";
+static const char __pyx_k_DTYPE[] = "DTYPE";
+static const char __pyx_k_boxes[] = "boxes";
+static const char __pyx_k_dtype[] = "dtype";
+static const char __pyx_k_float[] = "float";
+static const char __pyx_k_numpy[] = "numpy";
+static const char __pyx_k_range[] = "range";
+static const char __pyx_k_zeros[] = "zeros";
+static const char __pyx_k_import[] = "__import__";
+static const char __pyx_k_box_area[] = "box_area";
+static const char __pyx_k_overlaps[] = "overlaps";
+static const char __pyx_k_ImportError[] = "ImportError";
+static const char __pyx_k_query_boxes[] = "query_boxes";
+static const char __pyx_k_bbox_overlaps[] = "bbox_overlaps";
+static const char __pyx_k_box_overlaps_pyx[] = "box_overlaps.pyx";
+static const char __pyx_k_cline_in_traceback[] = "cline_in_traceback";
+static const char __pyx_k_numpy_core_multiarray_failed_to[] = "numpy.core.multiarray failed to import";
+static const char __pyx_k_numpy_core_umath_failed_to_impor[] = "numpy.core.umath failed to import";
+static PyObject *__pyx_n_s_DTYPE;
+static PyObject *__pyx_n_s_ImportError;
+static PyObject *__pyx_n_s_K;
+static PyObject *__pyx_n_s_N;
+static PyObject *__pyx_n_s_bbox;
+static PyObject *__pyx_n_s_bbox_overlaps;
+static PyObject *__pyx_n_s_box_area;
+static PyObject *__pyx_kp_s_box_overlaps_pyx;
+static PyObject *__pyx_n_s_boxes;
+static PyObject *__pyx_n_s_cline_in_traceback;
+static PyObject *__pyx_n_s_dtype;
+static PyObject *__pyx_n_s_float;
+static PyObject *__pyx_n_s_ih;
+static PyObject *__pyx_n_s_import;
+static PyObject *__pyx_n_s_iw;
+static PyObject *__pyx_n_s_k;
+static PyObject *__pyx_n_s_main;
+static PyObject *__pyx_n_s_n;
+static PyObject *__pyx_n_s_name;
+static PyObject *__pyx_n_s_np;
+static PyObject *__pyx_n_s_numpy;
+static PyObject *__pyx_kp_s_numpy_core_multiarray_failed_to;
+static PyObject *__pyx_kp_s_numpy_core_umath_failed_to_impor;
+static PyObject *__pyx_n_s_overlaps;
+static PyObject *__pyx_n_s_query_boxes;
+static PyObject *__pyx_n_s_range;
+static PyObject *__pyx_n_s_test;
+static PyObject *__pyx_n_s_ua;
+static PyObject *__pyx_n_s_zeros;
+static PyObject *__pyx_pf_4bbox_bbox_overlaps(CYTHON_UNUSED PyObject *__pyx_self, PyArrayObject *__pyx_v_boxes, PyArrayObject *__pyx_v_query_boxes); /* proto */
+static PyObject *__pyx_tuple_;
+static PyObject *__pyx_tuple__2;
+static PyObject *__pyx_tuple__3;
+static PyObject *__pyx_codeobj__4;
+/* Late includes */
+
+/* "box_overlaps.pyx":15
+ * ctypedef np.float_t DTYPE_t
+ *
+ * def bbox_overlaps( # <<<<<<<<<<<<<<
+ * np.ndarray[DTYPE_t, ndim=2] boxes,
+ * np.ndarray[DTYPE_t, ndim=2] query_boxes):
+ */
+
+/* Python wrapper: unpacks exactly two arguments (boxes, query_boxes), given positionally and/or by keyword, checks that each is a numpy.ndarray and not None, then forwards to __pyx_pf_4bbox_bbox_overlaps. Returns that call's result, or NULL when unpacking or type checking fails. */
+static PyObject *__pyx_pw_4bbox_1bbox_overlaps(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
+static char __pyx_doc_4bbox_bbox_overlaps[] = "\n Parameters\n ----------\n boxes: (N, 4) ndarray of float\n query_boxes: (K, 4) ndarray of float\n Returns\n -------\n overlaps: (N, K) ndarray of overlap between boxes and query_boxes\n ";
+static PyMethodDef __pyx_mdef_4bbox_1bbox_overlaps = {"bbox_overlaps", (PyCFunction)(void*)(PyCFunctionWithKeywords)__pyx_pw_4bbox_1bbox_overlaps, METH_VARARGS|METH_KEYWORDS, __pyx_doc_4bbox_bbox_overlaps};
+static PyObject *__pyx_pw_4bbox_1bbox_overlaps(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
+ PyArrayObject *__pyx_v_boxes = 0;
+ PyArrayObject *__pyx_v_query_boxes = 0;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ PyObject *__pyx_r = 0;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("bbox_overlaps (wrapper)", 0);
+ {
+ static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_boxes,&__pyx_n_s_query_boxes,0};
+ PyObject* values[2] = {0,0};
+ if (unlikely(__pyx_kwds)) { /* keyword arguments present: take positionals first, then fill the rest by name */
+ Py_ssize_t kw_args;
+ const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
+ switch (pos_args) {
+ case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
+ CYTHON_FALLTHROUGH;
+ case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+ CYTHON_FALLTHROUGH;
+ case 0: break;
+ default: goto __pyx_L5_argtuple_error;
+ }
+ kw_args = PyDict_Size(__pyx_kwds);
+ switch (pos_args) { /* look up by keyword whichever of the two arguments was not given positionally */
+ case 0:
+ if (likely((values[0] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_boxes)) != 0)) kw_args--;
+ else goto __pyx_L5_argtuple_error;
+ CYTHON_FALLTHROUGH;
+ case 1:
+ if (likely((values[1] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_query_boxes)) != 0)) kw_args--;
+ else {
+ __Pyx_RaiseArgtupleInvalid("bbox_overlaps", 1, 2, 2, 1); __PYX_ERR(0, 15, __pyx_L3_error)
+ }
+ }
+ if (unlikely(kw_args > 0)) { /* keywords left over after both names were consumed */
+ if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "bbox_overlaps") < 0)) __PYX_ERR(0, 15, __pyx_L3_error)
+ }
+ } else if (PyTuple_GET_SIZE(__pyx_args) != 2) {
+ goto __pyx_L5_argtuple_error;
+ } else { /* positional-only call with exactly two arguments */
+ values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+ values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
+ }
+ __pyx_v_boxes = ((PyArrayObject *)values[0]); /* casts only; the actual type check happens after unpacking */
+ __pyx_v_query_boxes = ((PyArrayObject *)values[1]);
+ }
+ goto __pyx_L4_argument_unpacking_done;
+ __pyx_L5_argtuple_error:;
+ __Pyx_RaiseArgtupleInvalid("bbox_overlaps", 1, 2, 2, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 15, __pyx_L3_error)
+ __pyx_L3_error:;
+ __Pyx_AddTraceback("bbox.bbox_overlaps", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __Pyx_RefNannyFinishContext();
+ return NULL;
+ __pyx_L4_argument_unpacking_done:;
+ if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_boxes), __pyx_ptype_5numpy_ndarray, 0, "boxes", 0))) __PYX_ERR(0, 16, __pyx_L1_error) /* none_allowed=0: the implementation reads boxes->dimensions[0] unconditionally, so None must be rejected here; declare the argument `not None` in box_overlaps.pyx so regeneration keeps this */
+ if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_query_boxes), __pyx_ptype_5numpy_ndarray, 0, "query_boxes", 0))) __PYX_ERR(0, 17, __pyx_L1_error) /* none_allowed=0 for the same reason: query_boxes->dimensions[0] is read unconditionally */
+ __pyx_r = __pyx_pf_4bbox_bbox_overlaps(__pyx_self, __pyx_v_boxes, __pyx_v_query_boxes);
+
+ /* function exit code */
+ goto __pyx_L0;
+ __pyx_L1_error:;
+ __pyx_r = NULL;
+ __pyx_L0:;
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+static PyObject *__pyx_pf_4bbox_bbox_overlaps(CYTHON_UNUSED PyObject *__pyx_self, PyArrayObject *__pyx_v_boxes, PyArrayObject *__pyx_v_query_boxes) {
+ unsigned int __pyx_v_N;
+ unsigned int __pyx_v_K;
+ PyArrayObject *__pyx_v_overlaps = 0;
+ __pyx_t_4bbox_DTYPE_t __pyx_v_iw;
+ __pyx_t_4bbox_DTYPE_t __pyx_v_ih;
+ __pyx_t_4bbox_DTYPE_t __pyx_v_box_area;
+ __pyx_t_4bbox_DTYPE_t __pyx_v_ua;
+ unsigned int __pyx_v_k;
+ unsigned int __pyx_v_n;
+ __Pyx_LocalBuf_ND __pyx_pybuffernd_boxes;
+ __Pyx_Buffer __pyx_pybuffer_boxes;
+ __Pyx_LocalBuf_ND __pyx_pybuffernd_overlaps;
+ __Pyx_Buffer __pyx_pybuffer_overlaps;
+ __Pyx_LocalBuf_ND __pyx_pybuffernd_query_boxes;
+ __Pyx_Buffer __pyx_pybuffer_query_boxes;
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ PyObject *__pyx_t_2 = NULL;
+ PyObject *__pyx_t_3 = NULL;
+ PyObject *__pyx_t_4 = NULL;
+ PyArrayObject *__pyx_t_5 = NULL;
+ unsigned int __pyx_t_6;
+ unsigned int __pyx_t_7;
+ unsigned int __pyx_t_8;
+ size_t __pyx_t_9;
+ Py_ssize_t __pyx_t_10;
+ int __pyx_t_11;
+ size_t __pyx_t_12;
+ Py_ssize_t __pyx_t_13;
+ size_t __pyx_t_14;
+ Py_ssize_t __pyx_t_15;
+ size_t __pyx_t_16;
+ Py_ssize_t __pyx_t_17;
+ unsigned int __pyx_t_18;
+ unsigned int __pyx_t_19;
+ unsigned int __pyx_t_20;
+ __pyx_t_4bbox_DTYPE_t __pyx_t_21;
+ __pyx_t_4bbox_DTYPE_t __pyx_t_22;
+ __pyx_t_4bbox_DTYPE_t __pyx_t_23;
+ __pyx_t_4bbox_DTYPE_t __pyx_t_24;
+ int __pyx_t_25;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("bbox_overlaps", 0);
+ __pyx_pybuffer_overlaps.pybuffer.buf = NULL;
+ __pyx_pybuffer_overlaps.refcount = 0;
+ __pyx_pybuffernd_overlaps.data = NULL;
+ __pyx_pybuffernd_overlaps.rcbuffer = &__pyx_pybuffer_overlaps;
+ __pyx_pybuffer_boxes.pybuffer.buf = NULL;
+ __pyx_pybuffer_boxes.refcount = 0;
+ __pyx_pybuffernd_boxes.data = NULL;
+ __pyx_pybuffernd_boxes.rcbuffer = &__pyx_pybuffer_boxes;
+ __pyx_pybuffer_query_boxes.pybuffer.buf = NULL;
+ __pyx_pybuffer_query_boxes.refcount = 0;
+ __pyx_pybuffernd_query_boxes.data = NULL;
+ __pyx_pybuffernd_query_boxes.rcbuffer = &__pyx_pybuffer_query_boxes;
+ {
+ __Pyx_BufFmt_StackElem __pyx_stack[1];
+ if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_boxes.rcbuffer->pybuffer, (PyObject*)__pyx_v_boxes, &__Pyx_TypeInfo_nn___pyx_t_4bbox_DTYPE_t, PyBUF_FORMAT| PyBUF_STRIDES, 2, 0, __pyx_stack) == -1)) __PYX_ERR(0, 15, __pyx_L1_error)
+ }
+ __pyx_pybuffernd_boxes.diminfo[0].strides = __pyx_pybuffernd_boxes.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_boxes.diminfo[0].shape = __pyx_pybuffernd_boxes.rcbuffer->pybuffer.shape[0]; __pyx_pybuffernd_boxes.diminfo[1].strides = __pyx_pybuffernd_boxes.rcbuffer->pybuffer.strides[1]; __pyx_pybuffernd_boxes.diminfo[1].shape = __pyx_pybuffernd_boxes.rcbuffer->pybuffer.shape[1];
+ {
+ __Pyx_BufFmt_StackElem __pyx_stack[1];
+ if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_query_boxes.rcbuffer->pybuffer, (PyObject*)__pyx_v_query_boxes, &__Pyx_TypeInfo_nn___pyx_t_4bbox_DTYPE_t, PyBUF_FORMAT| PyBUF_STRIDES, 2, 0, __pyx_stack) == -1)) __PYX_ERR(0, 15, __pyx_L1_error)
+ }
+ __pyx_pybuffernd_query_boxes.diminfo[0].strides = __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_query_boxes.diminfo[0].shape = __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.shape[0]; __pyx_pybuffernd_query_boxes.diminfo[1].strides = __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.strides[1]; __pyx_pybuffernd_query_boxes.diminfo[1].shape = __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.shape[1];
+
+ /* "box_overlaps.pyx":27
+ * overlaps: (N, K) ndarray of overlap between boxes and query_boxes
+ * """
+ * cdef unsigned int N = boxes.shape[0] # <<<<<<<<<<<<<<
+ * cdef unsigned int K = query_boxes.shape[0]
+ * cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)
+ */
+ __pyx_v_N = (__pyx_v_boxes->dimensions[0]);
+
+ /* "box_overlaps.pyx":28
+ * """
+ * cdef unsigned int N = boxes.shape[0]
+ * cdef unsigned int K = query_boxes.shape[0] # <<<<<<<<<<<<<<
+ * cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)
+ * cdef DTYPE_t iw, ih, box_area
+ */
+ __pyx_v_K = (__pyx_v_query_boxes->dimensions[0]);
+
+ /* "box_overlaps.pyx":29
+ * cdef unsigned int N = boxes.shape[0]
+ * cdef unsigned int K = query_boxes.shape[0]
+ * cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) # <<<<<<<<<<<<<<
+ * cdef DTYPE_t iw, ih, box_area
+ * cdef DTYPE_t ua
+ */
+ __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_np); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 29, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_zeros); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 29, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_2);
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ __pyx_t_1 = __Pyx_PyInt_From_unsigned_int(__pyx_v_N); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 29, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_3 = __Pyx_PyInt_From_unsigned_int(__pyx_v_K); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 29, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __pyx_t_4 = PyTuple_New(2); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 29, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_4);
+ __Pyx_GIVEREF(__pyx_t_1);
+ PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_1);
+ __Pyx_GIVEREF(__pyx_t_3);
+ PyTuple_SET_ITEM(__pyx_t_4, 1, __pyx_t_3);
+ __pyx_t_1 = 0;
+ __pyx_t_3 = 0;
+ __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 29, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_GIVEREF(__pyx_t_4);
+ PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_4);
+ __pyx_t_4 = 0;
+ __pyx_t_4 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 29, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_4);
+ __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_DTYPE); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 29, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ if (PyDict_SetItem(__pyx_t_4, __pyx_n_s_dtype, __pyx_t_1) < 0) __PYX_ERR(0, 29, __pyx_L1_error)
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_2, __pyx_t_3, __pyx_t_4); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 29, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+ if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 29, __pyx_L1_error)
+ __pyx_t_5 = ((PyArrayObject *)__pyx_t_1);
+ {
+ __Pyx_BufFmt_StackElem __pyx_stack[1];
+ if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_overlaps.rcbuffer->pybuffer, (PyObject*)__pyx_t_5, &__Pyx_TypeInfo_nn___pyx_t_4bbox_DTYPE_t, PyBUF_FORMAT| PyBUF_STRIDES| PyBUF_WRITABLE, 2, 0, __pyx_stack) == -1)) {
+ __pyx_v_overlaps = ((PyArrayObject *)Py_None); __Pyx_INCREF(Py_None); __pyx_pybuffernd_overlaps.rcbuffer->pybuffer.buf = NULL;
+ __PYX_ERR(0, 29, __pyx_L1_error)
+ } else {__pyx_pybuffernd_overlaps.diminfo[0].strides = __pyx_pybuffernd_overlaps.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_overlaps.diminfo[0].shape = __pyx_pybuffernd_overlaps.rcbuffer->pybuffer.shape[0]; __pyx_pybuffernd_overlaps.diminfo[1].strides = __pyx_pybuffernd_overlaps.rcbuffer->pybuffer.strides[1]; __pyx_pybuffernd_overlaps.diminfo[1].shape = __pyx_pybuffernd_overlaps.rcbuffer->pybuffer.shape[1];
+ }
+ }
+ __pyx_t_5 = 0;
+ __pyx_v_overlaps = ((PyArrayObject *)__pyx_t_1);
+ __pyx_t_1 = 0;
+
+ /* "box_overlaps.pyx":33
+ * cdef DTYPE_t ua
+ * cdef unsigned int k, n
+ * for k in range(K): # <<<<<<<<<<<<<<
+ * box_area = (
+ * (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
+ */
+ __pyx_t_6 = __pyx_v_K;
+ __pyx_t_7 = __pyx_t_6;
+ for (__pyx_t_8 = 0; __pyx_t_8 < __pyx_t_7; __pyx_t_8+=1) {
+ __pyx_v_k = __pyx_t_8;
+
+ /* "box_overlaps.pyx":35
+ * for k in range(K):
+ * box_area = (
+ * (query_boxes[k, 2] - query_boxes[k, 0] + 1) * # <<<<<<<<<<<<<<
+ * (query_boxes[k, 3] - query_boxes[k, 1] + 1)
+ * )
+ */
+ __pyx_t_9 = __pyx_v_k;
+ __pyx_t_10 = 2;
+ __pyx_t_11 = -1;
+ if (unlikely(__pyx_t_9 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_11 = 0;
+ if (__pyx_t_10 < 0) {
+ __pyx_t_10 += __pyx_pybuffernd_query_boxes.diminfo[1].shape;
+ if (unlikely(__pyx_t_10 < 0)) __pyx_t_11 = 1;
+ } else if (unlikely(__pyx_t_10 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_11 = 1;
+ if (unlikely(__pyx_t_11 != -1)) {
+ __Pyx_RaiseBufferIndexError(__pyx_t_11);
+ __PYX_ERR(0, 35, __pyx_L1_error)
+ }
+ __pyx_t_12 = __pyx_v_k;
+ __pyx_t_13 = 0;
+ __pyx_t_11 = -1;
+ if (unlikely(__pyx_t_12 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_11 = 0;
+ if (__pyx_t_13 < 0) {
+ __pyx_t_13 += __pyx_pybuffernd_query_boxes.diminfo[1].shape;
+ if (unlikely(__pyx_t_13 < 0)) __pyx_t_11 = 1;
+ } else if (unlikely(__pyx_t_13 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_11 = 1;
+ if (unlikely(__pyx_t_11 != -1)) {
+ __Pyx_RaiseBufferIndexError(__pyx_t_11);
+ __PYX_ERR(0, 35, __pyx_L1_error)
+ }
+
+ /* "box_overlaps.pyx":36
+ * box_area = (
+ * (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
+ * (query_boxes[k, 3] - query_boxes[k, 1] + 1) # <<<<<<<<<<<<<<
+ * )
+ * for n in range(N):
+ */
+ __pyx_t_14 = __pyx_v_k;
+ __pyx_t_15 = 3;
+ __pyx_t_11 = -1;
+ if (unlikely(__pyx_t_14 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_11 = 0;
+ if (__pyx_t_15 < 0) {
+ __pyx_t_15 += __pyx_pybuffernd_query_boxes.diminfo[1].shape;
+ if (unlikely(__pyx_t_15 < 0)) __pyx_t_11 = 1;
+ } else if (unlikely(__pyx_t_15 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_11 = 1;
+ if (unlikely(__pyx_t_11 != -1)) {
+ __Pyx_RaiseBufferIndexError(__pyx_t_11);
+ __PYX_ERR(0, 36, __pyx_L1_error)
+ }
+ __pyx_t_16 = __pyx_v_k;
+ __pyx_t_17 = 1;
+ __pyx_t_11 = -1;
+ if (unlikely(__pyx_t_16 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_11 = 0;
+ if (__pyx_t_17 < 0) {
+ __pyx_t_17 += __pyx_pybuffernd_query_boxes.diminfo[1].shape;
+ if (unlikely(__pyx_t_17 < 0)) __pyx_t_11 = 1;
+ } else if (unlikely(__pyx_t_17 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_11 = 1;
+ if (unlikely(__pyx_t_11 != -1)) {
+ __Pyx_RaiseBufferIndexError(__pyx_t_11);
+ __PYX_ERR(0, 36, __pyx_L1_error)
+ }
+
+ /* "box_overlaps.pyx":35
+ * for k in range(K):
+ * box_area = (
+ * (query_boxes[k, 2] - query_boxes[k, 0] + 1) * # <<<<<<<<<<<<<<
+ * (query_boxes[k, 3] - query_boxes[k, 1] + 1)
+ * )
+ */
+ __pyx_v_box_area = ((((*__Pyx_BufPtrStrided2d(__pyx_t_4bbox_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_9, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_10, __pyx_pybuffernd_query_boxes.diminfo[1].strides)) - (*__Pyx_BufPtrStrided2d(__pyx_t_4bbox_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_12, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_13, __pyx_pybuffernd_query_boxes.diminfo[1].strides))) + 1.0) * (((*__Pyx_BufPtrStrided2d(__pyx_t_4bbox_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_14, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_15, __pyx_pybuffernd_query_boxes.diminfo[1].strides)) - (*__Pyx_BufPtrStrided2d(__pyx_t_4bbox_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_16, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_17, __pyx_pybuffernd_query_boxes.diminfo[1].strides))) + 1.0));
+
+ /* "box_overlaps.pyx":38
+ * (query_boxes[k, 3] - query_boxes[k, 1] + 1)
+ * )
+ * for n in range(N): # <<<<<<<<<<<<<<
+ * iw = (
+ * min(boxes[n, 2], query_boxes[k, 2]) -
+ */
+ __pyx_t_18 = __pyx_v_N;
+ __pyx_t_19 = __pyx_t_18;
+ for (__pyx_t_20 = 0; __pyx_t_20 < __pyx_t_19; __pyx_t_20+=1) {
+ __pyx_v_n = __pyx_t_20;
+
+ /* "box_overlaps.pyx":40
+ * for n in range(N):
+ * iw = (
+ * min(boxes[n, 2], query_boxes[k, 2]) - # <<<<<<<<<<<<<<
+ * max(boxes[n, 0], query_boxes[k, 0]) + 1
+ * )
+ */
+ __pyx_t_16 = __pyx_v_k;
+ __pyx_t_17 = 2;
+ __pyx_t_11 = -1;
+ if (unlikely(__pyx_t_16 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_11 = 0;
+ if (__pyx_t_17 < 0) {
+ __pyx_t_17 += __pyx_pybuffernd_query_boxes.diminfo[1].shape;
+ if (unlikely(__pyx_t_17 < 0)) __pyx_t_11 = 1;
+ } else if (unlikely(__pyx_t_17 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_11 = 1;
+ if (unlikely(__pyx_t_11 != -1)) {
+ __Pyx_RaiseBufferIndexError(__pyx_t_11);
+ __PYX_ERR(0, 40, __pyx_L1_error)
+ }
+ __pyx_t_21 = (*__Pyx_BufPtrStrided2d(__pyx_t_4bbox_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_16, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_17, __pyx_pybuffernd_query_boxes.diminfo[1].strides));
+ __pyx_t_16 = __pyx_v_n;
+ __pyx_t_17 = 2;
+ __pyx_t_11 = -1;
+ if (unlikely(__pyx_t_16 >= (size_t)__pyx_pybuffernd_boxes.diminfo[0].shape)) __pyx_t_11 = 0;
+ if (__pyx_t_17 < 0) {
+ __pyx_t_17 += __pyx_pybuffernd_boxes.diminfo[1].shape;
+ if (unlikely(__pyx_t_17 < 0)) __pyx_t_11 = 1;
+ } else if (unlikely(__pyx_t_17 >= __pyx_pybuffernd_boxes.diminfo[1].shape)) __pyx_t_11 = 1;
+ if (unlikely(__pyx_t_11 != -1)) {
+ __Pyx_RaiseBufferIndexError(__pyx_t_11);
+ __PYX_ERR(0, 40, __pyx_L1_error)
+ }
+ __pyx_t_22 = (*__Pyx_BufPtrStrided2d(__pyx_t_4bbox_DTYPE_t *, __pyx_pybuffernd_boxes.rcbuffer->pybuffer.buf, __pyx_t_16, __pyx_pybuffernd_boxes.diminfo[0].strides, __pyx_t_17, __pyx_pybuffernd_boxes.diminfo[1].strides));
+ if (((__pyx_t_21 < __pyx_t_22) != 0)) {
+ __pyx_t_23 = __pyx_t_21;
+ } else {
+ __pyx_t_23 = __pyx_t_22;
+ }
+
+ /* "box_overlaps.pyx":41
+ * iw = (
+ * min(boxes[n, 2], query_boxes[k, 2]) -
+ * max(boxes[n, 0], query_boxes[k, 0]) + 1 # <<<<<<<<<<<<<<
+ * )
+ * if iw > 0:
+ */
+ __pyx_t_16 = __pyx_v_k;
+ __pyx_t_17 = 0;
+ __pyx_t_11 = -1;
+ if (unlikely(__pyx_t_16 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_11 = 0;
+ if (__pyx_t_17 < 0) {
+ __pyx_t_17 += __pyx_pybuffernd_query_boxes.diminfo[1].shape;
+ if (unlikely(__pyx_t_17 < 0)) __pyx_t_11 = 1;
+ } else if (unlikely(__pyx_t_17 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_11 = 1;
+ if (unlikely(__pyx_t_11 != -1)) {
+ __Pyx_RaiseBufferIndexError(__pyx_t_11);
+ __PYX_ERR(0, 41, __pyx_L1_error)
+ }
+ __pyx_t_21 = (*__Pyx_BufPtrStrided2d(__pyx_t_4bbox_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_16, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_17, __pyx_pybuffernd_query_boxes.diminfo[1].strides));
+ __pyx_t_16 = __pyx_v_n;
+ __pyx_t_17 = 0;
+ __pyx_t_11 = -1;
+ if (unlikely(__pyx_t_16 >= (size_t)__pyx_pybuffernd_boxes.diminfo[0].shape)) __pyx_t_11 = 0;
+ if (__pyx_t_17 < 0) {
+ __pyx_t_17 += __pyx_pybuffernd_boxes.diminfo[1].shape;
+ if (unlikely(__pyx_t_17 < 0)) __pyx_t_11 = 1;
+ } else if (unlikely(__pyx_t_17 >= __pyx_pybuffernd_boxes.diminfo[1].shape)) __pyx_t_11 = 1;
+ if (unlikely(__pyx_t_11 != -1)) {
+ __Pyx_RaiseBufferIndexError(__pyx_t_11);
+ __PYX_ERR(0, 41, __pyx_L1_error)
+ }
+ __pyx_t_22 = (*__Pyx_BufPtrStrided2d(__pyx_t_4bbox_DTYPE_t *, __pyx_pybuffernd_boxes.rcbuffer->pybuffer.buf, __pyx_t_16, __pyx_pybuffernd_boxes.diminfo[0].strides, __pyx_t_17, __pyx_pybuffernd_boxes.diminfo[1].strides));
+ if (((__pyx_t_21 > __pyx_t_22) != 0)) {
+ __pyx_t_24 = __pyx_t_21;
+ } else {
+ __pyx_t_24 = __pyx_t_22;
+ }
+
+ /* "box_overlaps.pyx":40
+ * for n in range(N):
+ * iw = (
+ * min(boxes[n, 2], query_boxes[k, 2]) - # <<<<<<<<<<<<<<
+ * max(boxes[n, 0], query_boxes[k, 0]) + 1
+ * )
+ */
+ __pyx_v_iw = ((__pyx_t_23 - __pyx_t_24) + 1.0);
+
+ /* "box_overlaps.pyx":43
+ * max(boxes[n, 0], query_boxes[k, 0]) + 1
+ * )
+ * if iw > 0: # <<<<<<<<<<<<<<
+ * ih = (
+ * min(boxes[n, 3], query_boxes[k, 3]) -
+ */
+ __pyx_t_25 = ((__pyx_v_iw > 0.0) != 0);
+ if (__pyx_t_25) {
+
+ /* "box_overlaps.pyx":45
+ * if iw > 0:
+ * ih = (
+ * min(boxes[n, 3], query_boxes[k, 3]) - # <<<<<<<<<<<<<<
+ * max(boxes[n, 1], query_boxes[k, 1]) + 1
+ * )
+ */
+ __pyx_t_16 = __pyx_v_k;
+ __pyx_t_17 = 3;
+ __pyx_t_11 = -1;
+ if (unlikely(__pyx_t_16 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_11 = 0;
+ if (__pyx_t_17 < 0) {
+ __pyx_t_17 += __pyx_pybuffernd_query_boxes.diminfo[1].shape;
+ if (unlikely(__pyx_t_17 < 0)) __pyx_t_11 = 1;
+ } else if (unlikely(__pyx_t_17 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_11 = 1;
+ if (unlikely(__pyx_t_11 != -1)) {
+ __Pyx_RaiseBufferIndexError(__pyx_t_11);
+ __PYX_ERR(0, 45, __pyx_L1_error)
+ }
+ __pyx_t_24 = (*__Pyx_BufPtrStrided2d(__pyx_t_4bbox_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_16, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_17, __pyx_pybuffernd_query_boxes.diminfo[1].strides));
+ __pyx_t_16 = __pyx_v_n;
+ __pyx_t_17 = 3;
+ __pyx_t_11 = -1;
+ if (unlikely(__pyx_t_16 >= (size_t)__pyx_pybuffernd_boxes.diminfo[0].shape)) __pyx_t_11 = 0;
+ if (__pyx_t_17 < 0) {
+ __pyx_t_17 += __pyx_pybuffernd_boxes.diminfo[1].shape;
+ if (unlikely(__pyx_t_17 < 0)) __pyx_t_11 = 1;
+ } else if (unlikely(__pyx_t_17 >= __pyx_pybuffernd_boxes.diminfo[1].shape)) __pyx_t_11 = 1;
+ if (unlikely(__pyx_t_11 != -1)) {
+ __Pyx_RaiseBufferIndexError(__pyx_t_11);
+ __PYX_ERR(0, 45, __pyx_L1_error)
+ }
+ __pyx_t_23 = (*__Pyx_BufPtrStrided2d(__pyx_t_4bbox_DTYPE_t *, __pyx_pybuffernd_boxes.rcbuffer->pybuffer.buf, __pyx_t_16, __pyx_pybuffernd_boxes.diminfo[0].strides, __pyx_t_17, __pyx_pybuffernd_boxes.diminfo[1].strides));
+ if (((__pyx_t_24 < __pyx_t_23) != 0)) {
+ __pyx_t_21 = __pyx_t_24;
+ } else {
+ __pyx_t_21 = __pyx_t_23;
+ }
+
+ /* "box_overlaps.pyx":46
+ * ih = (
+ * min(boxes[n, 3], query_boxes[k, 3]) -
+ * max(boxes[n, 1], query_boxes[k, 1]) + 1 # <<<<<<<<<<<<<<
+ * )
+ * if ih > 0:
+ */
+ __pyx_t_16 = __pyx_v_k;
+ __pyx_t_17 = 1;
+ __pyx_t_11 = -1;
+ if (unlikely(__pyx_t_16 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_11 = 0;
+ if (__pyx_t_17 < 0) {
+ __pyx_t_17 += __pyx_pybuffernd_query_boxes.diminfo[1].shape;
+ if (unlikely(__pyx_t_17 < 0)) __pyx_t_11 = 1;
+ } else if (unlikely(__pyx_t_17 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_11 = 1;
+ if (unlikely(__pyx_t_11 != -1)) {
+ __Pyx_RaiseBufferIndexError(__pyx_t_11);
+ __PYX_ERR(0, 46, __pyx_L1_error)
+ }
+ __pyx_t_24 = (*__Pyx_BufPtrStrided2d(__pyx_t_4bbox_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_16, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_17, __pyx_pybuffernd_query_boxes.diminfo[1].strides));
+ __pyx_t_16 = __pyx_v_n;
+ __pyx_t_17 = 1;
+ __pyx_t_11 = -1;
+ if (unlikely(__pyx_t_16 >= (size_t)__pyx_pybuffernd_boxes.diminfo[0].shape)) __pyx_t_11 = 0;
+ if (__pyx_t_17 < 0) {
+ __pyx_t_17 += __pyx_pybuffernd_boxes.diminfo[1].shape;
+ if (unlikely(__pyx_t_17 < 0)) __pyx_t_11 = 1;
+ } else if (unlikely(__pyx_t_17 >= __pyx_pybuffernd_boxes.diminfo[1].shape)) __pyx_t_11 = 1;
+ if (unlikely(__pyx_t_11 != -1)) {
+ __Pyx_RaiseBufferIndexError(__pyx_t_11);
+ __PYX_ERR(0, 46, __pyx_L1_error)
+ }
+ __pyx_t_23 = (*__Pyx_BufPtrStrided2d(__pyx_t_4bbox_DTYPE_t *, __pyx_pybuffernd_boxes.rcbuffer->pybuffer.buf, __pyx_t_16, __pyx_pybuffernd_boxes.diminfo[0].strides, __pyx_t_17, __pyx_pybuffernd_boxes.diminfo[1].strides));
+ if (((__pyx_t_24 > __pyx_t_23) != 0)) {
+ __pyx_t_22 = __pyx_t_24;
+ } else {
+ __pyx_t_22 = __pyx_t_23;
+ }
+
+ /* "box_overlaps.pyx":45
+ * if iw > 0:
+ * ih = (
+ * min(boxes[n, 3], query_boxes[k, 3]) - # <<<<<<<<<<<<<<
+ * max(boxes[n, 1], query_boxes[k, 1]) + 1
+ * )
+ */
+ __pyx_v_ih = ((__pyx_t_21 - __pyx_t_22) + 1.0);
+
+ /* "box_overlaps.pyx":48
+ * max(boxes[n, 1], query_boxes[k, 1]) + 1
+ * )
+ * if ih > 0: # <<<<<<<<<<<<<<
+ * ua = float(
+ * (boxes[n, 2] - boxes[n, 0] + 1) *
+ */
+ __pyx_t_25 = ((__pyx_v_ih > 0.0) != 0);
+ if (__pyx_t_25) {
+
+ /* "box_overlaps.pyx":50
+ * if ih > 0:
+ * ua = float(
+ * (boxes[n, 2] - boxes[n, 0] + 1) * # <<<<<<<<<<<<<<
+ * (boxes[n, 3] - boxes[n, 1] + 1) +
+ * box_area - iw * ih
+ */
+ __pyx_t_16 = __pyx_v_n;
+ __pyx_t_17 = 2;
+ __pyx_t_11 = -1;
+ if (unlikely(__pyx_t_16 >= (size_t)__pyx_pybuffernd_boxes.diminfo[0].shape)) __pyx_t_11 = 0;
+ if (__pyx_t_17 < 0) {
+ __pyx_t_17 += __pyx_pybuffernd_boxes.diminfo[1].shape;
+ if (unlikely(__pyx_t_17 < 0)) __pyx_t_11 = 1;
+ } else if (unlikely(__pyx_t_17 >= __pyx_pybuffernd_boxes.diminfo[1].shape)) __pyx_t_11 = 1;
+ if (unlikely(__pyx_t_11 != -1)) {
+ __Pyx_RaiseBufferIndexError(__pyx_t_11);
+ __PYX_ERR(0, 50, __pyx_L1_error)
+ }
+ __pyx_t_14 = __pyx_v_n;
+ __pyx_t_15 = 0;
+ __pyx_t_11 = -1;
+ if (unlikely(__pyx_t_14 >= (size_t)__pyx_pybuffernd_boxes.diminfo[0].shape)) __pyx_t_11 = 0;
+ if (__pyx_t_15 < 0) {
+ __pyx_t_15 += __pyx_pybuffernd_boxes.diminfo[1].shape;
+ if (unlikely(__pyx_t_15 < 0)) __pyx_t_11 = 1;
+ } else if (unlikely(__pyx_t_15 >= __pyx_pybuffernd_boxes.diminfo[1].shape)) __pyx_t_11 = 1;
+ if (unlikely(__pyx_t_11 != -1)) {
+ __Pyx_RaiseBufferIndexError(__pyx_t_11);
+ __PYX_ERR(0, 50, __pyx_L1_error)
+ }
+
+ /* "box_overlaps.pyx":51
+ * ua = float(
+ * (boxes[n, 2] - boxes[n, 0] + 1) *
+ * (boxes[n, 3] - boxes[n, 1] + 1) + # <<<<<<<<<<<<<<
+ * box_area - iw * ih
+ * )
+ */
+ __pyx_t_12 = __pyx_v_n;
+ __pyx_t_13 = 3;
+ __pyx_t_11 = -1;
+ if (unlikely(__pyx_t_12 >= (size_t)__pyx_pybuffernd_boxes.diminfo[0].shape)) __pyx_t_11 = 0;
+ if (__pyx_t_13 < 0) {
+ __pyx_t_13 += __pyx_pybuffernd_boxes.diminfo[1].shape;
+ if (unlikely(__pyx_t_13 < 0)) __pyx_t_11 = 1;
+ } else if (unlikely(__pyx_t_13 >= __pyx_pybuffernd_boxes.diminfo[1].shape)) __pyx_t_11 = 1;
+ if (unlikely(__pyx_t_11 != -1)) {
+ __Pyx_RaiseBufferIndexError(__pyx_t_11);
+ __PYX_ERR(0, 51, __pyx_L1_error)
+ }
+ __pyx_t_9 = __pyx_v_n;
+ __pyx_t_10 = 1;
+ __pyx_t_11 = -1;
+ if (unlikely(__pyx_t_9 >= (size_t)__pyx_pybuffernd_boxes.diminfo[0].shape)) __pyx_t_11 = 0;
+ if (__pyx_t_10 < 0) {
+ __pyx_t_10 += __pyx_pybuffernd_boxes.diminfo[1].shape;
+ if (unlikely(__pyx_t_10 < 0)) __pyx_t_11 = 1;
+ } else if (unlikely(__pyx_t_10 >= __pyx_pybuffernd_boxes.diminfo[1].shape)) __pyx_t_11 = 1;
+ if (unlikely(__pyx_t_11 != -1)) {
+ __Pyx_RaiseBufferIndexError(__pyx_t_11);
+ __PYX_ERR(0, 51, __pyx_L1_error)
+ }
+
+ /* "box_overlaps.pyx":49
+ * )
+ * if ih > 0:
+ * ua = float( # <<<<<<<<<<<<<<
+ * (boxes[n, 2] - boxes[n, 0] + 1) *
+ * (boxes[n, 3] - boxes[n, 1] + 1) +
+ */
+ __pyx_v_ua = ((double)((((((*__Pyx_BufPtrStrided2d(__pyx_t_4bbox_DTYPE_t *, __pyx_pybuffernd_boxes.rcbuffer->pybuffer.buf, __pyx_t_16, __pyx_pybuffernd_boxes.diminfo[0].strides, __pyx_t_17, __pyx_pybuffernd_boxes.diminfo[1].strides)) - (*__Pyx_BufPtrStrided2d(__pyx_t_4bbox_DTYPE_t *, __pyx_pybuffernd_boxes.rcbuffer->pybuffer.buf, __pyx_t_14, __pyx_pybuffernd_boxes.diminfo[0].strides, __pyx_t_15, __pyx_pybuffernd_boxes.diminfo[1].strides))) + 1.0) * (((*__Pyx_BufPtrStrided2d(__pyx_t_4bbox_DTYPE_t *, __pyx_pybuffernd_boxes.rcbuffer->pybuffer.buf, __pyx_t_12, __pyx_pybuffernd_boxes.diminfo[0].strides, __pyx_t_13, __pyx_pybuffernd_boxes.diminfo[1].strides)) - (*__Pyx_BufPtrStrided2d(__pyx_t_4bbox_DTYPE_t *, __pyx_pybuffernd_boxes.rcbuffer->pybuffer.buf, __pyx_t_9, __pyx_pybuffernd_boxes.diminfo[0].strides, __pyx_t_10, __pyx_pybuffernd_boxes.diminfo[1].strides))) + 1.0)) + __pyx_v_box_area) - (__pyx_v_iw * __pyx_v_ih)));
+
+ /* "box_overlaps.pyx":54
+ * box_area - iw * ih
+ * )
+ * overlaps[n, k] = iw * ih / ua # <<<<<<<<<<<<<<
+ * return overlaps
+ */
+ __pyx_t_22 = (__pyx_v_iw * __pyx_v_ih);
+ if (unlikely(__pyx_v_ua == 0)) {
+ PyErr_SetString(PyExc_ZeroDivisionError, "float division");
+ __PYX_ERR(0, 54, __pyx_L1_error)
+ }
+ __pyx_t_9 = __pyx_v_n;
+ __pyx_t_12 = __pyx_v_k;
+ __pyx_t_11 = -1;
+ if (unlikely(__pyx_t_9 >= (size_t)__pyx_pybuffernd_overlaps.diminfo[0].shape)) __pyx_t_11 = 0;
+ if (unlikely(__pyx_t_12 >= (size_t)__pyx_pybuffernd_overlaps.diminfo[1].shape)) __pyx_t_11 = 1;
+ if (unlikely(__pyx_t_11 != -1)) {
+ __Pyx_RaiseBufferIndexError(__pyx_t_11);
+ __PYX_ERR(0, 54, __pyx_L1_error)
+ }
+ *__Pyx_BufPtrStrided2d(__pyx_t_4bbox_DTYPE_t *, __pyx_pybuffernd_overlaps.rcbuffer->pybuffer.buf, __pyx_t_9, __pyx_pybuffernd_overlaps.diminfo[0].strides, __pyx_t_12, __pyx_pybuffernd_overlaps.diminfo[1].strides) = (__pyx_t_22 / __pyx_v_ua);
+
+ /* "box_overlaps.pyx":48
+ * max(boxes[n, 1], query_boxes[k, 1]) + 1
+ * )
+ * if ih > 0: # <<<<<<<<<<<<<<
+ * ua = float(
+ * (boxes[n, 2] - boxes[n, 0] + 1) *
+ */
+ }
+
+ /* "box_overlaps.pyx":43
+ * max(boxes[n, 0], query_boxes[k, 0]) + 1
+ * )
+ * if iw > 0: # <<<<<<<<<<<<<<
+ * ih = (
+ * min(boxes[n, 3], query_boxes[k, 3]) -
+ */
+ }
+ }
+ }
+
+ /* "box_overlaps.pyx":55
+ * )
+ * overlaps[n, k] = iw * ih / ua
+ * return overlaps # <<<<<<<<<<<<<<
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __Pyx_INCREF(((PyObject *)__pyx_v_overlaps));
+ __pyx_r = ((PyObject *)__pyx_v_overlaps);
+ goto __pyx_L0;
+
+ /* "box_overlaps.pyx":15
+ * ctypedef np.float_t DTYPE_t
+ *
+ * def bbox_overlaps( # <<<<<<<<<<<<<<
+ * np.ndarray[DTYPE_t, ndim=2] boxes,
+ * np.ndarray[DTYPE_t, ndim=2] query_boxes):
+ */
+
+ /* function exit code */
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_XDECREF(__pyx_t_2);
+ __Pyx_XDECREF(__pyx_t_3);
+ __Pyx_XDECREF(__pyx_t_4);
+ { PyObject *__pyx_type, *__pyx_value, *__pyx_tb;
+ __Pyx_PyThreadState_declare
+ __Pyx_PyThreadState_assign
+ __Pyx_ErrFetch(&__pyx_type, &__pyx_value, &__pyx_tb);
+ __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_boxes.rcbuffer->pybuffer);
+ __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_overlaps.rcbuffer->pybuffer);
+ __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_query_boxes.rcbuffer->pybuffer);
+ __Pyx_ErrRestore(__pyx_type, __pyx_value, __pyx_tb);}
+ __Pyx_AddTraceback("bbox.bbox_overlaps", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = NULL;
+ goto __pyx_L2;
+ __pyx_L0:;
+ __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_boxes.rcbuffer->pybuffer);
+ __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_overlaps.rcbuffer->pybuffer);
+ __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_query_boxes.rcbuffer->pybuffer);
+ __pyx_L2:;
+ __Pyx_XDECREF((PyObject *)__pyx_v_overlaps);
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":734
+ * ctypedef npy_cdouble complex_t
+ *
+ * cdef inline object PyArray_MultiIterNew1(a): # <<<<<<<<<<<<<<
+ * return PyArray_MultiIterNew(1, a)
+ *
+ */
+
+static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew1(PyObject *__pyx_v_a) { /* Calls PyArray_MultiIterNew(1, a) and returns its result; returns 0 after adding a traceback entry when that call yields NULL. */
+ PyObject *__pyx_r = NULL; /* return slot */
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL; /* temporary holding the call result */
+ int __pyx_lineno = 0; /* passed to __Pyx_AddTraceback on the error path; presumably filled in by __PYX_ERR (macro not visible here) */
+ const char *__pyx_filename = NULL; /* passed to __Pyx_AddTraceback on the error path */
+ int __pyx_clineno = 0; /* passed to __Pyx_AddTraceback on the error path */
+ __Pyx_RefNannySetupContext("PyArray_MultiIterNew1", 0);
+
+ /* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":735
+ *
+ * cdef inline object PyArray_MultiIterNew1(a):
+ * return PyArray_MultiIterNew(1, a) # <<<<<<<<<<<<<<
+ *
+ * cdef inline object PyArray_MultiIterNew2(a, b):
+ */
+ __Pyx_XDECREF(__pyx_r); /* __pyx_r is still NULL at this point */
+ __pyx_t_1 = PyArray_MultiIterNew(1, ((void *)__pyx_v_a)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 735, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_r = __pyx_t_1; /* move the result into the return slot ... */
+ __pyx_t_1 = 0; /* ... and clear the temporary so only __pyx_r refers to it */
+ goto __pyx_L0; /* success: skip the error cleanup below */
+
+ /* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":734
+ * ctypedef npy_cdouble complex_t
+ *
+ * cdef inline object PyArray_MultiIterNew1(a): # <<<<<<<<<<<<<<
+ * return PyArray_MultiIterNew(1, a)
+ *
+ */
+
+ /* function exit code: __pyx_L1_error is the failure path, __pyx_L0 the shared exit */
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1); /* drop the temporary if it is set */
+ __Pyx_AddTraceback("numpy.PyArray_MultiIterNew1", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = 0; /* failure is reported to the caller as a null return */
+ __pyx_L0:;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":737
+ * return PyArray_MultiIterNew(1, a)
+ *
+ * cdef inline object PyArray_MultiIterNew2(a, b): # <<<<<<<<<<<<<<
+ * return PyArray_MultiIterNew(2, a, b)
+ *
+ */
+
+static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew2(PyObject *__pyx_v_a, PyObject *__pyx_v_b) { /* Calls PyArray_MultiIterNew(2, a, b) and returns its result; returns 0 after adding a traceback entry when that call yields NULL. */
+ PyObject *__pyx_r = NULL; /* return slot */
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL; /* temporary holding the call result */
+ int __pyx_lineno = 0; /* passed to __Pyx_AddTraceback on the error path; presumably filled in by __PYX_ERR (macro not visible here) */
+ const char *__pyx_filename = NULL; /* passed to __Pyx_AddTraceback on the error path */
+ int __pyx_clineno = 0; /* passed to __Pyx_AddTraceback on the error path */
+ __Pyx_RefNannySetupContext("PyArray_MultiIterNew2", 0);
+
+ /* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":738
+ *
+ * cdef inline object PyArray_MultiIterNew2(a, b):
+ * return PyArray_MultiIterNew(2, a, b) # <<<<<<<<<<<<<<
+ *
+ * cdef inline object PyArray_MultiIterNew3(a, b, c):
+ */
+ __Pyx_XDECREF(__pyx_r); /* __pyx_r is still NULL at this point */
+ __pyx_t_1 = PyArray_MultiIterNew(2, ((void *)__pyx_v_a), ((void *)__pyx_v_b)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 738, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_r = __pyx_t_1; /* move the result into the return slot ... */
+ __pyx_t_1 = 0; /* ... and clear the temporary so only __pyx_r refers to it */
+ goto __pyx_L0; /* success: skip the error cleanup below */
+
+ /* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":737
+ * return PyArray_MultiIterNew(1, a)
+ *
+ * cdef inline object PyArray_MultiIterNew2(a, b): # <<<<<<<<<<<<<<
+ * return PyArray_MultiIterNew(2, a, b)
+ *
+ */
+
+ /* function exit code: __pyx_L1_error is the failure path, __pyx_L0 the shared exit */
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1); /* drop the temporary if it is set */
+ __Pyx_AddTraceback("numpy.PyArray_MultiIterNew2", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = 0; /* failure is reported to the caller as a null return */
+ __pyx_L0:;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":740
+ * return PyArray_MultiIterNew(2, a, b)
+ *
+ * cdef inline object PyArray_MultiIterNew3(a, b, c): # <<<<<<<<<<<<<<
+ * return PyArray_MultiIterNew(3, a, b, c)
+ *
+ */
+
+static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew3(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c) { /* Calls PyArray_MultiIterNew(3, a, b, c) and returns its result; returns 0 after adding a traceback entry when that call yields NULL. */
+ PyObject *__pyx_r = NULL; /* return slot */
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL; /* temporary holding the call result */
+ int __pyx_lineno = 0; /* passed to __Pyx_AddTraceback on the error path; presumably filled in by __PYX_ERR (macro not visible here) */
+ const char *__pyx_filename = NULL; /* passed to __Pyx_AddTraceback on the error path */
+ int __pyx_clineno = 0; /* passed to __Pyx_AddTraceback on the error path */
+ __Pyx_RefNannySetupContext("PyArray_MultiIterNew3", 0);
+
+ /* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":741
+ *
+ * cdef inline object PyArray_MultiIterNew3(a, b, c):
+ * return PyArray_MultiIterNew(3, a, b, c) # <<<<<<<<<<<<<<
+ *
+ * cdef inline object PyArray_MultiIterNew4(a, b, c, d):
+ */
+ __Pyx_XDECREF(__pyx_r); /* __pyx_r is still NULL at this point */
+ __pyx_t_1 = PyArray_MultiIterNew(3, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 741, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_r = __pyx_t_1; /* move the result into the return slot ... */
+ __pyx_t_1 = 0; /* ... and clear the temporary so only __pyx_r refers to it */
+ goto __pyx_L0; /* success: skip the error cleanup below */
+
+ /* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":740
+ * return PyArray_MultiIterNew(2, a, b)
+ *
+ * cdef inline object PyArray_MultiIterNew3(a, b, c): # <<<<<<<<<<<<<<
+ * return PyArray_MultiIterNew(3, a, b, c)
+ *
+ */
+
+ /* function exit code: __pyx_L1_error is the failure path, __pyx_L0 the shared exit */
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1); /* drop the temporary if it is set */
+ __Pyx_AddTraceback("numpy.PyArray_MultiIterNew3", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = 0; /* failure is reported to the caller as a null return */
+ __pyx_L0:;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":743
+ * return PyArray_MultiIterNew(3, a, b, c)
+ *
+ * cdef inline object PyArray_MultiIterNew4(a, b, c, d): # <<<<<<<<<<<<<<
+ * return PyArray_MultiIterNew(4, a, b, c, d)
+ *
+ */
+
+static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew4(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c, PyObject *__pyx_v_d) { /* Calls PyArray_MultiIterNew(4, a, b, c, d) and returns its result; returns 0 after adding a traceback entry when that call yields NULL. */
+ PyObject *__pyx_r = NULL; /* return slot */
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL; /* temporary holding the call result */
+ int __pyx_lineno = 0; /* passed to __Pyx_AddTraceback on the error path; presumably filled in by __PYX_ERR (macro not visible here) */
+ const char *__pyx_filename = NULL; /* passed to __Pyx_AddTraceback on the error path */
+ int __pyx_clineno = 0; /* passed to __Pyx_AddTraceback on the error path */
+ __Pyx_RefNannySetupContext("PyArray_MultiIterNew4", 0);
+
+ /* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":744
+ *
+ * cdef inline object PyArray_MultiIterNew4(a, b, c, d):
+ * return PyArray_MultiIterNew(4, a, b, c, d) # <<<<<<<<<<<<<<
+ *
+ * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e):
+ */
+ __Pyx_XDECREF(__pyx_r); /* __pyx_r is still NULL at this point */
+ __pyx_t_1 = PyArray_MultiIterNew(4, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c), ((void *)__pyx_v_d)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 744, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_r = __pyx_t_1; /* move the result into the return slot ... */
+ __pyx_t_1 = 0; /* ... and clear the temporary so only __pyx_r refers to it */
+ goto __pyx_L0; /* success: skip the error cleanup below */
+
+ /* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":743
+ * return PyArray_MultiIterNew(3, a, b, c)
+ *
+ * cdef inline object PyArray_MultiIterNew4(a, b, c, d): # <<<<<<<<<<<<<<
+ * return PyArray_MultiIterNew(4, a, b, c, d)
+ *
+ */
+
+ /* function exit code: __pyx_L1_error is the failure path, __pyx_L0 the shared exit */
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1); /* drop the temporary if it is set */
+ __Pyx_AddTraceback("numpy.PyArray_MultiIterNew4", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = 0; /* failure is reported to the caller as a null return */
+ __pyx_L0:;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":746
+ * return PyArray_MultiIterNew(4, a, b, c, d)
+ *
+ * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): # <<<<<<<<<<<<<<
+ * return PyArray_MultiIterNew(5, a, b, c, d, e)
+ *
+ */
+
+static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew5(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c, PyObject *__pyx_v_d, PyObject *__pyx_v_e) { /* Calls PyArray_MultiIterNew(5, a, b, c, d, e) and returns its result; returns 0 after adding a traceback entry when that call yields NULL. */
+ PyObject *__pyx_r = NULL; /* return slot */
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL; /* temporary holding the call result */
+ int __pyx_lineno = 0; /* passed to __Pyx_AddTraceback on the error path; presumably filled in by __PYX_ERR (macro not visible here) */
+ const char *__pyx_filename = NULL; /* passed to __Pyx_AddTraceback on the error path */
+ int __pyx_clineno = 0; /* passed to __Pyx_AddTraceback on the error path */
+ __Pyx_RefNannySetupContext("PyArray_MultiIterNew5", 0);
+
+ /* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":747
+ *
+ * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e):
+ * return PyArray_MultiIterNew(5, a, b, c, d, e) # <<<<<<<<<<<<<<
+ *
+ * cdef inline tuple PyDataType_SHAPE(dtype d):
+ */
+ __Pyx_XDECREF(__pyx_r); /* __pyx_r is still NULL at this point */
+ __pyx_t_1 = PyArray_MultiIterNew(5, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c), ((void *)__pyx_v_d), ((void *)__pyx_v_e)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 747, __pyx_L1_error)
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_r = __pyx_t_1; /* move the result into the return slot ... */
+ __pyx_t_1 = 0; /* ... and clear the temporary so only __pyx_r refers to it */
+ goto __pyx_L0; /* success: skip the error cleanup below */
+
+ /* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":746
+ * return PyArray_MultiIterNew(4, a, b, c, d)
+ *
+ * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): # <<<<<<<<<<<<<<
+ * return PyArray_MultiIterNew(5, a, b, c, d, e)
+ *
+ */
+
+ /* function exit code: __pyx_L1_error is the failure path, __pyx_L0 the shared exit */
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1); /* drop the temporary if it is set */
+ __Pyx_AddTraceback("numpy.PyArray_MultiIterNew5", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = 0; /* failure is reported to the caller as a null return */
+ __pyx_L0:;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":749
+ * return PyArray_MultiIterNew(5, a, b, c, d, e)
+ *
+ * cdef inline tuple PyDataType_SHAPE(dtype d): # <<<<<<<<<<<<<<
+ * if PyDataType_HASSUBARRAY(d):
+ * return d.subarray.shape
+ */
+
+static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyDataType_SHAPE(PyArray_Descr *__pyx_v_d) { /* Returns d->subarray->shape when PyDataType_HASSUBARRAY(d) is nonzero, otherwise the shared empty tuple; the returned object is INCREF'd in both branches. */
+ PyObject *__pyx_r = NULL; /* return slot */
+ __Pyx_RefNannyDeclarations
+ int __pyx_t_1; /* holds the HASSUBARRAY test result */
+ __Pyx_RefNannySetupContext("PyDataType_SHAPE", 0);
+
+ /* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":750
+ *
+ * cdef inline tuple PyDataType_SHAPE(dtype d):
+ * if PyDataType_HASSUBARRAY(d): # <<<<<<<<<<<<<<
+ * return d.subarray.shape
+ * else:
+ */
+ __pyx_t_1 = (PyDataType_HASSUBARRAY(__pyx_v_d) != 0);
+ if (__pyx_t_1) {
+
+ /* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":751
+ * cdef inline tuple PyDataType_SHAPE(dtype d):
+ * if PyDataType_HASSUBARRAY(d):
+ * return d.subarray.shape # <<<<<<<<<<<<<<
+ * else:
+ * return ()
+ */
+ __Pyx_XDECREF(__pyx_r); /* __pyx_r is still NULL at this point */
+ __Pyx_INCREF(((PyObject*)__pyx_v_d->subarray->shape)); /* take a reference before returning the descriptor's own shape object */
+ __pyx_r = ((PyObject*)__pyx_v_d->subarray->shape);
+ goto __pyx_L0;
+
+ /* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":750
+ *
+ * cdef inline tuple PyDataType_SHAPE(dtype d):
+ * if PyDataType_HASSUBARRAY(d): # <<<<<<<<<<<<<<
+ * return d.subarray.shape
+ * else:
+ */
+ }
+
+ /* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":753
+ * return d.subarray.shape
+ * else:
+ * return () # <<<<<<<<<<<<<<
+ *
+ *
+ */
+ /*else*/ {
+ __Pyx_XDECREF(__pyx_r); /* __pyx_r is still NULL at this point */
+ __Pyx_INCREF(__pyx_empty_tuple); /* take a reference before returning the module-level empty tuple */
+ __pyx_r = __pyx_empty_tuple;
+ goto __pyx_L0;
+ }
+
+ /* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":749
+ * return PyArray_MultiIterNew(5, a, b, c, d, e)
+ *
+ * cdef inline tuple PyDataType_SHAPE(dtype d): # <<<<<<<<<<<<<<
+ * if PyDataType_HASSUBARRAY(d):
+ * return d.subarray.shape
+ */
+
+ /* function exit code: single exit label, this function has no error path */
+ __pyx_L0:;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":928
+ * int _import_umath() except -1
+ *
+ * cdef inline void set_array_base(ndarray arr, object base): # <<<<<<<<<<<<<<
+ * Py_INCREF(base) # important to do this before stealing the reference below!
+ * PyArray_SetBaseObject(arr, base)
+ */
+/* Takes one extra reference on base, then passes arr and base to PyArray_SetBaseObject(); returns nothing to the caller. */
+static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_arr, PyObject *__pyx_v_base) {
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("set_array_base", 0);
+
+ /* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":929
+ *
+ * cdef inline void set_array_base(ndarray arr, object base):
+ * Py_INCREF(base) # important to do this before stealing the reference below! # <<<<<<<<<<<<<<
+ * PyArray_SetBaseObject(arr, base)
+ *
+ */
+ Py_INCREF(__pyx_v_base);
+
+ /* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":930
+ * cdef inline void set_array_base(ndarray arr, object base):
+ * Py_INCREF(base) # important to do this before stealing the reference below!
+ * PyArray_SetBaseObject(arr, base) # <<<<<<<<<<<<<<
+ *
+ * cdef inline object get_array_base(ndarray arr):
+ */
+ (void)(PyArray_SetBaseObject(__pyx_v_arr, __pyx_v_base));
+ /* NOTE(review): the (void) cast discards PyArray_SetBaseObject's result, so this function cannot signal a failure of that call - confirm that is acceptable. */
+ /* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":928
+ * int _import_umath() except -1
+ *
+ * cdef inline void set_array_base(ndarray arr, object base): # <<<<<<<<<<<<<<
+ * Py_INCREF(base) # important to do this before stealing the reference below!
+ * PyArray_SetBaseObject(arr, base)
+ */
+
+ /* function exit code */
+ __Pyx_RefNannyFinishContext();
+}
+
+/* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":932
+ * PyArray_SetBaseObject(arr, base)
+ *
+ * cdef inline object get_array_base(ndarray arr): # <<<<<<<<<<<<<<
+ * base = PyArray_BASE(arr)
+ * if base is NULL:
+ */
+
+static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__pyx_v_arr) {
+ PyObject *__pyx_v_base;
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ int __pyx_t_1;
+ __Pyx_RefNannySetupContext("get_array_base", 0);
+
+ /* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":933
+ *
+ * cdef inline object get_array_base(ndarray arr):
+ * base = PyArray_BASE(arr) # <<<<<<<<<<<<<<
+ * if base is NULL:
+ * return None
+ */
+ __pyx_v_base = PyArray_BASE(__pyx_v_arr);
+
+ /* "../../../../../../../../usr/local/lib/python3.10/site-packages/numpy/__init__.pxd":934
+ * cdef inline object get_array_base(ndarray arr):
+ * base = PyArray_BASE(arr)
+ * if base is NULL: # <<<<<<<<<<<<<<
+ * return None
+ * return