From 2ad2724ac83fbc115a84dccb236beb81b3b030f8 Mon Sep 17 00:00:00 2001
From: YoungPeng
Date: Tue, 8 Oct 2024 14:49:07 +0800
Subject: [PATCH 01/18] Add: convnext_base inference script.

---
 .../convnext_base/igie/README.md              |  47 +++++
 .../convnext_base/igie/build_engine.py        |  73 +++++++
 .../convnext_base/igie/export.py              |  61 ++++++
 .../convnext_base/igie/inference.py           | 186 ++++++++++++++++++
 .../infer_convnext_base_fp16_accuracy.sh      |  35 ++++
 .../infer_convnext_base_fp16_performance.sh   |  36 ++++
 6 files changed, 438 insertions(+)
 create mode 100644 models/cv/classification/convnext_base/igie/README.md
 create mode 100644 models/cv/classification/convnext_base/igie/build_engine.py
 create mode 100644 models/cv/classification/convnext_base/igie/export.py
 create mode 100644 models/cv/classification/convnext_base/igie/inference.py
 create mode 100644 models/cv/classification/convnext_base/igie/scripts/infer_convnext_base_fp16_accuracy.sh
 create mode 100644 models/cv/classification/convnext_base/igie/scripts/infer_convnext_base_fp16_performance.sh

diff --git a/models/cv/classification/convnext_base/igie/README.md b/models/cv/classification/convnext_base/igie/README.md
new file mode 100644
index 00000000..442ad963
--- /dev/null
+++ b/models/cv/classification/convnext_base/igie/README.md

# ConvNext Base

## Description

The ConvNeXt Base model represents a significant stride in the evolution of convolutional neural networks (CNNs), introduced by researchers at Facebook AI Research (FAIR) and UC Berkeley. It is part of the ConvNeXt family, which challenges the dominance of Vision Transformers (ViTs) in the realm of visual recognition tasks.

## Setup

### Install

```bash
pip3 install onnx
pip3 install tqdm
```

### Download

Pretrained model: 

Dataset: to download the validation dataset.

### Model Conversion

```bash
python3 export.py --weight convnext_base-6075fbad.pth --output convnext_base.onnx
```

## Inference

```bash
export DATASETS_DIR=/Path/to/imagenet_val/
```

### FP16

```bash
# Accuracy
bash scripts/infer_convnext_base_fp16_accuracy.sh
# Performance
bash scripts/infer_convnext_base_fp16_performance.sh
```

## Results

| Model          | BatchSize | Precision | FPS     | Top-1(%) | Top-5(%) |
| -------------- | --------- | --------- | ------- | -------- | -------- |
| ConvNext Base  | 32        | FP16      | 589.669 | 83.661   | 96.699   |

diff --git a/models/cv/classification/convnext_base/igie/build_engine.py b/models/cv/classification/convnext_base/igie/build_engine.py
new file mode 100644
index 00000000..d3626ae7
--- /dev/null
+++ b/models/cv/classification/convnext_base/igie/build_engine.py

# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
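
# build_engine.py compiles an ONNX model into an IGIE engine shared library
# (.so) for the Iluvatar MR target; the shell scripts under scripts/ drive it,
# for example:
#
#   python3 build_engine.py \
#       --model_path convnext_base.onnx \
#       --input input:32,3,224,224 \
#       --precision fp16 \
#       --engine_path convnext_base_bs_32_fp16.so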
+ +import tvm +import argparse +from tvm import relay +from tvm.relay.import_model import import_model_to_igie + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--model_path", + type=str, + required=True, + help="original model path.") + + parser.add_argument("--engine_path", + type=str, + required=True, + help="igie export engine path.") + + parser.add_argument("--input", + type=str, + required=True, + help=""" + input info of the model, format should be: + input_name:input_shape + eg: --input input:1,3,224,224. + """) + + parser.add_argument("--precision", + type=str, + choices=["fp32", "fp16", "int8"], + required=True, + help="model inference precision.") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + # get input valueinfo + input_name, input_shape = args.input.split(":") + shape = tuple([int(s) for s in input_shape.split(",")]) + input_dict = {input_name: shape} + + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + + mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie") + + # build engine + lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision) + + # export engine + lib.export_library(args.engine_path) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/classification/convnext_base/igie/export.py b/models/cv/classification/convnext_base/igie/export.py new file mode 100644 index 00000000..d9a2fe01 --- /dev/null +++ b/models/cv/classification/convnext_base/igie/export.py @@ -0,0 +1,61 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import torch +import torchvision +import argparse + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--weight", + type=str, + required=True, + help="pytorch model weight.") + + parser.add_argument("--output", + type=str, + required=True, + help="export onnx model path.") + + args = parser.parse_args() + return args + +def main(): + args = parse_args() + + model = torchvision.models.convnext_base() + model.load_state_dict(torch.load(args.weight)) + model.eval() + + input_names = ['input'] + output_names = ['output'] + dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}} + dummy_input = torch.randn(1, 3, 224, 224) + + torch.onnx.export( + model, + dummy_input, + args.output, + input_names = input_names, + dynamic_axes = dynamic_axes, + output_names = output_names, + opset_version=13 + ) + + print("Export onnx model successfully! ") + +if __name__ == "__main__": + main() diff --git a/models/cv/classification/convnext_base/igie/inference.py b/models/cv/classification/convnext_base/igie/inference.py new file mode 100644 index 00000000..3aef3ec7 --- /dev/null +++ b/models/cv/classification/convnext_base/igie/inference.py @@ -0,0 +1,186 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import sys +import argparse +import tvm +import torch +import torchvision +import numpy as np +from tvm import relay +from tqdm import tqdm +from torchvision import transforms +from torchvision.transforms.functional import InterpolationMode + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--engine", + type=str, + required=True, + help="igie engine path.") + + parser.add_argument("--batchsize", + type=int, + required=True, + help="inference batch size.") + + parser.add_argument("--datasets", + type=str, + required=True, + help="datasets path.") + + parser.add_argument("--input_name", + type=str, + required=True, + help="input name of the model.") + + parser.add_argument("--warmup", + type=int, + default=3, + help="number of warmup before test.") + + parser.add_argument("--num_workers", + type=int, + default=16, + help="number of workers used in pytorch dataloader.") + + parser.add_argument("--acc_target", + type=float, + default=None, + help="Model inference Accuracy target.") + + parser.add_argument("--fps_target", + type=float, + default=None, + help="Model inference FPS target.") + + parser.add_argument("--perf_only", + type=bool, + default=False, + help="Run performance test only") + + args = parser.parse_args() + + return args + +def get_dataloader(data_path, batch_size, num_workers): + dataset = torchvision.datasets.ImageFolder( + data_path, + transforms.Compose( + [ + transforms.Resize(256, interpolation=InterpolationMode.BILINEAR), + transforms.CenterCrop(224), + transforms.PILToTensor(), + transforms.ConvertImageDtype(torch.float), + transforms.Normalize( + mean=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225) + ) + ] + ) + ) + + dataloader = torch.utils.data.DataLoader(dataset, batch_size, num_workers=num_workers) + + return dataloader + +def get_topk_accuracy(pred, label): + if isinstance(pred, np.ndarray): + pred = torch.from_numpy(pred) + + if isinstance(label, np.ndarray): + label = torch.from_numpy(label) + + top1_acc = 0 + top5_acc = 0 + for idx in range(len(label)): + label_value = label[idx] + if label_value == torch.topk(pred[idx].float(), 1).indices.data: + top1_acc += 1 + top5_acc += 1 + + elif label_value in torch.topk(pred[idx].float(), 5).indices.data: + top5_acc += 1 + + return top1_acc, top5_acc + +def main(): + args = parse_args() + + batch_size = args.batchsize + + # create iluvatar target & device + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + device = tvm.device(target.kind.name, 0) + + # load engine + lib = tvm.runtime.load_module(args.engine) + + # create runtime from engine + module = tvm.contrib.graph_executor.GraphModule(lib["default"](device)) + + # just run perf test + if args.perf_only: + ftimer = module.module.time_evaluator("run", device, number=100, repeat=1) + prof_res = np.array(ftimer().results) * 1000 + fps = batch_size * 1000 / np.mean(prof_res) + print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}") 
+ else: + # warm up + for _ in range(args.warmup): + module.run() + + # get dataloader + dataloader = get_dataloader(args.datasets, batch_size, args.num_workers) + + top1_acc = 0 + top5_acc = 0 + total_num = 0 + + for image, label in tqdm(dataloader): + + # pad the last batch + pad_batch = len(image) != batch_size + + if pad_batch: + origin_size = len(image) + image = np.resize(image, (batch_size, *image.shape[1:])) + + module.set_input(args.input_name, tvm.nd.array(image, device)) + + # run inference + module.run() + + pred = module.get_output(0).asnumpy() + + if pad_batch: + pred = pred[:origin_size] + + # get batch accuracy + batch_top1_acc, batch_top5_acc = get_topk_accuracy(pred, label) + + top1_acc += batch_top1_acc + top5_acc += batch_top5_acc + total_num += batch_size + + result_stat = {} + result_stat["acc@1"] = round(top1_acc / total_num * 100.0, 3) + result_stat["acc@5"] = round(top5_acc / total_num * 100.0, 3) + + print(f"\n* Top1 acc: {result_stat['acc@1']} %, Top5 acc: {result_stat['acc@5']} %") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/classification/convnext_base/igie/scripts/infer_convnext_base_fp16_accuracy.sh b/models/cv/classification/convnext_base/igie/scripts/infer_convnext_base_fp16_accuracy.sh new file mode 100644 index 00000000..42575772 --- /dev/null +++ b/models/cv/classification/convnext_base/igie/scripts/infer_convnext_base_fp16_accuracy.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="convnext_base.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path convnext_base_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine convnext_base_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ No newline at end of file diff --git a/models/cv/classification/convnext_base/igie/scripts/infer_convnext_base_fp16_performance.sh b/models/cv/classification/convnext_base/igie/scripts/infer_convnext_base_fp16_performance.sh new file mode 100644 index 00000000..d5ae0649 --- /dev/null +++ b/models/cv/classification/convnext_base/igie/scripts/infer_convnext_base_fp16_performance.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="convnext_base.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path convnext_base_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine convnext_base_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ + --perf_only True \ No newline at end of file -- Gitee From 72c7d33b5a7055d53b3c15c1d7d9ba4e18bb0ecb Mon Sep 17 00:00:00 2001 From: YoungPeng Date: Tue, 8 Oct 2024 16:41:27 +0800 Subject: [PATCH 02/18] Add: densenet201 inference script. --- .../classification/densenet201/igie/README.md | 48 +++++ .../densenet201/igie/build_engine.py | 73 +++++++ .../classification/densenet201/igie/export.py | 74 +++++++ .../densenet201/igie/inference.py | 186 ++++++++++++++++++ .../infer_densenet201_fp16_accuracy.sh | 35 ++++ .../infer_densenet201_fp16_performance.sh | 36 ++++ 6 files changed, 452 insertions(+) create mode 100644 models/cv/classification/densenet201/igie/README.md create mode 100644 models/cv/classification/densenet201/igie/build_engine.py create mode 100644 models/cv/classification/densenet201/igie/export.py create mode 100644 models/cv/classification/densenet201/igie/inference.py create mode 100644 models/cv/classification/densenet201/igie/scripts/infer_densenet201_fp16_accuracy.sh create mode 100644 models/cv/classification/densenet201/igie/scripts/infer_densenet201_fp16_performance.sh diff --git a/models/cv/classification/densenet201/igie/README.md b/models/cv/classification/densenet201/igie/README.md new file mode 100644 index 00000000..595e338b --- /dev/null +++ b/models/cv/classification/densenet201/igie/README.md @@ -0,0 +1,48 @@ +# DenseNet201 + +## Description + +DenseNet201 is a deep convolutional neural network that stands out for its unique dense connection architecture, where each layer integrates features from all previous layers, effectively reusing features and reducing the number of parameters. This design not only enhances the network's information flow and parameter efficiency but also increases the model's regularization effect, helping to prevent overfitting. DenseNet201 consists of multiple dense blocks and transition layers, capable of capturing rich feature representations while maintaining computational efficiency, making it suitable for complex image recognition tasks. + + +## Setup + +### Install + +```bash +pip3 install onnx +pip3 install tqdm +``` + +### Download + +Pretrained model: + +Dataset: to download the validation dataset. 
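
The `inference.py` script loads the validation images with `torchvision.datasets.ImageFolder`, so the directory exported as `DATASETS_DIR` is expected to contain one sub-folder per class, for example (illustrative layout):

```
imagenet_val/
├── n01440764/
│   └── *.JPEG
├── n01443537/
│   └── *.JPEG
└── ...
```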
+ +### Model Conversion + +```bash +python3 export.py --weight densenet201-c1103571.pth --output densenet201.onnx +``` + +## Inference + +```bash +export DATASETS_DIR=/Path/to/imagenet_val/ +``` + +### FP16 + +```bash +# Accuracy +bash scripts/infer_densenet201_fp16_accuracy.sh +# Performance +bash scripts/infer_densenet201_fp16_performance.sh +``` + +## Results + +| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) | +| ----------- | --------- | --------- | -------- | -------- | -------- | +| DenseNet201 | 32 | FP16 | 758.592 | 76.851 | 93.338 | diff --git a/models/cv/classification/densenet201/igie/build_engine.py b/models/cv/classification/densenet201/igie/build_engine.py new file mode 100644 index 00000000..d3626ae7 --- /dev/null +++ b/models/cv/classification/densenet201/igie/build_engine.py @@ -0,0 +1,73 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import tvm +import argparse +from tvm import relay +from tvm.relay.import_model import import_model_to_igie + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--model_path", + type=str, + required=True, + help="original model path.") + + parser.add_argument("--engine_path", + type=str, + required=True, + help="igie export engine path.") + + parser.add_argument("--input", + type=str, + required=True, + help=""" + input info of the model, format should be: + input_name:input_shape + eg: --input input:1,3,224,224. + """) + + parser.add_argument("--precision", + type=str, + choices=["fp32", "fp16", "int8"], + required=True, + help="model inference precision.") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + # get input valueinfo + input_name, input_shape = args.input.split(":") + shape = tuple([int(s) for s in input_shape.split(",")]) + input_dict = {input_name: shape} + + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + + mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie") + + # build engine + lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision) + + # export engine + lib.export_library(args.engine_path) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/classification/densenet201/igie/export.py b/models/cv/classification/densenet201/igie/export.py new file mode 100644 index 00000000..66019547 --- /dev/null +++ b/models/cv/classification/densenet201/igie/export.py @@ -0,0 +1,74 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import torch +import torchvision +import argparse +import re + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--weight", + type=str, + required=True, + help="pytorch model weight.") + + parser.add_argument("--output", + type=str, + required=True, + help="export onnx model path.") + + args = parser.parse_args() + return args + +def main(): + args = parse_args() + + model = torchvision.models.densenet201(weights=False) + + state_dict = torch.load(args.weight) + + pattern = re.compile(r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$' + ) + for key in list(state_dict.keys()): + res = pattern.match(key) + if res: + new_key = res.group(1) + res.group(2) + state_dict[new_key] = state_dict[key] + del state_dict[key] + + model.load_state_dict(state_dict) + model.eval() + + input_names = ['input'] + output_names = ['output'] + dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}} + dummy_input = torch.randn(1, 3, 224, 224) + + torch.onnx.export( + model, + dummy_input, + args.output, + input_names = input_names, + dynamic_axes = dynamic_axes, + output_names = output_names, + opset_version=13 + ) + + print("Export onnx model successfully! ") + +if __name__ == "__main__": + main() diff --git a/models/cv/classification/densenet201/igie/inference.py b/models/cv/classification/densenet201/igie/inference.py new file mode 100644 index 00000000..3aef3ec7 --- /dev/null +++ b/models/cv/classification/densenet201/igie/inference.py @@ -0,0 +1,186 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
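
# inference.py evaluates a compiled IGIE engine (.so): by default it reports
# top-1/top-5 accuracy on an ImageFolder-style ImageNet validation set, and
# with --perf_only True it only measures throughput via TVM's time_evaluator.
# Example, mirroring scripts/infer_densenet201_fp16_accuracy.sh:
#
#   python3 inference.py \
#       --engine densenet201_bs_32_fp16.so \
#       --batchsize 32 \
#       --input_name input \
#       --datasets ${DATASETS_DIR}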
+ +import sys +import argparse +import tvm +import torch +import torchvision +import numpy as np +from tvm import relay +from tqdm import tqdm +from torchvision import transforms +from torchvision.transforms.functional import InterpolationMode + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--engine", + type=str, + required=True, + help="igie engine path.") + + parser.add_argument("--batchsize", + type=int, + required=True, + help="inference batch size.") + + parser.add_argument("--datasets", + type=str, + required=True, + help="datasets path.") + + parser.add_argument("--input_name", + type=str, + required=True, + help="input name of the model.") + + parser.add_argument("--warmup", + type=int, + default=3, + help="number of warmup before test.") + + parser.add_argument("--num_workers", + type=int, + default=16, + help="number of workers used in pytorch dataloader.") + + parser.add_argument("--acc_target", + type=float, + default=None, + help="Model inference Accuracy target.") + + parser.add_argument("--fps_target", + type=float, + default=None, + help="Model inference FPS target.") + + parser.add_argument("--perf_only", + type=bool, + default=False, + help="Run performance test only") + + args = parser.parse_args() + + return args + +def get_dataloader(data_path, batch_size, num_workers): + dataset = torchvision.datasets.ImageFolder( + data_path, + transforms.Compose( + [ + transforms.Resize(256, interpolation=InterpolationMode.BILINEAR), + transforms.CenterCrop(224), + transforms.PILToTensor(), + transforms.ConvertImageDtype(torch.float), + transforms.Normalize( + mean=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225) + ) + ] + ) + ) + + dataloader = torch.utils.data.DataLoader(dataset, batch_size, num_workers=num_workers) + + return dataloader + +def get_topk_accuracy(pred, label): + if isinstance(pred, np.ndarray): + pred = torch.from_numpy(pred) + + if isinstance(label, np.ndarray): + label = torch.from_numpy(label) + + top1_acc = 0 + top5_acc = 0 + for idx in range(len(label)): + label_value = label[idx] + if label_value == torch.topk(pred[idx].float(), 1).indices.data: + top1_acc += 1 + top5_acc += 1 + + elif label_value in torch.topk(pred[idx].float(), 5).indices.data: + top5_acc += 1 + + return top1_acc, top5_acc + +def main(): + args = parse_args() + + batch_size = args.batchsize + + # create iluvatar target & device + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + device = tvm.device(target.kind.name, 0) + + # load engine + lib = tvm.runtime.load_module(args.engine) + + # create runtime from engine + module = tvm.contrib.graph_executor.GraphModule(lib["default"](device)) + + # just run perf test + if args.perf_only: + ftimer = module.module.time_evaluator("run", device, number=100, repeat=1) + prof_res = np.array(ftimer().results) * 1000 + fps = batch_size * 1000 / np.mean(prof_res) + print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}") + else: + # warm up + for _ in range(args.warmup): + module.run() + + # get dataloader + dataloader = get_dataloader(args.datasets, batch_size, args.num_workers) + + top1_acc = 0 + top5_acc = 0 + total_num = 0 + + for image, label in tqdm(dataloader): + + # pad the last batch + pad_batch = len(image) != batch_size + + if pad_batch: + origin_size = len(image) + image = np.resize(image, (batch_size, *image.shape[1:])) + + module.set_input(args.input_name, tvm.nd.array(image, device)) + + # run inference + module.run() + + pred = 
module.get_output(0).asnumpy() + + if pad_batch: + pred = pred[:origin_size] + + # get batch accuracy + batch_top1_acc, batch_top5_acc = get_topk_accuracy(pred, label) + + top1_acc += batch_top1_acc + top5_acc += batch_top5_acc + total_num += batch_size + + result_stat = {} + result_stat["acc@1"] = round(top1_acc / total_num * 100.0, 3) + result_stat["acc@5"] = round(top5_acc / total_num * 100.0, 3) + + print(f"\n* Top1 acc: {result_stat['acc@1']} %, Top5 acc: {result_stat['acc@5']} %") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/classification/densenet201/igie/scripts/infer_densenet201_fp16_accuracy.sh b/models/cv/classification/densenet201/igie/scripts/infer_densenet201_fp16_accuracy.sh new file mode 100644 index 00000000..470b285a --- /dev/null +++ b/models/cv/classification/densenet201/igie/scripts/infer_densenet201_fp16_accuracy.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="densenet201.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path densenet201_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine densenet201_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ No newline at end of file diff --git a/models/cv/classification/densenet201/igie/scripts/infer_densenet201_fp16_performance.sh b/models/cv/classification/densenet201/igie/scripts/infer_densenet201_fp16_performance.sh new file mode 100644 index 00000000..e1ad69b7 --- /dev/null +++ b/models/cv/classification/densenet201/igie/scripts/infer_densenet201_fp16_performance.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
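
# Performance mode: inference.py is called with --perf_only True, so it skips
# the dataset and times 100 engine runs with TVM's time_evaluator; the FPS it
# reports is batchsize * 1000 / mean latency (ms).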
+ +batchsize=32 +model_path="densenet201.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path densenet201_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine densenet201_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ + --perf_only True \ No newline at end of file -- Gitee From bcba9323816c4bff98238256ae9bcce6553f8ecf Mon Sep 17 00:00:00 2001 From: YoungPeng Date: Tue, 8 Oct 2024 17:09:37 +0800 Subject: [PATCH 03/18] Add: efficientnet_b3 inference script. --- .../efficientnet_b3/igie/README.md | 47 +++++ .../efficientnet_b3/igie/build_engine.py | 73 +++++++ .../efficientnet_b3/igie/export.py | 61 ++++++ .../efficientnet_b3/igie/inference.py | 186 ++++++++++++++++++ .../infer_efficientnet_b3_fp16_accuracy.sh | 35 ++++ .../infer_efficientnet_b3_fp16_performance.sh | 36 ++++ 6 files changed, 438 insertions(+) create mode 100644 models/cv/classification/efficientnet_b3/igie/README.md create mode 100644 models/cv/classification/efficientnet_b3/igie/build_engine.py create mode 100644 models/cv/classification/efficientnet_b3/igie/export.py create mode 100644 models/cv/classification/efficientnet_b3/igie/inference.py create mode 100644 models/cv/classification/efficientnet_b3/igie/scripts/infer_efficientnet_b3_fp16_accuracy.sh create mode 100644 models/cv/classification/efficientnet_b3/igie/scripts/infer_efficientnet_b3_fp16_performance.sh diff --git a/models/cv/classification/efficientnet_b3/igie/README.md b/models/cv/classification/efficientnet_b3/igie/README.md new file mode 100644 index 00000000..1331670b --- /dev/null +++ b/models/cv/classification/efficientnet_b3/igie/README.md @@ -0,0 +1,47 @@ +# EfficientNet B3 + +## Description + +EfficientNet B3 is a member of the EfficientNet family, a series of convolutional neural network architectures that are designed to achieve excellent accuracy and efficiency. Introduced by researchers at Google, EfficientNets utilize the compound scaling method, which uniformly scales the depth, width, and resolution of the network to improve accuracy and efficiency. + +## Setup + +### Install + +```bash +pip3 install onnx +pip3 install tqdm +``` + +### Download + +Pretrained model: + +Dataset: to download the validation dataset. + +### Model Conversion + +```bash +python3 export.py --weight efficientnet_b3_rwightman-b3899882.pth --output efficientnet_b3.onnx +``` + +## Inference + +```bash +export DATASETS_DIR=/Path/to/imagenet_val/ +``` + +### FP16 + +```bash +# Accuracy +bash scripts/infer_efficientnet_b3_fp16_accuracy.sh +# Performance +bash scripts/infer_efficientnet_b3_fp16_performance.sh +``` + +## Results + +| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) | +| --------------- | --------- | --------- | -------- | -------- | -------- | +| Efficientnet_b3 | 32 | FP16 | 1144.391 | 78.503 | 94.340 | diff --git a/models/cv/classification/efficientnet_b3/igie/build_engine.py b/models/cv/classification/efficientnet_b3/igie/build_engine.py new file mode 100644 index 00000000..d3626ae7 --- /dev/null +++ b/models/cv/classification/efficientnet_b3/igie/build_engine.py @@ -0,0 +1,73 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import tvm +import argparse +from tvm import relay +from tvm.relay.import_model import import_model_to_igie + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--model_path", + type=str, + required=True, + help="original model path.") + + parser.add_argument("--engine_path", + type=str, + required=True, + help="igie export engine path.") + + parser.add_argument("--input", + type=str, + required=True, + help=""" + input info of the model, format should be: + input_name:input_shape + eg: --input input:1,3,224,224. + """) + + parser.add_argument("--precision", + type=str, + choices=["fp32", "fp16", "int8"], + required=True, + help="model inference precision.") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + # get input valueinfo + input_name, input_shape = args.input.split(":") + shape = tuple([int(s) for s in input_shape.split(",")]) + input_dict = {input_name: shape} + + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + + mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie") + + # build engine + lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision) + + # export engine + lib.export_library(args.engine_path) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b3/igie/export.py b/models/cv/classification/efficientnet_b3/igie/export.py new file mode 100644 index 00000000..f66e2cb0 --- /dev/null +++ b/models/cv/classification/efficientnet_b3/igie/export.py @@ -0,0 +1,61 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
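
# export.py builds torchvision's efficientnet_b3, loads the downloaded
# checkpoint and exports it to ONNX (opset 13) with a dynamic batch axis.
# Example (see README.md):
#
#   python3 export.py \
#       --weight efficientnet_b3_rwightman-b3899882.pth \
#       --output efficientnet_b3.onnx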
+ +import torch +import torchvision +import argparse + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--weight", + type=str, + required=True, + help="pytorch model weight.") + + parser.add_argument("--output", + type=str, + required=True, + help="export onnx model path.") + + args = parser.parse_args() + return args + +def main(): + args = parse_args() + + model = torchvision.models.efficientnet_b3() + model.load_state_dict(torch.load(args.weight)) + model.eval() + + input_names = ['input'] + output_names = ['output'] + dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}} + dummy_input = torch.randn(1, 3, 224, 224) + + torch.onnx.export( + model, + dummy_input, + args.output, + input_names = input_names, + dynamic_axes = dynamic_axes, + output_names = output_names, + opset_version=13 + ) + + print("Export onnx model successfully! ") + +if __name__ == "__main__": + main() diff --git a/models/cv/classification/efficientnet_b3/igie/inference.py b/models/cv/classification/efficientnet_b3/igie/inference.py new file mode 100644 index 00000000..3aef3ec7 --- /dev/null +++ b/models/cv/classification/efficientnet_b3/igie/inference.py @@ -0,0 +1,186 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
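
# Preprocessing in get_dataloader below follows the classic ImageNet
# evaluation recipe: resize the short side to 256 (bilinear), center-crop to
# 224x224, convert to float and normalize with the ImageNet mean/std.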
+ +import sys +import argparse +import tvm +import torch +import torchvision +import numpy as np +from tvm import relay +from tqdm import tqdm +from torchvision import transforms +from torchvision.transforms.functional import InterpolationMode + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--engine", + type=str, + required=True, + help="igie engine path.") + + parser.add_argument("--batchsize", + type=int, + required=True, + help="inference batch size.") + + parser.add_argument("--datasets", + type=str, + required=True, + help="datasets path.") + + parser.add_argument("--input_name", + type=str, + required=True, + help="input name of the model.") + + parser.add_argument("--warmup", + type=int, + default=3, + help="number of warmup before test.") + + parser.add_argument("--num_workers", + type=int, + default=16, + help="number of workers used in pytorch dataloader.") + + parser.add_argument("--acc_target", + type=float, + default=None, + help="Model inference Accuracy target.") + + parser.add_argument("--fps_target", + type=float, + default=None, + help="Model inference FPS target.") + + parser.add_argument("--perf_only", + type=bool, + default=False, + help="Run performance test only") + + args = parser.parse_args() + + return args + +def get_dataloader(data_path, batch_size, num_workers): + dataset = torchvision.datasets.ImageFolder( + data_path, + transforms.Compose( + [ + transforms.Resize(256, interpolation=InterpolationMode.BILINEAR), + transforms.CenterCrop(224), + transforms.PILToTensor(), + transforms.ConvertImageDtype(torch.float), + transforms.Normalize( + mean=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225) + ) + ] + ) + ) + + dataloader = torch.utils.data.DataLoader(dataset, batch_size, num_workers=num_workers) + + return dataloader + +def get_topk_accuracy(pred, label): + if isinstance(pred, np.ndarray): + pred = torch.from_numpy(pred) + + if isinstance(label, np.ndarray): + label = torch.from_numpy(label) + + top1_acc = 0 + top5_acc = 0 + for idx in range(len(label)): + label_value = label[idx] + if label_value == torch.topk(pred[idx].float(), 1).indices.data: + top1_acc += 1 + top5_acc += 1 + + elif label_value in torch.topk(pred[idx].float(), 5).indices.data: + top5_acc += 1 + + return top1_acc, top5_acc + +def main(): + args = parse_args() + + batch_size = args.batchsize + + # create iluvatar target & device + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + device = tvm.device(target.kind.name, 0) + + # load engine + lib = tvm.runtime.load_module(args.engine) + + # create runtime from engine + module = tvm.contrib.graph_executor.GraphModule(lib["default"](device)) + + # just run perf test + if args.perf_only: + ftimer = module.module.time_evaluator("run", device, number=100, repeat=1) + prof_res = np.array(ftimer().results) * 1000 + fps = batch_size * 1000 / np.mean(prof_res) + print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}") + else: + # warm up + for _ in range(args.warmup): + module.run() + + # get dataloader + dataloader = get_dataloader(args.datasets, batch_size, args.num_workers) + + top1_acc = 0 + top5_acc = 0 + total_num = 0 + + for image, label in tqdm(dataloader): + + # pad the last batch + pad_batch = len(image) != batch_size + + if pad_batch: + origin_size = len(image) + image = np.resize(image, (batch_size, *image.shape[1:])) + + module.set_input(args.input_name, tvm.nd.array(image, device)) + + # run inference + module.run() + + pred = 
module.get_output(0).asnumpy() + + if pad_batch: + pred = pred[:origin_size] + + # get batch accuracy + batch_top1_acc, batch_top5_acc = get_topk_accuracy(pred, label) + + top1_acc += batch_top1_acc + top5_acc += batch_top5_acc + total_num += batch_size + + result_stat = {} + result_stat["acc@1"] = round(top1_acc / total_num * 100.0, 3) + result_stat["acc@5"] = round(top5_acc / total_num * 100.0, 3) + + print(f"\n* Top1 acc: {result_stat['acc@1']} %, Top5 acc: {result_stat['acc@5']} %") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b3/igie/scripts/infer_efficientnet_b3_fp16_accuracy.sh b/models/cv/classification/efficientnet_b3/igie/scripts/infer_efficientnet_b3_fp16_accuracy.sh new file mode 100644 index 00000000..36e9a874 --- /dev/null +++ b/models/cv/classification/efficientnet_b3/igie/scripts/infer_efficientnet_b3_fp16_accuracy.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="efficientnet_b3.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path efficientnet_b3_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine efficientnet_b3_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b3/igie/scripts/infer_efficientnet_b3_fp16_performance.sh b/models/cv/classification/efficientnet_b3/igie/scripts/infer_efficientnet_b3_fp16_performance.sh new file mode 100644 index 00000000..27b13c90 --- /dev/null +++ b/models/cv/classification/efficientnet_b3/igie/scripts/infer_efficientnet_b3_fp16_performance.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +batchsize=32 +model_path="efficientnet_b3.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path efficientnet_b3_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine efficientnet_b3_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ + --perf_only True \ No newline at end of file -- Gitee From 589265d702113135f93d2c64073f7b4dd654e8db Mon Sep 17 00:00:00 2001 From: YoungPeng Date: Wed, 9 Oct 2024 11:07:11 +0800 Subject: [PATCH 04/18] Add: efficientnet_v2_s inference script. --- .../efficientnet_v2_s/igie/README.md | 47 +++++ .../efficientnet_v2_s/igie/build_engine.py | 73 +++++++ .../efficientnet_v2_s/igie/export.py | 61 ++++++ .../efficientnet_v2_s/igie/inference.py | 186 ++++++++++++++++++ .../infer_efficientnet_v2_s_fp16_accuracy.sh | 35 ++++ ...nfer_efficientnet_v2_s_fp16_performance.sh | 36 ++++ 6 files changed, 438 insertions(+) create mode 100644 models/cv/classification/efficientnet_v2_s/igie/README.md create mode 100644 models/cv/classification/efficientnet_v2_s/igie/build_engine.py create mode 100644 models/cv/classification/efficientnet_v2_s/igie/export.py create mode 100644 models/cv/classification/efficientnet_v2_s/igie/inference.py create mode 100644 models/cv/classification/efficientnet_v2_s/igie/scripts/infer_efficientnet_v2_s_fp16_accuracy.sh create mode 100644 models/cv/classification/efficientnet_v2_s/igie/scripts/infer_efficientnet_v2_s_fp16_performance.sh diff --git a/models/cv/classification/efficientnet_v2_s/igie/README.md b/models/cv/classification/efficientnet_v2_s/igie/README.md new file mode 100644 index 00000000..cd4fa6f8 --- /dev/null +++ b/models/cv/classification/efficientnet_v2_s/igie/README.md @@ -0,0 +1,47 @@ +# EfficientNet_v2_s + +## Description + +EfficientNetV2 S is an optimized model in the EfficientNetV2 series, which was developed by Google researchers. It continues the legacy of the EfficientNet family, focusing on advancing the state-of-the-art in accuracy and efficiency through advanced scaling techniques and architectural innovations. + +## Setup + +### Install + +```bash +pip3 install onnx +pip3 install tqdm +``` + +### Download + +Pretrained model: + +Dataset: to download the validation dataset. + +### Model Conversion + +```bash +python3 export.py --weight efficientnet_v2_s-dd5fe13b.pth --output efficientnet_v2_s.onnx +``` + +## Inference + +```bash +export DATASETS_DIR=/Path/to/imagenet_val/ +``` + +### FP16 + +```bash +# Accuracy +bash scripts/infer_efficientnet_v2_s_fp16_accuracy.sh +# Performance +bash scripts/infer_efficientnet_v2_s_fp16_performance.sh +``` + +## Results + +| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) | +| ----------------- | --------- | --------- | -------- | -------- | -------- | +| Efficientnet_v2_s | 32 | FP16 | 2357.457 | 81.290 | 95.242 | diff --git a/models/cv/classification/efficientnet_v2_s/igie/build_engine.py b/models/cv/classification/efficientnet_v2_s/igie/build_engine.py new file mode 100644 index 00000000..d3626ae7 --- /dev/null +++ b/models/cv/classification/efficientnet_v2_s/igie/build_engine.py @@ -0,0 +1,73 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import tvm +import argparse +from tvm import relay +from tvm.relay.import_model import import_model_to_igie + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--model_path", + type=str, + required=True, + help="original model path.") + + parser.add_argument("--engine_path", + type=str, + required=True, + help="igie export engine path.") + + parser.add_argument("--input", + type=str, + required=True, + help=""" + input info of the model, format should be: + input_name:input_shape + eg: --input input:1,3,224,224. + """) + + parser.add_argument("--precision", + type=str, + choices=["fp32", "fp16", "int8"], + required=True, + help="model inference precision.") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + # get input valueinfo + input_name, input_shape = args.input.split(":") + shape = tuple([int(s) for s in input_shape.split(",")]) + input_dict = {input_name: shape} + + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + + mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie") + + # build engine + lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision) + + # export engine + lib.export_library(args.engine_path) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/classification/efficientnet_v2_s/igie/export.py b/models/cv/classification/efficientnet_v2_s/igie/export.py new file mode 100644 index 00000000..63b1b4c8 --- /dev/null +++ b/models/cv/classification/efficientnet_v2_s/igie/export.py @@ -0,0 +1,61 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
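
# export.py loads the efficientnet_v2_s checkpoint and writes an ONNX model
# with a dynamic batch dimension. An optional sanity check (assumed, not part
# of this script) is to verify the exported file with the onnx checker:
#
#   import onnx
#   onnx.checker.check_model(onnx.load("efficientnet_v2_s.onnx"))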
+ +import torch +import torchvision +import argparse + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--weight", + type=str, + required=True, + help="pytorch model weight.") + + parser.add_argument("--output", + type=str, + required=True, + help="export onnx model path.") + + args = parser.parse_args() + return args + +def main(): + args = parse_args() + + model = torchvision.models.efficientnet_v2_s() + model.load_state_dict(torch.load(args.weight)) + model.eval() + + input_names = ['input'] + output_names = ['output'] + dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}} + dummy_input = torch.randn(1, 3, 224, 224) + + torch.onnx.export( + model, + dummy_input, + args.output, + input_names = input_names, + dynamic_axes = dynamic_axes, + output_names = output_names, + opset_version=13 + ) + + print("Export onnx model successfully! ") + +if __name__ == "__main__": + main() diff --git a/models/cv/classification/efficientnet_v2_s/igie/inference.py b/models/cv/classification/efficientnet_v2_s/igie/inference.py new file mode 100644 index 00000000..3aef3ec7 --- /dev/null +++ b/models/cv/classification/efficientnet_v2_s/igie/inference.py @@ -0,0 +1,186 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
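
# Note: when the number of validation images is not a multiple of --batchsize,
# the final batch is padded up to the full batch size with np.resize and the
# padded predictions are dropped again before accuracy is accumulated.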
+ +import sys +import argparse +import tvm +import torch +import torchvision +import numpy as np +from tvm import relay +from tqdm import tqdm +from torchvision import transforms +from torchvision.transforms.functional import InterpolationMode + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--engine", + type=str, + required=True, + help="igie engine path.") + + parser.add_argument("--batchsize", + type=int, + required=True, + help="inference batch size.") + + parser.add_argument("--datasets", + type=str, + required=True, + help="datasets path.") + + parser.add_argument("--input_name", + type=str, + required=True, + help="input name of the model.") + + parser.add_argument("--warmup", + type=int, + default=3, + help="number of warmup before test.") + + parser.add_argument("--num_workers", + type=int, + default=16, + help="number of workers used in pytorch dataloader.") + + parser.add_argument("--acc_target", + type=float, + default=None, + help="Model inference Accuracy target.") + + parser.add_argument("--fps_target", + type=float, + default=None, + help="Model inference FPS target.") + + parser.add_argument("--perf_only", + type=bool, + default=False, + help="Run performance test only") + + args = parser.parse_args() + + return args + +def get_dataloader(data_path, batch_size, num_workers): + dataset = torchvision.datasets.ImageFolder( + data_path, + transforms.Compose( + [ + transforms.Resize(256, interpolation=InterpolationMode.BILINEAR), + transforms.CenterCrop(224), + transforms.PILToTensor(), + transforms.ConvertImageDtype(torch.float), + transforms.Normalize( + mean=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225) + ) + ] + ) + ) + + dataloader = torch.utils.data.DataLoader(dataset, batch_size, num_workers=num_workers) + + return dataloader + +def get_topk_accuracy(pred, label): + if isinstance(pred, np.ndarray): + pred = torch.from_numpy(pred) + + if isinstance(label, np.ndarray): + label = torch.from_numpy(label) + + top1_acc = 0 + top5_acc = 0 + for idx in range(len(label)): + label_value = label[idx] + if label_value == torch.topk(pred[idx].float(), 1).indices.data: + top1_acc += 1 + top5_acc += 1 + + elif label_value in torch.topk(pred[idx].float(), 5).indices.data: + top5_acc += 1 + + return top1_acc, top5_acc + +def main(): + args = parse_args() + + batch_size = args.batchsize + + # create iluvatar target & device + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + device = tvm.device(target.kind.name, 0) + + # load engine + lib = tvm.runtime.load_module(args.engine) + + # create runtime from engine + module = tvm.contrib.graph_executor.GraphModule(lib["default"](device)) + + # just run perf test + if args.perf_only: + ftimer = module.module.time_evaluator("run", device, number=100, repeat=1) + prof_res = np.array(ftimer().results) * 1000 + fps = batch_size * 1000 / np.mean(prof_res) + print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}") + else: + # warm up + for _ in range(args.warmup): + module.run() + + # get dataloader + dataloader = get_dataloader(args.datasets, batch_size, args.num_workers) + + top1_acc = 0 + top5_acc = 0 + total_num = 0 + + for image, label in tqdm(dataloader): + + # pad the last batch + pad_batch = len(image) != batch_size + + if pad_batch: + origin_size = len(image) + image = np.resize(image, (batch_size, *image.shape[1:])) + + module.set_input(args.input_name, tvm.nd.array(image, device)) + + # run inference + module.run() + + pred = 
module.get_output(0).asnumpy() + + if pad_batch: + pred = pred[:origin_size] + + # get batch accuracy + batch_top1_acc, batch_top5_acc = get_topk_accuracy(pred, label) + + top1_acc += batch_top1_acc + top5_acc += batch_top5_acc + total_num += batch_size + + result_stat = {} + result_stat["acc@1"] = round(top1_acc / total_num * 100.0, 3) + result_stat["acc@5"] = round(top5_acc / total_num * 100.0, 3) + + print(f"\n* Top1 acc: {result_stat['acc@1']} %, Top5 acc: {result_stat['acc@5']} %") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/classification/efficientnet_v2_s/igie/scripts/infer_efficientnet_v2_s_fp16_accuracy.sh b/models/cv/classification/efficientnet_v2_s/igie/scripts/infer_efficientnet_v2_s_fp16_accuracy.sh new file mode 100644 index 00000000..2e7d1133 --- /dev/null +++ b/models/cv/classification/efficientnet_v2_s/igie/scripts/infer_efficientnet_v2_s_fp16_accuracy.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="efficientnet_v2_s.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path efficientnet_v2_s_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine efficientnet_v2_s_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ No newline at end of file diff --git a/models/cv/classification/efficientnet_v2_s/igie/scripts/infer_efficientnet_v2_s_fp16_performance.sh b/models/cv/classification/efficientnet_v2_s/igie/scripts/infer_efficientnet_v2_s_fp16_performance.sh new file mode 100644 index 00000000..4c67dd99 --- /dev/null +++ b/models/cv/classification/efficientnet_v2_s/igie/scripts/infer_efficientnet_v2_s_fp16_performance.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +batchsize=32 +model_path="efficientnet_v2_s.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path efficientnet_v2_s_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine efficientnet_v2_s_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ + --perf_only True \ No newline at end of file -- Gitee From c587a7b5ece466429e432a34de96265d32b80c44 Mon Sep 17 00:00:00 2001 From: YoungPeng Date: Wed, 9 Oct 2024 11:31:30 +0800 Subject: [PATCH 05/18] Add: mnasnet0_5 inference script. --- .../classification/mnasnet0_5/igie/README.md | 48 +++++ .../mnasnet0_5/igie/build_engine.py | 73 +++++++ .../classification/mnasnet0_5/igie/export.py | 61 ++++++ .../mnasnet0_5/igie/inference.py | 186 ++++++++++++++++++ .../scripts/infer_mnasnet0_5_fp16_accuracy.sh | 35 ++++ .../infer_mnasnet0_5_fp16_performance.sh | 36 ++++ 6 files changed, 439 insertions(+) create mode 100644 models/cv/classification/mnasnet0_5/igie/README.md create mode 100644 models/cv/classification/mnasnet0_5/igie/build_engine.py create mode 100644 models/cv/classification/mnasnet0_5/igie/export.py create mode 100644 models/cv/classification/mnasnet0_5/igie/inference.py create mode 100644 models/cv/classification/mnasnet0_5/igie/scripts/infer_mnasnet0_5_fp16_accuracy.sh create mode 100644 models/cv/classification/mnasnet0_5/igie/scripts/infer_mnasnet0_5_fp16_performance.sh diff --git a/models/cv/classification/mnasnet0_5/igie/README.md b/models/cv/classification/mnasnet0_5/igie/README.md new file mode 100644 index 00000000..3055a187 --- /dev/null +++ b/models/cv/classification/mnasnet0_5/igie/README.md @@ -0,0 +1,48 @@ +# MNASNet0_5 + +## Description + +MNASNet0_5 is a neural network architecture optimized for mobile devices, designed through neural architecture search technology. It is characterized by high efficiency and excellent accuracy, offering 50% higher accuracy than MobileNetV2 while maintaining low latency and memory usage. MNASNet0_5 widely uses depthwise separable convolutions, supports multi-scale inputs, and demonstrates good robustness, making it suitable for real-time image recognition tasks in resource-constrained environments. + + +## Setup + +### Install + +```bash +pip3 install onnx +pip3 install tqdm +``` + +### Download + +Pretrained model: + +Dataset: to download the validation dataset. + +### Model Conversion + +```bash +python3 export.py --weight mnasnet0.5_top1_67.823-3ffadce67e.pth --output mnasnet0_5.onnx +``` + +## Inference + +```bash +export DATASETS_DIR=/Path/to/imagenet_val/ +``` + +### FP16 + +```bash +# Accuracy +bash scripts/infer_mnasnet0_5_fp16_accuracy.sh +# Performance +bash scripts/infer_mnasnet0_5_fp16_performance.sh +``` + +## Results + +| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) | +| ----------------- | --------- | --------- | -------- | -------- | -------- | +| MnasNet0_5 | 32 | FP16 | 7933.980 | 67.748 | 87.452 | diff --git a/models/cv/classification/mnasnet0_5/igie/build_engine.py b/models/cv/classification/mnasnet0_5/igie/build_engine.py new file mode 100644 index 00000000..d3626ae7 --- /dev/null +++ b/models/cv/classification/mnasnet0_5/igie/build_engine.py @@ -0,0 +1,73 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import tvm +import argparse +from tvm import relay +from tvm.relay.import_model import import_model_to_igie + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--model_path", + type=str, + required=True, + help="original model path.") + + parser.add_argument("--engine_path", + type=str, + required=True, + help="igie export engine path.") + + parser.add_argument("--input", + type=str, + required=True, + help=""" + input info of the model, format should be: + input_name:input_shape + eg: --input input:1,3,224,224. + """) + + parser.add_argument("--precision", + type=str, + choices=["fp32", "fp16", "int8"], + required=True, + help="model inference precision.") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + # get input valueinfo + input_name, input_shape = args.input.split(":") + shape = tuple([int(s) for s in input_shape.split(",")]) + input_dict = {input_name: shape} + + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + + mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie") + + # build engine + lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision) + + # export engine + lib.export_library(args.engine_path) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/classification/mnasnet0_5/igie/export.py b/models/cv/classification/mnasnet0_5/igie/export.py new file mode 100644 index 00000000..bd48e206 --- /dev/null +++ b/models/cv/classification/mnasnet0_5/igie/export.py @@ -0,0 +1,61 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
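+
+# Export helper: instantiates torchvision's mnasnet0_5, loads the downloaded
+# checkpoint, and exports it to ONNX (opset 13) with a dynamic batch dimension.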
+ +import torch +import torchvision +import argparse + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--weight", + type=str, + required=True, + help="pytorch model weight.") + + parser.add_argument("--output", + type=str, + required=True, + help="export onnx model path.") + + args = parser.parse_args() + return args + +def main(): + args = parse_args() + + model = torchvision.models.mnasnet0_5() + model.load_state_dict(torch.load(args.weight)) + model.eval() + + input_names = ['input'] + output_names = ['output'] + dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}} + dummy_input = torch.randn(1, 3, 224, 224) + + torch.onnx.export( + model, + dummy_input, + args.output, + input_names = input_names, + dynamic_axes = dynamic_axes, + output_names = output_names, + opset_version=13 + ) + + print("Export onnx model successfully! ") + +if __name__ == "__main__": + main() diff --git a/models/cv/classification/mnasnet0_5/igie/inference.py b/models/cv/classification/mnasnet0_5/igie/inference.py new file mode 100644 index 00000000..3aef3ec7 --- /dev/null +++ b/models/cv/classification/mnasnet0_5/igie/inference.py @@ -0,0 +1,186 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
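+
+# Accuracy/performance driver: loads the compiled IGIE engine, preprocesses the
+# ImageNet validation set with torchvision's ImageFolder pipeline, and reports
+# Top-1/Top-5 accuracy, or only latency/FPS when --perf_only is passed.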
+ +import sys +import argparse +import tvm +import torch +import torchvision +import numpy as np +from tvm import relay +from tqdm import tqdm +from torchvision import transforms +from torchvision.transforms.functional import InterpolationMode + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--engine", + type=str, + required=True, + help="igie engine path.") + + parser.add_argument("--batchsize", + type=int, + required=True, + help="inference batch size.") + + parser.add_argument("--datasets", + type=str, + required=True, + help="datasets path.") + + parser.add_argument("--input_name", + type=str, + required=True, + help="input name of the model.") + + parser.add_argument("--warmup", + type=int, + default=3, + help="number of warmup before test.") + + parser.add_argument("--num_workers", + type=int, + default=16, + help="number of workers used in pytorch dataloader.") + + parser.add_argument("--acc_target", + type=float, + default=None, + help="Model inference Accuracy target.") + + parser.add_argument("--fps_target", + type=float, + default=None, + help="Model inference FPS target.") + + parser.add_argument("--perf_only", + type=bool, + default=False, + help="Run performance test only") + + args = parser.parse_args() + + return args + +def get_dataloader(data_path, batch_size, num_workers): + dataset = torchvision.datasets.ImageFolder( + data_path, + transforms.Compose( + [ + transforms.Resize(256, interpolation=InterpolationMode.BILINEAR), + transforms.CenterCrop(224), + transforms.PILToTensor(), + transforms.ConvertImageDtype(torch.float), + transforms.Normalize( + mean=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225) + ) + ] + ) + ) + + dataloader = torch.utils.data.DataLoader(dataset, batch_size, num_workers=num_workers) + + return dataloader + +def get_topk_accuracy(pred, label): + if isinstance(pred, np.ndarray): + pred = torch.from_numpy(pred) + + if isinstance(label, np.ndarray): + label = torch.from_numpy(label) + + top1_acc = 0 + top5_acc = 0 + for idx in range(len(label)): + label_value = label[idx] + if label_value == torch.topk(pred[idx].float(), 1).indices.data: + top1_acc += 1 + top5_acc += 1 + + elif label_value in torch.topk(pred[idx].float(), 5).indices.data: + top5_acc += 1 + + return top1_acc, top5_acc + +def main(): + args = parse_args() + + batch_size = args.batchsize + + # create iluvatar target & device + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + device = tvm.device(target.kind.name, 0) + + # load engine + lib = tvm.runtime.load_module(args.engine) + + # create runtime from engine + module = tvm.contrib.graph_executor.GraphModule(lib["default"](device)) + + # just run perf test + if args.perf_only: + ftimer = module.module.time_evaluator("run", device, number=100, repeat=1) + prof_res = np.array(ftimer().results) * 1000 + fps = batch_size * 1000 / np.mean(prof_res) + print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}") + else: + # warm up + for _ in range(args.warmup): + module.run() + + # get dataloader + dataloader = get_dataloader(args.datasets, batch_size, args.num_workers) + + top1_acc = 0 + top5_acc = 0 + total_num = 0 + + for image, label in tqdm(dataloader): + + # pad the last batch + pad_batch = len(image) != batch_size + + if pad_batch: + origin_size = len(image) + image = np.resize(image, (batch_size, *image.shape[1:])) + + module.set_input(args.input_name, tvm.nd.array(image, device)) + + # run inference + module.run() + + pred = 
module.get_output(0).asnumpy() + + if pad_batch: + pred = pred[:origin_size] + + # get batch accuracy + batch_top1_acc, batch_top5_acc = get_topk_accuracy(pred, label) + + top1_acc += batch_top1_acc + top5_acc += batch_top5_acc + total_num += batch_size + + result_stat = {} + result_stat["acc@1"] = round(top1_acc / total_num * 100.0, 3) + result_stat["acc@5"] = round(top5_acc / total_num * 100.0, 3) + + print(f"\n* Top1 acc: {result_stat['acc@1']} %, Top5 acc: {result_stat['acc@5']} %") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/classification/mnasnet0_5/igie/scripts/infer_mnasnet0_5_fp16_accuracy.sh b/models/cv/classification/mnasnet0_5/igie/scripts/infer_mnasnet0_5_fp16_accuracy.sh new file mode 100644 index 00000000..a1c3b37a --- /dev/null +++ b/models/cv/classification/mnasnet0_5/igie/scripts/infer_mnasnet0_5_fp16_accuracy.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="mnasnet0_5.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path mnasnet0_5_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine mnasnet0_5_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ No newline at end of file diff --git a/models/cv/classification/mnasnet0_5/igie/scripts/infer_mnasnet0_5_fp16_performance.sh b/models/cv/classification/mnasnet0_5/igie/scripts/infer_mnasnet0_5_fp16_performance.sh new file mode 100644 index 00000000..89271d33 --- /dev/null +++ b/models/cv/classification/mnasnet0_5/igie/scripts/infer_mnasnet0_5_fp16_performance.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +batchsize=32 +model_path="mnasnet0_5.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path mnasnet0_5_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine mnasnet0_5_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ + --perf_only True \ No newline at end of file -- Gitee From 65c0c21f13e720d35a40d74f0211816624d1c160 Mon Sep 17 00:00:00 2001 From: YoungPeng Date: Wed, 9 Oct 2024 14:15:43 +0800 Subject: [PATCH 06/18] Add: regnet_y_1_6gf inference script. --- .../regnet_y_1_6gf/igie/README.md | 47 +++++ .../regnet_y_1_6gf/igie/build_engine.py | 73 +++++++ .../regnet_y_1_6gf/igie/export.py | 61 ++++++ .../regnet_y_1_6gf/igie/inference.py | 186 ++++++++++++++++++ .../infer_regnet_y_1_6gf_fp16_accuracy.sh | 35 ++++ .../infer_regnet_y_1_6gf_fp16_performance.sh | 36 ++++ 6 files changed, 438 insertions(+) create mode 100644 models/cv/classification/regnet_y_1_6gf/igie/README.md create mode 100644 models/cv/classification/regnet_y_1_6gf/igie/build_engine.py create mode 100644 models/cv/classification/regnet_y_1_6gf/igie/export.py create mode 100644 models/cv/classification/regnet_y_1_6gf/igie/inference.py create mode 100644 models/cv/classification/regnet_y_1_6gf/igie/scripts/infer_regnet_y_1_6gf_fp16_accuracy.sh create mode 100644 models/cv/classification/regnet_y_1_6gf/igie/scripts/infer_regnet_y_1_6gf_fp16_performance.sh diff --git a/models/cv/classification/regnet_y_1_6gf/igie/README.md b/models/cv/classification/regnet_y_1_6gf/igie/README.md new file mode 100644 index 00000000..3f96b09b --- /dev/null +++ b/models/cv/classification/regnet_y_1_6gf/igie/README.md @@ -0,0 +1,47 @@ +# RegNet_y_1_6gf + +## Description + +RegNet is a family of models designed for image classification tasks, as described in the paper "Designing Network Design Spaces". The RegNet design space provides simple and fast networks that work well across a wide range of computational budgets.The architecture of RegNet models is based on the principle of designing network design spaces, which allows for a more systematic exploration of possible network architectures. This makes it easier to understand and modify the architecture.RegNet_y_1_6gf is a specific model within the RegNet family, designed for image classification tasks. + +## Setup + +### Install + +```bash +pip3 install onnx +pip3 install tqdm +``` + +### Download + +Pretrained model: + +Dataset: to download the validation dataset. + +### Model Conversion + +```bash +python3 export.py --weight regnet_y_1_6gf-b11a554e.pth --output regnet_y_1_6gf.onnx +``` + +## Inference + +```bash +export DATASETS_DIR=/Path/to/imagenet_val/ +``` + +### FP16 + +```bash +# Accuracy +bash scripts/infer_regnet_y_1_6gf_fp16_accuracy.sh +# Performance +bash scripts/infer_regnet_y_1_6gf_fp16_performance.sh +``` + +## Results + +Model |BatchSize |Precision |FPS |Top-1(%) |Top-5(%) +------------------|-----------|----------|---------|---------|-------- +RegNet_y_1_6gf | 32 | FP16 | 1785.44 | 77.933 | 93.948 diff --git a/models/cv/classification/regnet_y_1_6gf/igie/build_engine.py b/models/cv/classification/regnet_y_1_6gf/igie/build_engine.py new file mode 100644 index 00000000..d3626ae7 --- /dev/null +++ b/models/cv/classification/regnet_y_1_6gf/igie/build_engine.py @@ -0,0 +1,73 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. 
+# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import tvm +import argparse +from tvm import relay +from tvm.relay.import_model import import_model_to_igie + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--model_path", + type=str, + required=True, + help="original model path.") + + parser.add_argument("--engine_path", + type=str, + required=True, + help="igie export engine path.") + + parser.add_argument("--input", + type=str, + required=True, + help=""" + input info of the model, format should be: + input_name:input_shape + eg: --input input:1,3,224,224. + """) + + parser.add_argument("--precision", + type=str, + choices=["fp32", "fp16", "int8"], + required=True, + help="model inference precision.") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + # get input valueinfo + input_name, input_shape = args.input.split(":") + shape = tuple([int(s) for s in input_shape.split(",")]) + input_dict = {input_name: shape} + + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + + mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie") + + # build engine + lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision) + + # export engine + lib.export_library(args.engine_path) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/classification/regnet_y_1_6gf/igie/export.py b/models/cv/classification/regnet_y_1_6gf/igie/export.py new file mode 100644 index 00000000..f2a9f0a2 --- /dev/null +++ b/models/cv/classification/regnet_y_1_6gf/igie/export.py @@ -0,0 +1,61 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
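+
+# Export helper: instantiates torchvision's regnet_y_1_6gf, loads the downloaded
+# checkpoint, and exports it to ONNX (opset 13) with a dynamic batch dimension.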
+ +import torch +import torchvision +import argparse + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--weight", + type=str, + required=True, + help="pytorch model weight.") + + parser.add_argument("--output", + type=str, + required=True, + help="export onnx model path.") + + args = parser.parse_args() + return args + +def main(): + args = parse_args() + + model = torchvision.models.regnet_y_1_6gf() + model.load_state_dict(torch.load(args.weight)) + model.eval() + + input_names = ['input'] + output_names = ['output'] + dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}} + dummy_input = torch.randn(1, 3, 224, 224) + + torch.onnx.export( + model, + dummy_input, + args.output, + input_names = input_names, + dynamic_axes = dynamic_axes, + output_names = output_names, + opset_version=13 + ) + + print("Export onnx model successfully! ") + +if __name__ == "__main__": + main() diff --git a/models/cv/classification/regnet_y_1_6gf/igie/inference.py b/models/cv/classification/regnet_y_1_6gf/igie/inference.py new file mode 100644 index 00000000..3aef3ec7 --- /dev/null +++ b/models/cv/classification/regnet_y_1_6gf/igie/inference.py @@ -0,0 +1,186 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
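+
+# Runs the exported IGIE engine over the ImageNet validation split (torchvision
+# ImageFolder preprocessing) and prints Top-1/Top-5 accuracy; with --perf_only
+# it only measures mean inference time and FPS.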
+ +import sys +import argparse +import tvm +import torch +import torchvision +import numpy as np +from tvm import relay +from tqdm import tqdm +from torchvision import transforms +from torchvision.transforms.functional import InterpolationMode + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--engine", + type=str, + required=True, + help="igie engine path.") + + parser.add_argument("--batchsize", + type=int, + required=True, + help="inference batch size.") + + parser.add_argument("--datasets", + type=str, + required=True, + help="datasets path.") + + parser.add_argument("--input_name", + type=str, + required=True, + help="input name of the model.") + + parser.add_argument("--warmup", + type=int, + default=3, + help="number of warmup before test.") + + parser.add_argument("--num_workers", + type=int, + default=16, + help="number of workers used in pytorch dataloader.") + + parser.add_argument("--acc_target", + type=float, + default=None, + help="Model inference Accuracy target.") + + parser.add_argument("--fps_target", + type=float, + default=None, + help="Model inference FPS target.") + + parser.add_argument("--perf_only", + type=bool, + default=False, + help="Run performance test only") + + args = parser.parse_args() + + return args + +def get_dataloader(data_path, batch_size, num_workers): + dataset = torchvision.datasets.ImageFolder( + data_path, + transforms.Compose( + [ + transforms.Resize(256, interpolation=InterpolationMode.BILINEAR), + transforms.CenterCrop(224), + transforms.PILToTensor(), + transforms.ConvertImageDtype(torch.float), + transforms.Normalize( + mean=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225) + ) + ] + ) + ) + + dataloader = torch.utils.data.DataLoader(dataset, batch_size, num_workers=num_workers) + + return dataloader + +def get_topk_accuracy(pred, label): + if isinstance(pred, np.ndarray): + pred = torch.from_numpy(pred) + + if isinstance(label, np.ndarray): + label = torch.from_numpy(label) + + top1_acc = 0 + top5_acc = 0 + for idx in range(len(label)): + label_value = label[idx] + if label_value == torch.topk(pred[idx].float(), 1).indices.data: + top1_acc += 1 + top5_acc += 1 + + elif label_value in torch.topk(pred[idx].float(), 5).indices.data: + top5_acc += 1 + + return top1_acc, top5_acc + +def main(): + args = parse_args() + + batch_size = args.batchsize + + # create iluvatar target & device + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + device = tvm.device(target.kind.name, 0) + + # load engine + lib = tvm.runtime.load_module(args.engine) + + # create runtime from engine + module = tvm.contrib.graph_executor.GraphModule(lib["default"](device)) + + # just run perf test + if args.perf_only: + ftimer = module.module.time_evaluator("run", device, number=100, repeat=1) + prof_res = np.array(ftimer().results) * 1000 + fps = batch_size * 1000 / np.mean(prof_res) + print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}") + else: + # warm up + for _ in range(args.warmup): + module.run() + + # get dataloader + dataloader = get_dataloader(args.datasets, batch_size, args.num_workers) + + top1_acc = 0 + top5_acc = 0 + total_num = 0 + + for image, label in tqdm(dataloader): + + # pad the last batch + pad_batch = len(image) != batch_size + + if pad_batch: + origin_size = len(image) + image = np.resize(image, (batch_size, *image.shape[1:])) + + module.set_input(args.input_name, tvm.nd.array(image, device)) + + # run inference + module.run() + + pred = 
module.get_output(0).asnumpy() + + if pad_batch: + pred = pred[:origin_size] + + # get batch accuracy + batch_top1_acc, batch_top5_acc = get_topk_accuracy(pred, label) + + top1_acc += batch_top1_acc + top5_acc += batch_top5_acc + total_num += batch_size + + result_stat = {} + result_stat["acc@1"] = round(top1_acc / total_num * 100.0, 3) + result_stat["acc@5"] = round(top5_acc / total_num * 100.0, 3) + + print(f"\n* Top1 acc: {result_stat['acc@1']} %, Top5 acc: {result_stat['acc@5']} %") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/classification/regnet_y_1_6gf/igie/scripts/infer_regnet_y_1_6gf_fp16_accuracy.sh b/models/cv/classification/regnet_y_1_6gf/igie/scripts/infer_regnet_y_1_6gf_fp16_accuracy.sh new file mode 100644 index 00000000..62d9cc11 --- /dev/null +++ b/models/cv/classification/regnet_y_1_6gf/igie/scripts/infer_regnet_y_1_6gf_fp16_accuracy.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="regnet_y_1_6gf.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path regnet_y_1_6gf_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine regnet_y_1_6gf_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ No newline at end of file diff --git a/models/cv/classification/regnet_y_1_6gf/igie/scripts/infer_regnet_y_1_6gf_fp16_performance.sh b/models/cv/classification/regnet_y_1_6gf/igie/scripts/infer_regnet_y_1_6gf_fp16_performance.sh new file mode 100644 index 00000000..afe3a450 --- /dev/null +++ b/models/cv/classification/regnet_y_1_6gf/igie/scripts/infer_regnet_y_1_6gf_fp16_performance.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +batchsize=32 +model_path="regnet_y_1_6gf.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path regnet_y_1_6gf_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine regnet_y_1_6gf_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ + --perf_only True \ No newline at end of file -- Gitee From 8cbaab5f888edc43085321e2e861a17aee187710 Mon Sep 17 00:00:00 2001 From: YoungPeng Date: Wed, 9 Oct 2024 15:04:53 +0800 Subject: [PATCH 07/18] Add: resnext101_64x4d inference script. --- .../resnext101_64x4d/igie/README.md | 47 +++++ .../resnext101_64x4d/igie/build_engine.py | 73 +++++++ .../resnext101_64x4d/igie/export.py | 61 ++++++ .../resnext101_64x4d/igie/inference.py | 186 ++++++++++++++++++ .../infer_resnext101_64x4d_fp16_accuracy.sh | 35 ++++ ...infer_resnext101_64x4d_fp16_performance.sh | 36 ++++ 6 files changed, 438 insertions(+) create mode 100644 models/cv/classification/resnext101_64x4d/igie/README.md create mode 100644 models/cv/classification/resnext101_64x4d/igie/build_engine.py create mode 100644 models/cv/classification/resnext101_64x4d/igie/export.py create mode 100644 models/cv/classification/resnext101_64x4d/igie/inference.py create mode 100644 models/cv/classification/resnext101_64x4d/igie/scripts/infer_resnext101_64x4d_fp16_accuracy.sh create mode 100644 models/cv/classification/resnext101_64x4d/igie/scripts/infer_resnext101_64x4d_fp16_performance.sh diff --git a/models/cv/classification/resnext101_64x4d/igie/README.md b/models/cv/classification/resnext101_64x4d/igie/README.md new file mode 100644 index 00000000..5b9846f2 --- /dev/null +++ b/models/cv/classification/resnext101_64x4d/igie/README.md @@ -0,0 +1,47 @@ +# ResNext101_64x4d + +## Description + +The ResNeXt101_64x4d is a deep learning model based on the deep residual network architecture, which enhances performance and efficiency through the use of grouped convolutions. With a depth of 101 layers and 64 filter groups, it is particularly suited for complex image recognition tasks. While maintaining excellent accuracy, it can adapt to various input sizes + +## Setup + +### Install + +```bash +pip3 install onnx +pip3 install tqdm +``` + +### Download + +Pretrained model: + +Dataset: to download the validation dataset. + +### Model Conversion + +```bash +python3 export.py --weight resnext101_64x4d-173b62eb.pth --output resnext101_64x4d.onnx +``` + +## Inference + +```bash +export DATASETS_DIR=/Path/to/imagenet_val/ +``` + +### FP16 + +```bash +# Accuracy +bash scripts/infer_resnext101_64x4d_fp16_accuracy.sh +# Performance +bash scripts/infer_resnext101_64x4d_fp16_performance.sh +``` + +## Results + +Model |BatchSize |Precision |FPS |Top-1(%) |Top-5(%) +----------------|-----------|----------|---------|----------|-------- +ResNext101_64x4d| 32 | FP16 | 663.13 | 82.953 | 96.221 diff --git a/models/cv/classification/resnext101_64x4d/igie/build_engine.py b/models/cv/classification/resnext101_64x4d/igie/build_engine.py new file mode 100644 index 00000000..d3626ae7 --- /dev/null +++ b/models/cv/classification/resnext101_64x4d/igie/build_engine.py @@ -0,0 +1,73 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import tvm +import argparse +from tvm import relay +from tvm.relay.import_model import import_model_to_igie + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--model_path", + type=str, + required=True, + help="original model path.") + + parser.add_argument("--engine_path", + type=str, + required=True, + help="igie export engine path.") + + parser.add_argument("--input", + type=str, + required=True, + help=""" + input info of the model, format should be: + input_name:input_shape + eg: --input input:1,3,224,224. + """) + + parser.add_argument("--precision", + type=str, + choices=["fp32", "fp16", "int8"], + required=True, + help="model inference precision.") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + # get input valueinfo + input_name, input_shape = args.input.split(":") + shape = tuple([int(s) for s in input_shape.split(",")]) + input_dict = {input_name: shape} + + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + + mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie") + + # build engine + lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision) + + # export engine + lib.export_library(args.engine_path) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/classification/resnext101_64x4d/igie/export.py b/models/cv/classification/resnext101_64x4d/igie/export.py new file mode 100644 index 00000000..43a20fca --- /dev/null +++ b/models/cv/classification/resnext101_64x4d/igie/export.py @@ -0,0 +1,61 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
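+
+# Export helper: instantiates torchvision's resnext101_64x4d, loads the
+# downloaded checkpoint, and exports it to ONNX (opset 13) with a dynamic
+# batch dimension.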
+ +import torch +import torchvision +import argparse + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--weight", + type=str, + required=True, + help="pytorch model weight.") + + parser.add_argument("--output", + type=str, + required=True, + help="export onnx model path.") + + args = parser.parse_args() + return args + +def main(): + args = parse_args() + + model = torchvision.models.resnext101_64x4d() + model.load_state_dict(torch.load(args.weight)) + model.eval() + + input_names = ['input'] + output_names = ['output'] + dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}} + dummy_input = torch.randn(1, 3, 224, 224) + + torch.onnx.export( + model, + dummy_input, + args.output, + input_names = input_names, + dynamic_axes = dynamic_axes, + output_names = output_names, + opset_version=13 + ) + + print("Export onnx model successfully! ") + +if __name__ == "__main__": + main() diff --git a/models/cv/classification/resnext101_64x4d/igie/inference.py b/models/cv/classification/resnext101_64x4d/igie/inference.py new file mode 100644 index 00000000..3aef3ec7 --- /dev/null +++ b/models/cv/classification/resnext101_64x4d/igie/inference.py @@ -0,0 +1,186 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
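+
+# Accuracy/performance driver for the IGIE engine: ImageNet validation with
+# torchvision ImageFolder preprocessing, reporting Top-1/Top-5 accuracy or,
+# with --perf_only, mean inference time and FPS.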
+ +import sys +import argparse +import tvm +import torch +import torchvision +import numpy as np +from tvm import relay +from tqdm import tqdm +from torchvision import transforms +from torchvision.transforms.functional import InterpolationMode + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--engine", + type=str, + required=True, + help="igie engine path.") + + parser.add_argument("--batchsize", + type=int, + required=True, + help="inference batch size.") + + parser.add_argument("--datasets", + type=str, + required=True, + help="datasets path.") + + parser.add_argument("--input_name", + type=str, + required=True, + help="input name of the model.") + + parser.add_argument("--warmup", + type=int, + default=3, + help="number of warmup before test.") + + parser.add_argument("--num_workers", + type=int, + default=16, + help="number of workers used in pytorch dataloader.") + + parser.add_argument("--acc_target", + type=float, + default=None, + help="Model inference Accuracy target.") + + parser.add_argument("--fps_target", + type=float, + default=None, + help="Model inference FPS target.") + + parser.add_argument("--perf_only", + type=bool, + default=False, + help="Run performance test only") + + args = parser.parse_args() + + return args + +def get_dataloader(data_path, batch_size, num_workers): + dataset = torchvision.datasets.ImageFolder( + data_path, + transforms.Compose( + [ + transforms.Resize(256, interpolation=InterpolationMode.BILINEAR), + transforms.CenterCrop(224), + transforms.PILToTensor(), + transforms.ConvertImageDtype(torch.float), + transforms.Normalize( + mean=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225) + ) + ] + ) + ) + + dataloader = torch.utils.data.DataLoader(dataset, batch_size, num_workers=num_workers) + + return dataloader + +def get_topk_accuracy(pred, label): + if isinstance(pred, np.ndarray): + pred = torch.from_numpy(pred) + + if isinstance(label, np.ndarray): + label = torch.from_numpy(label) + + top1_acc = 0 + top5_acc = 0 + for idx in range(len(label)): + label_value = label[idx] + if label_value == torch.topk(pred[idx].float(), 1).indices.data: + top1_acc += 1 + top5_acc += 1 + + elif label_value in torch.topk(pred[idx].float(), 5).indices.data: + top5_acc += 1 + + return top1_acc, top5_acc + +def main(): + args = parse_args() + + batch_size = args.batchsize + + # create iluvatar target & device + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + device = tvm.device(target.kind.name, 0) + + # load engine + lib = tvm.runtime.load_module(args.engine) + + # create runtime from engine + module = tvm.contrib.graph_executor.GraphModule(lib["default"](device)) + + # just run perf test + if args.perf_only: + ftimer = module.module.time_evaluator("run", device, number=100, repeat=1) + prof_res = np.array(ftimer().results) * 1000 + fps = batch_size * 1000 / np.mean(prof_res) + print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}") + else: + # warm up + for _ in range(args.warmup): + module.run() + + # get dataloader + dataloader = get_dataloader(args.datasets, batch_size, args.num_workers) + + top1_acc = 0 + top5_acc = 0 + total_num = 0 + + for image, label in tqdm(dataloader): + + # pad the last batch + pad_batch = len(image) != batch_size + + if pad_batch: + origin_size = len(image) + image = np.resize(image, (batch_size, *image.shape[1:])) + + module.set_input(args.input_name, tvm.nd.array(image, device)) + + # run inference + module.run() + + pred = 
module.get_output(0).asnumpy() + + if pad_batch: + pred = pred[:origin_size] + + # get batch accuracy + batch_top1_acc, batch_top5_acc = get_topk_accuracy(pred, label) + + top1_acc += batch_top1_acc + top5_acc += batch_top5_acc + total_num += batch_size + + result_stat = {} + result_stat["acc@1"] = round(top1_acc / total_num * 100.0, 3) + result_stat["acc@5"] = round(top5_acc / total_num * 100.0, 3) + + print(f"\n* Top1 acc: {result_stat['acc@1']} %, Top5 acc: {result_stat['acc@5']} %") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/classification/resnext101_64x4d/igie/scripts/infer_resnext101_64x4d_fp16_accuracy.sh b/models/cv/classification/resnext101_64x4d/igie/scripts/infer_resnext101_64x4d_fp16_accuracy.sh new file mode 100644 index 00000000..edcb1a00 --- /dev/null +++ b/models/cv/classification/resnext101_64x4d/igie/scripts/infer_resnext101_64x4d_fp16_accuracy.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="resnext101_64x4d.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path resnext101_64x4d_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine resnext101_64x4d_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ No newline at end of file diff --git a/models/cv/classification/resnext101_64x4d/igie/scripts/infer_resnext101_64x4d_fp16_performance.sh b/models/cv/classification/resnext101_64x4d/igie/scripts/infer_resnext101_64x4d_fp16_performance.sh new file mode 100644 index 00000000..3ccf3bc5 --- /dev/null +++ b/models/cv/classification/resnext101_64x4d/igie/scripts/infer_resnext101_64x4d_fp16_performance.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +batchsize=32 +model_path="resnext101_64x4d.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path resnext101_64x4d_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine resnext101_64x4d_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ + --perf_only True \ No newline at end of file -- Gitee From 67e3b05a41c8939a839776cf4ad3df7b1e08b76c Mon Sep 17 00:00:00 2001 From: YoungPeng Date: Wed, 9 Oct 2024 15:35:26 +0800 Subject: [PATCH 08/18] Add: shufflenetv2_x1_5 inference script. --- .../shufflenetv2_x1_5/igie/README.md | 47 +++++ .../shufflenetv2_x1_5/igie/build_engine.py | 73 +++++++ .../shufflenetv2_x1_5/igie/export.py | 61 ++++++ .../shufflenetv2_x1_5/igie/inference.py | 186 ++++++++++++++++++ .../infer_shufflenetv2_x1_5_fp16_accuracy.sh | 35 ++++ ...nfer_shufflenetv2_x1_5_fp16_performance.sh | 36 ++++ 6 files changed, 438 insertions(+) create mode 100644 models/cv/classification/shufflenetv2_x1_5/igie/README.md create mode 100644 models/cv/classification/shufflenetv2_x1_5/igie/build_engine.py create mode 100644 models/cv/classification/shufflenetv2_x1_5/igie/export.py create mode 100644 models/cv/classification/shufflenetv2_x1_5/igie/inference.py create mode 100644 models/cv/classification/shufflenetv2_x1_5/igie/scripts/infer_shufflenetv2_x1_5_fp16_accuracy.sh create mode 100644 models/cv/classification/shufflenetv2_x1_5/igie/scripts/infer_shufflenetv2_x1_5_fp16_performance.sh diff --git a/models/cv/classification/shufflenetv2_x1_5/igie/README.md b/models/cv/classification/shufflenetv2_x1_5/igie/README.md new file mode 100644 index 00000000..5bd87eab --- /dev/null +++ b/models/cv/classification/shufflenetv2_x1_5/igie/README.md @@ -0,0 +1,47 @@ +# ShuffleNetV2_x1_5 + +## Description + +ShuffleNetV2_x1_5 is a lightweight convolutional neural network specifically designed for efficient image recognition tasks on resource-constrained devices. It achieves high performance and low latency through the introduction of channel shuffling and pointwise group convolutions. Despite its small model size, it offers high accuracy and is suitable for a variety of vision tasks in mobile devices and embedded systems. + +## Setup + +### Install + +```bash +pip3 install onnx +pip3 install tqdm +``` + +### Download + +Pretrained model: + +Dataset: to download the validation dataset. + +### Model Conversion + +```bash +python3 export.py --weight shufflenetv2_x1_5-3c479a10.pth --output shufflenetv2_x1_5.onnx +``` + +## Inference + +```bash +export DATASETS_DIR=/Path/to/imagenet_val/ +``` + +### FP16 + +```bash +# Accuracy +bash scripts/infer_shufflenetv2_x1_5_fp16_accuracy.sh +# Performance +bash scripts/infer_shufflenetv2_x1_5_fp16_performance.sh +``` + +## Results + +| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) | +| ----------------- | --------- | --------- | -------- | -------- | -------- | +| ShuffleNetV2_x1_5 | 32 | FP16 | 7478.728 | 72.755 | 91.031 | diff --git a/models/cv/classification/shufflenetv2_x1_5/igie/build_engine.py b/models/cv/classification/shufflenetv2_x1_5/igie/build_engine.py new file mode 100644 index 00000000..d3626ae7 --- /dev/null +++ b/models/cv/classification/shufflenetv2_x1_5/igie/build_engine.py @@ -0,0 +1,73 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import tvm +import argparse +from tvm import relay +from tvm.relay.import_model import import_model_to_igie + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--model_path", + type=str, + required=True, + help="original model path.") + + parser.add_argument("--engine_path", + type=str, + required=True, + help="igie export engine path.") + + parser.add_argument("--input", + type=str, + required=True, + help=""" + input info of the model, format should be: + input_name:input_shape + eg: --input input:1,3,224,224. + """) + + parser.add_argument("--precision", + type=str, + choices=["fp32", "fp16", "int8"], + required=True, + help="model inference precision.") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + # get input valueinfo + input_name, input_shape = args.input.split(":") + shape = tuple([int(s) for s in input_shape.split(",")]) + input_dict = {input_name: shape} + + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + + mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie") + + # build engine + lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision) + + # export engine + lib.export_library(args.engine_path) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/classification/shufflenetv2_x1_5/igie/export.py b/models/cv/classification/shufflenetv2_x1_5/igie/export.py new file mode 100644 index 00000000..0f89ba7c --- /dev/null +++ b/models/cv/classification/shufflenetv2_x1_5/igie/export.py @@ -0,0 +1,61 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
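+
+# Export helper: instantiates torchvision's shufflenet_v2_x1_5, loads the
+# downloaded checkpoint, and exports it to ONNX (opset 13) with a dynamic
+# batch dimension.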
+ +import torch +import torchvision +import argparse + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--weight", + type=str, + required=True, + help="pytorch model weight.") + + parser.add_argument("--output", + type=str, + required=True, + help="export onnx model path.") + + args = parser.parse_args() + return args + +def main(): + args = parse_args() + + model = torchvision.models.shufflenet_v2_x1_5() + model.load_state_dict(torch.load(args.weight)) + model.eval() + + input_names = ['input'] + output_names = ['output'] + dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}} + dummy_input = torch.randn(1, 3, 224, 224) + + torch.onnx.export( + model, + dummy_input, + args.output, + input_names = input_names, + dynamic_axes = dynamic_axes, + output_names = output_names, + opset_version=13 + ) + + print("Export onnx model successfully! ") + +if __name__ == "__main__": + main() diff --git a/models/cv/classification/shufflenetv2_x1_5/igie/inference.py b/models/cv/classification/shufflenetv2_x1_5/igie/inference.py new file mode 100644 index 00000000..3aef3ec7 --- /dev/null +++ b/models/cv/classification/shufflenetv2_x1_5/igie/inference.py @@ -0,0 +1,186 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
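+
+# Runs the compiled IGIE engine over the ImageNet validation split and reports
+# Top-1/Top-5 accuracy, or only latency/FPS when --perf_only is set.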
+ +import sys +import argparse +import tvm +import torch +import torchvision +import numpy as np +from tvm import relay +from tqdm import tqdm +from torchvision import transforms +from torchvision.transforms.functional import InterpolationMode + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--engine", + type=str, + required=True, + help="igie engine path.") + + parser.add_argument("--batchsize", + type=int, + required=True, + help="inference batch size.") + + parser.add_argument("--datasets", + type=str, + required=True, + help="datasets path.") + + parser.add_argument("--input_name", + type=str, + required=True, + help="input name of the model.") + + parser.add_argument("--warmup", + type=int, + default=3, + help="number of warmup before test.") + + parser.add_argument("--num_workers", + type=int, + default=16, + help="number of workers used in pytorch dataloader.") + + parser.add_argument("--acc_target", + type=float, + default=None, + help="Model inference Accuracy target.") + + parser.add_argument("--fps_target", + type=float, + default=None, + help="Model inference FPS target.") + + parser.add_argument("--perf_only", + type=bool, + default=False, + help="Run performance test only") + + args = parser.parse_args() + + return args + +def get_dataloader(data_path, batch_size, num_workers): + dataset = torchvision.datasets.ImageFolder( + data_path, + transforms.Compose( + [ + transforms.Resize(256, interpolation=InterpolationMode.BILINEAR), + transforms.CenterCrop(224), + transforms.PILToTensor(), + transforms.ConvertImageDtype(torch.float), + transforms.Normalize( + mean=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225) + ) + ] + ) + ) + + dataloader = torch.utils.data.DataLoader(dataset, batch_size, num_workers=num_workers) + + return dataloader + +def get_topk_accuracy(pred, label): + if isinstance(pred, np.ndarray): + pred = torch.from_numpy(pred) + + if isinstance(label, np.ndarray): + label = torch.from_numpy(label) + + top1_acc = 0 + top5_acc = 0 + for idx in range(len(label)): + label_value = label[idx] + if label_value == torch.topk(pred[idx].float(), 1).indices.data: + top1_acc += 1 + top5_acc += 1 + + elif label_value in torch.topk(pred[idx].float(), 5).indices.data: + top5_acc += 1 + + return top1_acc, top5_acc + +def main(): + args = parse_args() + + batch_size = args.batchsize + + # create iluvatar target & device + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + device = tvm.device(target.kind.name, 0) + + # load engine + lib = tvm.runtime.load_module(args.engine) + + # create runtime from engine + module = tvm.contrib.graph_executor.GraphModule(lib["default"](device)) + + # just run perf test + if args.perf_only: + ftimer = module.module.time_evaluator("run", device, number=100, repeat=1) + prof_res = np.array(ftimer().results) * 1000 + fps = batch_size * 1000 / np.mean(prof_res) + print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}") + else: + # warm up + for _ in range(args.warmup): + module.run() + + # get dataloader + dataloader = get_dataloader(args.datasets, batch_size, args.num_workers) + + top1_acc = 0 + top5_acc = 0 + total_num = 0 + + for image, label in tqdm(dataloader): + + # pad the last batch + pad_batch = len(image) != batch_size + + if pad_batch: + origin_size = len(image) + image = np.resize(image, (batch_size, *image.shape[1:])) + + module.set_input(args.input_name, tvm.nd.array(image, device)) + + # run inference + module.run() + + pred = 
module.get_output(0).asnumpy() + + if pad_batch: + pred = pred[:origin_size] + + # get batch accuracy + batch_top1_acc, batch_top5_acc = get_topk_accuracy(pred, label) + + top1_acc += batch_top1_acc + top5_acc += batch_top5_acc + total_num += batch_size + + result_stat = {} + result_stat["acc@1"] = round(top1_acc / total_num * 100.0, 3) + result_stat["acc@5"] = round(top5_acc / total_num * 100.0, 3) + + print(f"\n* Top1 acc: {result_stat['acc@1']} %, Top5 acc: {result_stat['acc@5']} %") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/classification/shufflenetv2_x1_5/igie/scripts/infer_shufflenetv2_x1_5_fp16_accuracy.sh b/models/cv/classification/shufflenetv2_x1_5/igie/scripts/infer_shufflenetv2_x1_5_fp16_accuracy.sh new file mode 100644 index 00000000..9be9264c --- /dev/null +++ b/models/cv/classification/shufflenetv2_x1_5/igie/scripts/infer_shufflenetv2_x1_5_fp16_accuracy.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="shufflenetv2_x1_5.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path shufflenetv2_x1_5_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine shufflenetv2_x1_5_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ No newline at end of file diff --git a/models/cv/classification/shufflenetv2_x1_5/igie/scripts/infer_shufflenetv2_x1_5_fp16_performance.sh b/models/cv/classification/shufflenetv2_x1_5/igie/scripts/infer_shufflenetv2_x1_5_fp16_performance.sh new file mode 100644 index 00000000..cc5a424d --- /dev/null +++ b/models/cv/classification/shufflenetv2_x1_5/igie/scripts/infer_shufflenetv2_x1_5_fp16_performance.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +batchsize=32 +model_path="shufflenetv2_x1_5.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path shufflenetv2_x1_5_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine shufflenetv2_x1_5_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ + --perf_only True \ No newline at end of file -- Gitee From 865c38337bc7be7043a8d3c649661db5a19b08b5 Mon Sep 17 00:00:00 2001 From: YoungPeng Date: Thu, 10 Oct 2024 16:48:26 +0800 Subject: [PATCH 09/18] Add: mvitv2_base inference script. --- .../classification/mvitv2_base/igie/README.md | 68 +++++++ .../mvitv2_base/igie/build_engine.py | 73 +++++++ .../classification/mvitv2_base/igie/export.py | 74 +++++++ .../mvitv2_base/igie/inference.py | 185 ++++++++++++++++++ .../infer_mvitv2_base_fp16_accuracy.sh | 35 ++++ .../infer_mvitv2_base_fp16_performance.sh | 36 ++++ 6 files changed, 471 insertions(+) create mode 100644 models/cv/classification/mvitv2_base/igie/README.md create mode 100644 models/cv/classification/mvitv2_base/igie/build_engine.py create mode 100644 models/cv/classification/mvitv2_base/igie/export.py create mode 100644 models/cv/classification/mvitv2_base/igie/inference.py create mode 100644 models/cv/classification/mvitv2_base/igie/scripts/infer_mvitv2_base_fp16_accuracy.sh create mode 100644 models/cv/classification/mvitv2_base/igie/scripts/infer_mvitv2_base_fp16_performance.sh diff --git a/models/cv/classification/mvitv2_base/igie/README.md b/models/cv/classification/mvitv2_base/igie/README.md new file mode 100644 index 00000000..199a9597 --- /dev/null +++ b/models/cv/classification/mvitv2_base/igie/README.md @@ -0,0 +1,68 @@ +# MViTv2-base + +## Description + +MViTv2_base is an efficient multi-scale vision Transformer model designed specifically for image classification tasks. By employing a multi-scale structure and hierarchical representation, it effectively captures both global and local image features while maintaining computational efficiency. The MViTv2_base has demonstrated excellent performance on multiple standard datasets and is suitable for a variety of visual recognition tasks. + +## Setup + +### Install + +```bash +# Install libGL +## CentOS +yum install -y mesa-libGL +## Ubuntu +apt install -y libgl1-mesa-dev + +pip3 install onnx +pip3 install tqdm +pip3 install onnxsim +pip3 install mmcv==1.5.3 +pip3 install mmcls +``` + +### Download + +Pretrained model: + +Dataset: to download the validation dataset. 
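+
+Note that `inference.py` loads the validation images with `torchvision.datasets.ImageFolder`, so the directory passed through `DATASETS_DIR` must be organised as one sub-folder per class rather than a flat folder of images. A rough sketch of the expected layout (class and file names are only illustrative):
+
+```bash
+# imagenet_val/
+#   n01440764/
+#     ILSVRC2012_val_00000293.JPEG
+#     ...
+#   n01443537/
+#     ...
+```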
+ +### Model Conversion + +```bash +# git clone mmpretrain +git clone -b v0.24.0 https://github.com/open-mmlab/mmpretrain.git + +# export onnx model +python3 export.py --cfg mmpretrain/configs/mvit/mvitv2-base_8xb256_in1k.py --weight mvitv2-base_3rdparty_in1k_20220722-9c4f0a17.pth --output mvitv2_base.onnx + +# Use onnxsim optimize onnx model +onnxsim mvitv2_base.onnx mvitv2_base_opt.onnx + +``` + +## Inference + +```bash +export DATASETS_DIR=/Path/to/imagenet_val/ +``` + +### FP16 + +```bash +# Accuracy +bash scripts/infer_mvitv2_base_fp16_accuracy.sh +# Performance +bash scripts/infer_mvitv2_base_fp16_performance.sh +``` + +## Results + +| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) | +| ----------- | --------- | --------- | -------- | -------- | -------- | +| MViTv2-base | 16 | FP16 | 58.76 | 84.226 | 96.848 | + +## Reference + +MViTv2-base: diff --git a/models/cv/classification/mvitv2_base/igie/build_engine.py b/models/cv/classification/mvitv2_base/igie/build_engine.py new file mode 100644 index 00000000..d3626ae7 --- /dev/null +++ b/models/cv/classification/mvitv2_base/igie/build_engine.py @@ -0,0 +1,73 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import tvm +import argparse +from tvm import relay +from tvm.relay.import_model import import_model_to_igie + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--model_path", + type=str, + required=True, + help="original model path.") + + parser.add_argument("--engine_path", + type=str, + required=True, + help="igie export engine path.") + + parser.add_argument("--input", + type=str, + required=True, + help=""" + input info of the model, format should be: + input_name:input_shape + eg: --input input:1,3,224,224. + """) + + parser.add_argument("--precision", + type=str, + choices=["fp32", "fp16", "int8"], + required=True, + help="model inference precision.") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + # get input valueinfo + input_name, input_shape = args.input.split(":") + shape = tuple([int(s) for s in input_shape.split(",")]) + input_dict = {input_name: shape} + + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + + mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie") + + # build engine + lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision) + + # export engine + lib.export_library(args.engine_path) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/classification/mvitv2_base/igie/export.py b/models/cv/classification/mvitv2_base/igie/export.py new file mode 100644 index 00000000..d78b898b --- /dev/null +++ b/models/cv/classification/mvitv2_base/igie/export.py @@ -0,0 +1,74 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +import argparse + +import torch +from mmcls.apis import init_model + +class Model(torch.nn.Module): + def __init__(self, config_file, checkpoint_file): + super().__init__() + self.model = init_model(config_file, checkpoint_file, device="cpu") + + def forward(self, x): + head = self.model.simple_test(x) + return head + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--weight", + type=str, + required=True, + help="pytorch model weight.") + + parser.add_argument("--cfg", + type=str, + required=True, + help="model config file.") + + parser.add_argument("--output", + type=str, + required=True, + help="export onnx model path.") + + args = parser.parse_args() + return args + +def main(): + args = parse_args() + + config_file = args.cfg + checkpoint_file = args.weight + model = Model(config_file, checkpoint_file).eval() + + input_names = ['input'] + output_names = ['output'] + dummy_input = torch.randn(16, 3, 224, 224) + + torch.onnx.export( + model, + dummy_input, + args.output, + input_names = input_names, + output_names = output_names, + opset_version=13 + ) + + print("Export onnx model successfully! ") + +if __name__ == '__main__': + main() + diff --git a/models/cv/classification/mvitv2_base/igie/inference.py b/models/cv/classification/mvitv2_base/igie/inference.py new file mode 100644 index 00000000..c42cf871 --- /dev/null +++ b/models/cv/classification/mvitv2_base/igie/inference.py @@ -0,0 +1,185 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
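+
+# Evaluation entry point for the IGIE engine built from the MViTv2-base ONNX
+# model: the compiled .so is loaded as a TVM graph executor, ImageNet
+# validation batches are run through it to accumulate Top-1/Top-5 accuracy,
+# and the final (smaller) batch is padded up to the fixed engine batch size.
+# With --perf_only, only the time_evaluator latency/FPS numbers are reported.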
+ +import sys +import argparse +import tvm +import torch +import torchvision +import numpy as np +from tvm import relay +from tqdm import tqdm +from torchvision import transforms + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--engine", + type=str, + required=True, + help="igie engine path.") + + parser.add_argument("--batchsize", + type=int, + required=True, + help="inference batch size.") + + parser.add_argument("--datasets", + type=str, + required=True, + help="datasets path.") + + parser.add_argument("--input_name", + type=str, + required=True, + help="input name of the model.") + + parser.add_argument("--warmup", + type=int, + default=3, + help="number of warmup before test.") + + parser.add_argument("--num_workers", + type=int, + default=16, + help="number of workers used in pytorch dataloader.") + + parser.add_argument("--acc_target", + type=float, + default=None, + help="Model inference Accuracy target.") + + parser.add_argument("--fps_target", + type=float, + default=None, + help="Model inference FPS target.") + + parser.add_argument("--perf_only", + type=bool, + default=False, + help="Run performance test only") + + args = parser.parse_args() + + return args + +def get_dataloader(data_path, batch_size, num_workers): + dataset = torchvision.datasets.ImageFolder( + data_path, + transforms.Compose( + [ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.PILToTensor(), + transforms.ConvertImageDtype(torch.float), + transforms.Normalize( + mean=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225) + ) + ] + ) + ) + + dataloader = torch.utils.data.DataLoader(dataset, batch_size, num_workers=num_workers) + + return dataloader + +def get_topk_accuracy(pred, label): + if isinstance(pred, np.ndarray): + pred = torch.from_numpy(pred) + + if isinstance(label, np.ndarray): + label = torch.from_numpy(label) + + top1_acc = 0 + top5_acc = 0 + for idx in range(len(label)): + label_value = label[idx] + if label_value == torch.topk(pred[idx].float(), 1).indices.data: + top1_acc += 1 + top5_acc += 1 + + elif label_value in torch.topk(pred[idx].float(), 5).indices.data: + top5_acc += 1 + + return top1_acc, top5_acc + +def main(): + args = parse_args() + + batch_size = args.batchsize + + # create iluvatar target & device + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + device = tvm.device(target.kind.name, 0) + + # load engine + lib = tvm.runtime.load_module(args.engine) + + # create runtime from engine + module = tvm.contrib.graph_executor.GraphModule(lib["default"](device)) + + # just run perf test + if args.perf_only: + ftimer = module.module.time_evaluator("run", device, number=100, repeat=1) + prof_res = np.array(ftimer().results) * 1000 + fps = batch_size * 1000 / np.mean(prof_res) + print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}") + else: + # warm up + for _ in range(args.warmup): + module.run() + + # get dataloader + dataloader = get_dataloader(args.datasets, batch_size, args.num_workers) + + top1_acc = 0 + top5_acc = 0 + total_num = 0 + + for image, label in tqdm(dataloader): + + # pad the last batch + pad_batch = len(image) != batch_size + + if pad_batch: + origin_size = len(image) + image = np.resize(image, (batch_size, *image.shape[1:])) + + module.set_input(args.input_name, tvm.nd.array(image, device)) + + # run inference + module.run() + + pred = module.get_output(0).asnumpy() + + if pad_batch: + pred = pred[:origin_size] + + # get batch accuracy + batch_top1_acc, 
batch_top5_acc = get_topk_accuracy(pred, label) + + top1_acc += batch_top1_acc + top5_acc += batch_top5_acc + total_num += batch_size + + result_stat = {} + result_stat["acc@1"] = round(top1_acc / total_num * 100.0, 3) + result_stat["acc@5"] = round(top5_acc / total_num * 100.0, 3) + + print(f"\n* Top1 acc: {result_stat['acc@1']} %, Top5 acc: {result_stat['acc@5']} %") + +if __name__ == "__main__": + main() diff --git a/models/cv/classification/mvitv2_base/igie/scripts/infer_mvitv2_base_fp16_accuracy.sh b/models/cv/classification/mvitv2_base/igie/scripts/infer_mvitv2_base_fp16_accuracy.sh new file mode 100644 index 00000000..85f66d50 --- /dev/null +++ b/models/cv/classification/mvitv2_base/igie/scripts/infer_mvitv2_base_fp16_accuracy.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=16 +model_path="mvitv2_base_opt.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path mvitv2_base_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine mvitv2_base_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} diff --git a/models/cv/classification/mvitv2_base/igie/scripts/infer_mvitv2_base_fp16_performance.sh b/models/cv/classification/mvitv2_base/igie/scripts/infer_mvitv2_base_fp16_performance.sh new file mode 100644 index 00000000..d54dac50 --- /dev/null +++ b/models/cv/classification/mvitv2_base/igie/scripts/infer_mvitv2_base_fp16_performance.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
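+
+# Timing-only variant of the accuracy script: the FP16 engine is built the
+# same way, but inference.py is invoked with --perf_only, so the ImageNet data
+# under ${DATASETS_DIR} is not read and only mean latency / FPS are printed.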
+ +batchsize=16 +model_path="mvitv2_base_opt.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path mvitv2_base_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine mvitv2_base_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ + --perf_only True \ No newline at end of file -- Gitee From 2f249448a4652e6b9ed0631d86bc63547a97dd90 Mon Sep 17 00:00:00 2001 From: YoungPeng Date: Thu, 10 Oct 2024 17:34:57 +0800 Subject: [PATCH 10/18] Add: resnetv1d50 inference script. --- .../classification/resnetv1d50/igie/README.md | 64 ++++++ .../resnetv1d50/igie/build_engine.py | 73 +++++++ .../classification/resnetv1d50/igie/export.py | 76 +++++++ .../resnetv1d50/igie/inference.py | 185 ++++++++++++++++++ .../infer_resnetv1d50_fp16_accuracy.sh | 35 ++++ .../infer_resnetv1d50_fp16_performance.sh | 36 ++++ 6 files changed, 469 insertions(+) create mode 100644 models/cv/classification/resnetv1d50/igie/README.md create mode 100644 models/cv/classification/resnetv1d50/igie/build_engine.py create mode 100644 models/cv/classification/resnetv1d50/igie/export.py create mode 100644 models/cv/classification/resnetv1d50/igie/inference.py create mode 100644 models/cv/classification/resnetv1d50/igie/scripts/infer_resnetv1d50_fp16_accuracy.sh create mode 100644 models/cv/classification/resnetv1d50/igie/scripts/infer_resnetv1d50_fp16_performance.sh diff --git a/models/cv/classification/resnetv1d50/igie/README.md b/models/cv/classification/resnetv1d50/igie/README.md new file mode 100644 index 00000000..0609b34f --- /dev/null +++ b/models/cv/classification/resnetv1d50/igie/README.md @@ -0,0 +1,64 @@ +# ResNetV1D-50 + +## Description + +ResNetV1D-50 is an enhanced version of ResNetV1-50 that incorporates changes like dilated convolutions and adjusted downsampling, leading to better performance in large-scale image classification tasks. Its ability to capture richer image features makes it a popular choice in deep learning models. + +## Setup + +### Install + +```bash +# Install libGL +## CentOS +yum install -y mesa-libGL +## Ubuntu +apt install -y libgl1-mesa-dev + +pip3 install onnx +pip3 install tqdm +pip3 install mmcv==1.5.3 +pip3 install mmcls +``` + +### Download + +Pretrained model: + +Dataset: to download the validation dataset. 
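+
+After the conversion step below produces `resnetv1d50.onnx`, the graph can optionally be given a quick structural check; the sketch below only relies on the `onnx` package from the install list above:
+
+```bash
+# optional sanity check of the exported graph (not part of the benchmark flow)
+python3 -c "import onnx; onnx.checker.check_model(onnx.load('resnetv1d50.onnx'))" && echo "onnx graph ok"
+```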
+ +### Model Conversion + +```bash +# git clone mmpretrain +git clone -b v0.24.0 https://github.com/open-mmlab/mmpretrain.git + +# export onnx model +python3 export.py --cfg mmpretrain/configs/resnet/resnetv1d50_b32x8_imagenet.py --weight resnetv1d50_b32x8_imagenet_20210531-db14775a.pth --output resnetv1d50.onnx + +``` + +## Inference + +```bash +export DATASETS_DIR=/Path/to/imagenet_val/ +``` + +### FP16 + +```bash +# Accuracy +bash scripts/infer_resnetv1d50_fp16_accuracy.sh +# Performance +bash scripts/infer_resnetv1d50_fp16_performance.sh +``` + +## Results + +| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) | +| ------------ | --------- | --------- | -------- | -------- | -------- | +| ResNetV1D-50 | 32 | FP16 | 4017.92 | 77.517 | 93.538 | + +## Reference + +ResNetV1D-50: diff --git a/models/cv/classification/resnetv1d50/igie/build_engine.py b/models/cv/classification/resnetv1d50/igie/build_engine.py new file mode 100644 index 00000000..d3626ae7 --- /dev/null +++ b/models/cv/classification/resnetv1d50/igie/build_engine.py @@ -0,0 +1,73 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import tvm +import argparse +from tvm import relay +from tvm.relay.import_model import import_model_to_igie + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--model_path", + type=str, + required=True, + help="original model path.") + + parser.add_argument("--engine_path", + type=str, + required=True, + help="igie export engine path.") + + parser.add_argument("--input", + type=str, + required=True, + help=""" + input info of the model, format should be: + input_name:input_shape + eg: --input input:1,3,224,224. + """) + + parser.add_argument("--precision", + type=str, + choices=["fp32", "fp16", "int8"], + required=True, + help="model inference precision.") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + # get input valueinfo + input_name, input_shape = args.input.split(":") + shape = tuple([int(s) for s in input_shape.split(",")]) + input_dict = {input_name: shape} + + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + + mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie") + + # build engine + lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision) + + # export engine + lib.export_library(args.engine_path) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/classification/resnetv1d50/igie/export.py b/models/cv/classification/resnetv1d50/igie/export.py new file mode 100644 index 00000000..84045099 --- /dev/null +++ b/models/cv/classification/resnetv1d50/igie/export.py @@ -0,0 +1,76 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +import argparse + +import torch +from mmcls.apis import init_model + +class Model(torch.nn.Module): + def __init__(self, config_file, checkpoint_file): + super().__init__() + self.model = init_model(config_file, checkpoint_file, device="cpu") + + def forward(self, x): + head = self.model.simple_test(x) + return head + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--weight", + type=str, + required=True, + help="pytorch model weight.") + + parser.add_argument("--cfg", + type=str, + required=True, + help="model config file.") + + parser.add_argument("--output", + type=str, + required=True, + help="export onnx model path.") + + args = parser.parse_args() + return args + +def main(): + args = parse_args() + + config_file = args.cfg + checkpoint_file = args.weight + model = Model(config_file, checkpoint_file).eval() + + input_names = ['input'] + output_names = ['output'] + dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}} + dummy_input = torch.randn(1, 3, 224, 224) + + torch.onnx.export( + model, + dummy_input, + args.output, + input_names = input_names, + dynamic_axes = dynamic_axes, + output_names = output_names, + opset_version=13 + ) + + print("Export onnx model successfully! ") + +if __name__ == '__main__': + main() + diff --git a/models/cv/classification/resnetv1d50/igie/inference.py b/models/cv/classification/resnetv1d50/igie/inference.py new file mode 100644 index 00000000..c42cf871 --- /dev/null +++ b/models/cv/classification/resnetv1d50/igie/inference.py @@ -0,0 +1,185 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +import sys +import argparse +import tvm +import torch +import torchvision +import numpy as np +from tvm import relay +from tqdm import tqdm +from torchvision import transforms + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--engine", + type=str, + required=True, + help="igie engine path.") + + parser.add_argument("--batchsize", + type=int, + required=True, + help="inference batch size.") + + parser.add_argument("--datasets", + type=str, + required=True, + help="datasets path.") + + parser.add_argument("--input_name", + type=str, + required=True, + help="input name of the model.") + + parser.add_argument("--warmup", + type=int, + default=3, + help="number of warmup before test.") + + parser.add_argument("--num_workers", + type=int, + default=16, + help="number of workers used in pytorch dataloader.") + + parser.add_argument("--acc_target", + type=float, + default=None, + help="Model inference Accuracy target.") + + parser.add_argument("--fps_target", + type=float, + default=None, + help="Model inference FPS target.") + + parser.add_argument("--perf_only", + type=bool, + default=False, + help="Run performance test only") + + args = parser.parse_args() + + return args + +def get_dataloader(data_path, batch_size, num_workers): + dataset = torchvision.datasets.ImageFolder( + data_path, + transforms.Compose( + [ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.PILToTensor(), + transforms.ConvertImageDtype(torch.float), + transforms.Normalize( + mean=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225) + ) + ] + ) + ) + + dataloader = torch.utils.data.DataLoader(dataset, batch_size, num_workers=num_workers) + + return dataloader + +def get_topk_accuracy(pred, label): + if isinstance(pred, np.ndarray): + pred = torch.from_numpy(pred) + + if isinstance(label, np.ndarray): + label = torch.from_numpy(label) + + top1_acc = 0 + top5_acc = 0 + for idx in range(len(label)): + label_value = label[idx] + if label_value == torch.topk(pred[idx].float(), 1).indices.data: + top1_acc += 1 + top5_acc += 1 + + elif label_value in torch.topk(pred[idx].float(), 5).indices.data: + top5_acc += 1 + + return top1_acc, top5_acc + +def main(): + args = parse_args() + + batch_size = args.batchsize + + # create iluvatar target & device + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + device = tvm.device(target.kind.name, 0) + + # load engine + lib = tvm.runtime.load_module(args.engine) + + # create runtime from engine + module = tvm.contrib.graph_executor.GraphModule(lib["default"](device)) + + # just run perf test + if args.perf_only: + ftimer = module.module.time_evaluator("run", device, number=100, repeat=1) + prof_res = np.array(ftimer().results) * 1000 + fps = batch_size * 1000 / np.mean(prof_res) + print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}") + else: + # warm up + for _ in range(args.warmup): + module.run() + + # get dataloader + dataloader = get_dataloader(args.datasets, batch_size, args.num_workers) + + top1_acc = 0 + top5_acc = 0 + total_num = 0 + + for image, label in tqdm(dataloader): + + # pad the last batch + pad_batch = len(image) != batch_size + + if pad_batch: + origin_size = len(image) + image = np.resize(image, (batch_size, *image.shape[1:])) + + module.set_input(args.input_name, tvm.nd.array(image, device)) + + # run inference + module.run() + + pred = module.get_output(0).asnumpy() + + if pad_batch: + pred = pred[:origin_size] + + # get batch accuracy + batch_top1_acc, 
batch_top5_acc = get_topk_accuracy(pred, label) + + top1_acc += batch_top1_acc + top5_acc += batch_top5_acc + total_num += batch_size + + result_stat = {} + result_stat["acc@1"] = round(top1_acc / total_num * 100.0, 3) + result_stat["acc@5"] = round(top5_acc / total_num * 100.0, 3) + + print(f"\n* Top1 acc: {result_stat['acc@1']} %, Top5 acc: {result_stat['acc@5']} %") + +if __name__ == "__main__": + main() diff --git a/models/cv/classification/resnetv1d50/igie/scripts/infer_resnetv1d50_fp16_accuracy.sh b/models/cv/classification/resnetv1d50/igie/scripts/infer_resnetv1d50_fp16_accuracy.sh new file mode 100644 index 00000000..c3a3d869 --- /dev/null +++ b/models/cv/classification/resnetv1d50/igie/scripts/infer_resnetv1d50_fp16_accuracy.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="resnetv1d50.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path resnetv1d50_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine resnetv1d50_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} diff --git a/models/cv/classification/resnetv1d50/igie/scripts/infer_resnetv1d50_fp16_performance.sh b/models/cv/classification/resnetv1d50/igie/scripts/infer_resnetv1d50_fp16_performance.sh new file mode 100644 index 00000000..194d2900 --- /dev/null +++ b/models/cv/classification/resnetv1d50/igie/scripts/infer_resnetv1d50_fp16_performance.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +batchsize=32 +model_path="resnetv1d50.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,224,224 \ + --precision fp16 \ + --engine_path resnetv1d50_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine resnetv1d50_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ + --perf_only True \ No newline at end of file -- Gitee From c4c606a6237831e3ab8f0731fe2b9433e4207477 Mon Sep 17 00:00:00 2001 From: YoungPeng Date: Fri, 11 Oct 2024 15:10:22 +0800 Subject: [PATCH 11/18] Add: paa inference script. --- models/cv/detection/paa/igie/README.md | 65 +++++++ .../detection/paa/igie/base/coco_detection.py | 75 +++++++++ .../paa/igie/base/default_runtime.py | 39 +++++ .../cv/detection/paa/igie/base/schedule_1x.py | 43 +++++ models/cv/detection/paa/igie/build_engine.py | 73 ++++++++ .../cv/detection/paa/igie/deploy_default.py | 41 +++++ models/cv/detection/paa/igie/export.py | 74 ++++++++ models/cv/detection/paa/igie/inference.py | 158 ++++++++++++++++++ .../detection/paa/igie/paa_r50_fpn_1x_coco.py | 80 +++++++++ .../igie/scripts/infer_paa_fp16_accuracy.sh | 35 ++++ .../scripts/infer_paa_fp16_performance.sh | 36 ++++ 11 files changed, 719 insertions(+) create mode 100644 models/cv/detection/paa/igie/README.md create mode 100644 models/cv/detection/paa/igie/base/coco_detection.py create mode 100644 models/cv/detection/paa/igie/base/default_runtime.py create mode 100644 models/cv/detection/paa/igie/base/schedule_1x.py create mode 100644 models/cv/detection/paa/igie/build_engine.py create mode 100644 models/cv/detection/paa/igie/deploy_default.py create mode 100644 models/cv/detection/paa/igie/export.py create mode 100644 models/cv/detection/paa/igie/inference.py create mode 100644 models/cv/detection/paa/igie/paa_r50_fpn_1x_coco.py create mode 100644 models/cv/detection/paa/igie/scripts/infer_paa_fp16_accuracy.sh create mode 100644 models/cv/detection/paa/igie/scripts/infer_paa_fp16_performance.sh diff --git a/models/cv/detection/paa/igie/README.md b/models/cv/detection/paa/igie/README.md new file mode 100644 index 00000000..bc9e0b3e --- /dev/null +++ b/models/cv/detection/paa/igie/README.md @@ -0,0 +1,65 @@ +# PAA + +## Description + +PAA (Probabilistic Anchor Assignment) is an algorithm for object detection that adaptively assigns positive and negative anchor samples using a probabilistic model. It employs a Gaussian mixture model to dynamically select positive and negative samples based on score distribution, avoiding the misassignment issues of traditional IoU threshold-based methods. PAA enhances detection accuracy, particularly in complex scenarios, and is compatible with existing detection frameworks. + +## Setup + +### Install + +```bash +# Install libGL +## CentOS +yum install -y mesa-libGL +## Ubuntu +apt install -y libgl1-mesa-dev + +pip3 install onnx +pip3 install tqdm +pip3 install onnxsim +pip3 install mmdet +pip3 install mmdeploy +pip3 install mmengine +``` + +### Download + +Pretrained model: + +Dataset: to download the validation dataset. 
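+
+`inference.py` points the mmdetection dataloader at `images/val2017/` and `annotations/instances_val2017.json` inside the dataset root, so the directory exported as `DATASETS_DIR` is expected to look roughly like this (the image file name is illustrative):
+
+```bash
+# coco/
+#   annotations/
+#     instances_val2017.json
+#   images/
+#     val2017/
+#       000000000139.jpg
+#       ...
+```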
+ +### Model Conversion + +```bash +# export onnx model +python3 export.py --weight paa_r50_fpn_1x_coco_20200821-936edec3.pth --cfg paa_r50_fpn_1x_coco.py --output paa.onnx + +# Use onnxsim optimize onnx model +onnxsim paa.onnx paa_opt.onnx +``` + +## Inference + +```bash +export DATASETS_DIR=/Path/to/coco/ +``` + +### FP16 + +```bash +# Accuracy +bash scripts/infer_paa_fp16_accuracy.sh +# Performance +bash scripts/infer_paa_fp16_performance.sh +``` + +## Results + +Model |BatchSize |Precision |FPS |IOU@0.5 |IOU@0.5:0.95 | +-------|-----------|----------|----------|----------|---------------| +PAA | 32 | FP16 | 138.414 | 0.551 | 0.377 | + +## Reference + +mmdetection: diff --git a/models/cv/detection/paa/igie/base/coco_detection.py b/models/cv/detection/paa/igie/base/coco_detection.py new file mode 100644 index 00000000..9abe9e69 --- /dev/null +++ b/models/cv/detection/paa/igie/base/coco_detection.py @@ -0,0 +1,75 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +# dataset settings +dataset_type = 'CocoDataset' +data_root = 'data/coco/' + +backend_args = None + +train_pipeline = [ + dict(type='LoadImageFromFile', backend_args=backend_args), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', prob=0.5), + dict(type='PackDetInputs') +] +test_pipeline = [ + dict(type='LoadImageFromFile', backend_args=backend_args), + dict(type='Resize', scale=(800, 800), keep_ratio=False), + # If you don't have a gt annotation, delete the pipeline + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor')) +] +train_dataloader = dict( + batch_size=2, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + batch_sampler=dict(type='AspectRatioBatchSampler'), + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file='annotations/instances_train2017.json', + data_prefix=dict(img='train2017/'), + filter_cfg=dict(filter_empty_gt=True, min_size=32), + pipeline=train_pipeline, + backend_args=backend_args)) +val_dataloader = dict( + batch_size=1, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file='annotations/instances_val2017.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=test_pipeline, + backend_args=backend_args)) +test_dataloader = val_dataloader + +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/instances_val2017.json', + metric='bbox', + format_only=False, + backend_args=backend_args) +test_evaluator = val_evaluator \ No newline at end of file diff --git a/models/cv/detection/paa/igie/base/default_runtime.py b/models/cv/detection/paa/igie/base/default_runtime.py new file mode 100644 index 
00000000..609d8037 --- /dev/null +++ b/models/cv/detection/paa/igie/base/default_runtime.py @@ -0,0 +1,39 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +default_scope = 'mmdet' + +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=50), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict(type='CheckpointHook', interval=1), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='DetVisualizationHook')) + +env_cfg = dict( + cudnn_benchmark=False, + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), + dist_cfg=dict(backend='nccl'), +) + +vis_backends = [dict(type='LocalVisBackend')] +visualizer = dict( + type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer') +log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True) + +log_level = 'INFO' +load_from = None +resume = False diff --git a/models/cv/detection/paa/igie/base/schedule_1x.py b/models/cv/detection/paa/igie/base/schedule_1x.py new file mode 100644 index 00000000..9b16d80c --- /dev/null +++ b/models/cv/detection/paa/igie/base/schedule_1x.py @@ -0,0 +1,43 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +# training schedule for 1x +train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=12, val_interval=1) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500), + dict( + type='MultiStepLR', + begin=0, + end=12, + by_epoch=True, + milestones=[8, 11], + gamma=0.1) +] + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)) + +# Default setting for scaling LR automatically +# - `enable` means enable scaling LR automatically +# or not by default. +# - `base_batch_size` = (8 GPUs) x (2 samples per GPU). +auto_scale_lr = dict(enable=False, base_batch_size=16) diff --git a/models/cv/detection/paa/igie/build_engine.py b/models/cv/detection/paa/igie/build_engine.py new file mode 100644 index 00000000..d3626ae7 --- /dev/null +++ b/models/cv/detection/paa/igie/build_engine.py @@ -0,0 +1,73 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import tvm +import argparse +from tvm import relay +from tvm.relay.import_model import import_model_to_igie + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--model_path", + type=str, + required=True, + help="original model path.") + + parser.add_argument("--engine_path", + type=str, + required=True, + help="igie export engine path.") + + parser.add_argument("--input", + type=str, + required=True, + help=""" + input info of the model, format should be: + input_name:input_shape + eg: --input input:1,3,224,224. + """) + + parser.add_argument("--precision", + type=str, + choices=["fp32", "fp16", "int8"], + required=True, + help="model inference precision.") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + # get input valueinfo + input_name, input_shape = args.input.split(":") + shape = tuple([int(s) for s in input_shape.split(",")]) + input_dict = {input_name: shape} + + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + + mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie") + + # build engine + lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision) + + # export engine + lib.export_library(args.engine_path) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/detection/paa/igie/deploy_default.py b/models/cv/detection/paa/igie/deploy_default.py new file mode 100644 index 00000000..b8d8e43d --- /dev/null +++ b/models/cv/detection/paa/igie/deploy_default.py @@ -0,0 +1,41 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
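+
+# mmdeploy deployment config loaded by export.py through load_config() and
+# passed to build_task_processor(); it specifies the ONNX export settings and
+# the mmdet ObjectDetection post-processing parameters.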
+ +onnx_config = dict( + type='onnx', + export_params=True, + keep_initializers_as_inputs=False, + opset_version=11, + save_file='end2end.onnx', + input_names=['input'], + output_names=['output'], + input_shape=None, + optimize=True) + +codebase_config = dict( + type='mmdet', + task='ObjectDetection', + model_type='end2end', + post_processing=dict( + score_threshold=0.05, + confidence_threshold=0.005, + iou_threshold=0.5, + max_output_boxes_per_class=200, + pre_top_k=5000, + keep_top_k=100, + background_label_id=-1, + )) + +backend_config = dict(type='onnxruntime') \ No newline at end of file diff --git a/models/cv/detection/paa/igie/export.py b/models/cv/detection/paa/igie/export.py new file mode 100644 index 00000000..bceaba78 --- /dev/null +++ b/models/cv/detection/paa/igie/export.py @@ -0,0 +1,74 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +import argparse + +import torch +from mmdeploy.utils import load_config +from mmdeploy.apis import build_task_processor + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--weight", + type=str, + required=True, + help="pytorch model weight.") + + parser.add_argument("--cfg", + type=str, + required=True, + help="model config file.") + + parser.add_argument("--output", + type=str, + required=True, + help="export onnx model path.") + + args = parser.parse_args() + return args + +def main(): + args = parse_args() + + deploy_cfg = 'deploy_default.py' + model_cfg = args.cfg + model_checkpoint = args.weight + + deploy_cfg, model_cfg = load_config(deploy_cfg, model_cfg) + + task_processor = build_task_processor(model_cfg, deploy_cfg, device='cpu') + + model = task_processor.build_pytorch_model(model_checkpoint) + + input_names = ['input'] + output_names = ['output'] + dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}} + dummy_input = torch.randn(1, 3, 800, 800) + + torch.onnx.export( + model, + dummy_input, + args.output, + input_names = input_names, + dynamic_axes = dynamic_axes, + output_names = output_names, + opset_version=13 + ) + + print("Export onnx model successfully! ") + +if __name__ == '__main__': + main() + diff --git a/models/cv/detection/paa/igie/inference.py b/models/cv/detection/paa/igie/inference.py new file mode 100644 index 00000000..5b93aefc --- /dev/null +++ b/models/cv/detection/paa/igie/inference.py @@ -0,0 +1,158 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import argparse +import tvm +import torch +import torchvision +import numpy as np +from tvm import relay +from tqdm import tqdm +from mmdet.registry import RUNNERS +from mmengine.config import Config + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--engine", + type=str, + required=True, + help="igie engine path.") + + parser.add_argument("--batchsize", + type=int, + required=True, + help="inference batch size.") + + parser.add_argument("--datasets", + type=str, + required=True, + help="datasets path.") + + parser.add_argument("--input_name", + type=str, + required=True, + help="input name of the model.") + + parser.add_argument("--warmup", + type=int, + default=3, + help="number of warmup before test.") + + parser.add_argument("--acc_target", + type=float, + default=None, + help="Model inference Accuracy target.") + + parser.add_argument("--fps_target", + type=float, + default=None, + help="Model inference FPS target.") + + parser.add_argument("--perf_only", + type=bool, + default=False, + help="Run performance test only") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + batch_size = args.batchsize + + # create iluvatar target & device + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + device = tvm.device(target.kind.name, 0) + + # load engine + lib = tvm.runtime.load_module(args.engine) + + # create runtime from engine + module = tvm.contrib.graph_executor.GraphModule(lib["default"](device)) + + # just run perf test + if args.perf_only: + ftimer = module.module.time_evaluator("run", device, number=100, repeat=1) + prof_res = np.array(ftimer().results) * 1000 + fps = batch_size * 1000 / np.mean(prof_res) + print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}") + else: + # warm up + for _ in range(args.warmup): + module.run() + + # Runner config + cfg = Config.fromfile("paa_r50_fpn_1x_coco.py") + + cfg.work_dir = "./workspace" + cfg['test_dataloader']['batch_size'] = batch_size + cfg['test_dataloader']['dataset']['data_root'] = args.datasets + cfg['test_dataloader']['dataset']['data_prefix']['img'] = 'images/val2017/' + cfg['test_evaluator']['ann_file'] = os.path.join(args.datasets, 'annotations/instances_val2017.json') + cfg['log_level'] = "ERROR" + + runner = RUNNERS.build(cfg) + + for input_data in tqdm(runner.test_dataloader): + + input_data = runner.model.data_preprocessor(input_data, False) + image = input_data['inputs'].cpu() + + pad_batch = len(image) != batch_size + if pad_batch: + origin_size = len(image) + image = np.resize(image, (batch_size, *image.shape[1:])) + + module.set_input(args.input_name, tvm.nd.array(image, device)) + + module.run() + + cls_score = [] + box_reg = [] + score_factors = [] + + for i in range(module.get_num_outputs()): + output = module.get_output(i).asnumpy() + + if pad_batch: + output = output[:origin_size] + + output = torch.from_numpy(output) + + if output.shape[1] == 80: + cls_score.append(output) + elif output.shape[1] == 4: + box_reg.append(output) + else: + score_factors.append(output) + + batch_img_metas = [ + data_samples.metainfo for data_samples in input_data['data_samples'] + ] + + results_list = runner.model.bbox_head.predict_by_feat(cls_score, box_reg, score_factors, batch_img_metas=batch_img_metas, rescale=True) + + batch_data_samples = 
runner.model.add_pred_to_datasample(input_data['data_samples'], results_list) + + runner.test_evaluator.process(data_samples=batch_data_samples, data_batch=input_data) + + metrics = runner.test_evaluator.evaluate(len(runner.test_dataloader.dataset)) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/detection/paa/igie/paa_r50_fpn_1x_coco.py b/models/cv/detection/paa/igie/paa_r50_fpn_1x_coco.py new file mode 100644 index 00000000..a625e801 --- /dev/null +++ b/models/cv/detection/paa/igie/paa_r50_fpn_1x_coco.py @@ -0,0 +1,80 @@ +_base_ = [ + 'base/coco_detection.py', + 'base/schedule_1x.py', 'base/default_runtime.py' +] + +# model settings +model = dict( + type='PAA', + data_preprocessor=dict( + type='DetDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_size_divisor=32), + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + add_extra_convs='on_output', + num_outs=5), + bbox_head=dict( + type='PAAHead', + reg_decoded_bbox=True, + score_voting=True, + topk=9, + num_classes=80, + in_channels=256, + stacked_convs=4, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + ratios=[1.0], + octave_base_scale=8, + scales_per_octave=1, + strides=[8, 16, 32, 64, 128]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[0.1, 0.1, 0.2, 0.2]), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox=dict(type='GIoULoss', loss_weight=1.3), + loss_centerness=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.5)), + # training and testing settings + train_cfg=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.1, + neg_iou_thr=0.1, + min_pos_iou=0, + ignore_iof_thr=-1), + allowed_border=-1, + pos_weight=-1, + debug=False), + test_cfg=dict( + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.6), + max_per_img=100)) +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)) \ No newline at end of file diff --git a/models/cv/detection/paa/igie/scripts/infer_paa_fp16_accuracy.sh b/models/cv/detection/paa/igie/scripts/infer_paa_fp16_accuracy.sh new file mode 100644 index 00000000..9909aa30 --- /dev/null +++ b/models/cv/detection/paa/igie/scripts/infer_paa_fp16_accuracy.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +batchsize=32 +model_path="paa_opt.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,800,800 \ + --precision fp16 \ + --engine_path paa_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine paa_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ No newline at end of file diff --git a/models/cv/detection/paa/igie/scripts/infer_paa_fp16_performance.sh b/models/cv/detection/paa/igie/scripts/infer_paa_fp16_performance.sh new file mode 100644 index 00000000..49d54558 --- /dev/null +++ b/models/cv/detection/paa/igie/scripts/infer_paa_fp16_performance.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="paa_opt.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input input:${batchsize},3,800,800 \ + --precision fp16 \ + --engine_path paa_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine paa_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name input \ + --datasets ${datasets_path} \ + --perf_only True \ No newline at end of file -- Gitee From 49ab199652dbd155ca3c20b7018e7b938f9272e6 Mon Sep 17 00:00:00 2001 From: YoungPeng Date: Fri, 11 Oct 2024 16:13:18 +0800 Subject: [PATCH 12/18] Add: yolov9 inference script. 
--- models/cv/detection/yolov9/igie/README.md | 50 ++++++++ .../cv/detection/yolov9/igie/build_engine.py | 73 +++++++++++ models/cv/detection/yolov9/igie/coco.yaml | 97 +++++++++++++++ models/cv/detection/yolov9/igie/export.py | 43 +++++++ models/cv/detection/yolov9/igie/inference.py | 115 ++++++++++++++++++ .../scripts/infer_yolov9_fp16_accuracy.sh | 35 ++++++ .../scripts/infer_yolov9_fp16_performance.sh | 36 ++++++ models/cv/detection/yolov9/igie/validator.py | 89 ++++++++++++++ 8 files changed, 538 insertions(+) create mode 100644 models/cv/detection/yolov9/igie/README.md create mode 100644 models/cv/detection/yolov9/igie/build_engine.py create mode 100644 models/cv/detection/yolov9/igie/coco.yaml create mode 100644 models/cv/detection/yolov9/igie/export.py create mode 100644 models/cv/detection/yolov9/igie/inference.py create mode 100644 models/cv/detection/yolov9/igie/scripts/infer_yolov9_fp16_accuracy.sh create mode 100644 models/cv/detection/yolov9/igie/scripts/infer_yolov9_fp16_performance.sh create mode 100644 models/cv/detection/yolov9/igie/validator.py diff --git a/models/cv/detection/yolov9/igie/README.md b/models/cv/detection/yolov9/igie/README.md new file mode 100644 index 00000000..cdeb8119 --- /dev/null +++ b/models/cv/detection/yolov9/igie/README.md @@ -0,0 +1,50 @@ +# YOLOv9 + +## Description +YOLOv9 represents a major leap in real-time object detection by introducing innovations like Programmable Gradient Information (PGI) and the Generalized Efficient Layer Aggregation Network (GELAN), significantly improving efficiency, accuracy, and adaptability. Developed by an open-source team and building on the YOLOv5 codebase, it sets new benchmarks on the MS COCO dataset. YOLOv9's architecture effectively addresses information loss in deep neural networks, enhancing learning capacity and ensuring higher detection accuracy. + +## Setup + +### Install + +```bash +pip3 install onnx +pip3 install tqdm +pip3 install onnxsim + +pip3 install ultralytics==8.2.51 +``` + +### Download + +Pretrained model: + +## Model Conversion +```bash +python3 export.py --weight yolov9s.pt --batch 32 +``` + +## Inference + +```bash +export DATASETS_DIR=/Path/to/coco/ +``` + +### FP16 + +```bash +# Accuracy +bash scripts/infer_yolov9_fp16_accuracy.sh +# Performance +bash scripts/infer_yolov9_fp16_performance.sh +``` + +## Results + +Model |BatchSize |Precision |FPS |IOU@0.5 |IOU@0.5:0.95 | +-------|-----------|----------|----------|----------|---------------| +YOLOv9 | 32 | FP16 | 814.42 | 0.625 | 0.464 | + +## Reference + +YOLOv9: \ No newline at end of file diff --git a/models/cv/detection/yolov9/igie/build_engine.py b/models/cv/detection/yolov9/igie/build_engine.py new file mode 100644 index 00000000..d3626ae7 --- /dev/null +++ b/models/cv/detection/yolov9/igie/build_engine.py @@ -0,0 +1,73 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +import tvm +import argparse +from tvm import relay +from tvm.relay.import_model import import_model_to_igie + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--model_path", + type=str, + required=True, + help="original model path.") + + parser.add_argument("--engine_path", + type=str, + required=True, + help="igie export engine path.") + + parser.add_argument("--input", + type=str, + required=True, + help=""" + input info of the model, format should be: + input_name:input_shape + eg: --input input:1,3,224,224. + """) + + parser.add_argument("--precision", + type=str, + choices=["fp32", "fp16", "int8"], + required=True, + help="model inference precision.") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + # get input valueinfo + input_name, input_shape = args.input.split(":") + shape = tuple([int(s) for s in input_shape.split(",")]) + input_dict = {input_name: shape} + + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + + mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie") + + # build engine + lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision) + + # export engine + lib.export_library(args.engine_path) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/detection/yolov9/igie/coco.yaml b/models/cv/detection/yolov9/igie/coco.yaml new file mode 100644 index 00000000..66731c6c --- /dev/null +++ b/models/cv/detection/yolov9/igie/coco.yaml @@ -0,0 +1,97 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license +# COCO 2017 dataset http://cocodataset.org by Microsoft +# Example usage: yolo train data=coco.yaml +# parent +# ├── ultralytics +# └── datasets +# └── coco ← downloads here (20.1 GB) + + +# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
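One detail worth calling out for the accuracy run: the validator loads the dataset through `check_det_dataset('./coco.yaml')`, so the image location comes from the `path`/`val` entries of this file; the `--datasets` value passed by the shell scripts is parsed by `inference.py` but, as written, not referenced on the accuracy path. If the COCO data lives under `DATASETS_DIR`, the yaml can be pointed there with a small override (a hypothetical convenience, not part of this patch; note it drops the yaml comments):

```python
import os
import yaml

# point the dataset root of coco.yaml at DATASETS_DIR (hypothetical helper)
with open("coco.yaml") as f:
    cfg = yaml.safe_load(f)
cfg["path"] = os.environ.get("DATASETS_DIR", cfg["path"])
with open("coco.yaml", "w") as f:
    yaml.safe_dump(cfg, f)
```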
+path: ../../../../data/datasets/coco/ # dataset root dir +train: train2017.txt # train images (relative to 'path') 118287 images +val: val2017.txt # val images (relative to 'path') 5000 images +test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794 + +# Classes +names: + 0: person + 1: bicycle + 2: car + 3: motorcycle + 4: airplane + 5: bus + 6: train + 7: truck + 8: boat + 9: traffic light + 10: fire hydrant + 11: stop sign + 12: parking meter + 13: bench + 14: bird + 15: cat + 16: dog + 17: horse + 18: sheep + 19: cow + 20: elephant + 21: bear + 22: zebra + 23: giraffe + 24: backpack + 25: umbrella + 26: handbag + 27: tie + 28: suitcase + 29: frisbee + 30: skis + 31: snowboard + 32: sports ball + 33: kite + 34: baseball bat + 35: baseball glove + 36: skateboard + 37: surfboard + 38: tennis racket + 39: bottle + 40: wine glass + 41: cup + 42: fork + 43: knife + 44: spoon + 45: bowl + 46: banana + 47: apple + 48: sandwich + 49: orange + 50: broccoli + 51: carrot + 52: hot dog + 53: pizza + 54: donut + 55: cake + 56: chair + 57: couch + 58: potted plant + 59: bed + 60: dining table + 61: toilet + 62: tv + 63: laptop + 64: mouse + 65: remote + 66: keyboard + 67: cell phone + 68: microwave + 69: oven + 70: toaster + 71: sink + 72: refrigerator + 73: book + 74: clock + 75: vase + 76: scissors + 77: teddy bear + 78: hair drier + 79: toothbrush diff --git a/models/cv/detection/yolov9/igie/export.py b/models/cv/detection/yolov9/igie/export.py new file mode 100644 index 00000000..aec62f72 --- /dev/null +++ b/models/cv/detection/yolov9/igie/export.py @@ -0,0 +1,43 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import argparse +from ultralytics import YOLO + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--weight", + type=str, + required=True, + help="pytorch model weight.") + + parser.add_argument("--batch", + type=int, + required=True, + help="batchsize of the model.") + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + model = YOLO(args.weight).cpu() + + model.export(format='onnx', batch=args.batch, imgsz=(640, 640), optimize=True, simplify=True, opset=13) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/detection/yolov9/igie/inference.py b/models/cv/detection/yolov9/igie/inference.py new file mode 100644 index 00000000..9339ffe9 --- /dev/null +++ b/models/cv/detection/yolov9/igie/inference.py @@ -0,0 +1,115 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import argparse +import os + +import tvm +from tvm import relay + +import numpy as np +from pathlib import Path +from ultralytics import YOLO +from ultralytics.cfg import get_cfg +from ultralytics.utils import DEFAULT_CFG +from validator import IGIE_Validator + + + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--engine", + type=str, + required=True, + help="igie engine path.") + + parser.add_argument("--batchsize", + type=int, + required=True, + help="inference batch size.") + + parser.add_argument("--datasets", + type=str, + required=True, + help="datasets path.") + + parser.add_argument("--input_name", + type=str, + required=True, + help="input name of the model.") + + parser.add_argument("--warmup", + type=int, + default=3, + help="number of warmup before test.") + + parser.add_argument("--acc_target", + type=float, + default=None, + help="Model inference Accuracy target.") + + parser.add_argument("--fps_target", + type=float, + default=None, + help="Model inference FPS target.") + + parser.add_argument("--perf_only", + type=bool, + default=False, + help="Run performance test only") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + batch_size = args.batchsize + + # create iluvatar target & device + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + device = tvm.device(target.kind.name, 0) + + # load engine + lib = tvm.runtime.load_module(args.engine) + + # create runtime from engine + module = tvm.contrib.graph_executor.GraphModule(lib["default"](device)) + + # just run perf test + if args.perf_only: + ftimer = module.module.time_evaluator("run", device, number=100, repeat=1) + prof_res = np.array(ftimer().results) * 1000 + fps = batch_size * 1000 / np.mean(prof_res) + print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}") + else: + overrides = {} + overrides['mode'] = 'val' + + cfg_args = get_cfg(cfg=DEFAULT_CFG, overrides=overrides) + + cfg_args.batch = args.batchsize + cfg_args.data = './coco.yaml' + cfg_args.save_json = True + + validator = IGIE_Validator(args=cfg_args, save_dir=Path('.')) + validator.stride = 32 + + stats = validator(module, device) + +if __name__ == "__main__": + main() diff --git a/models/cv/detection/yolov9/igie/scripts/infer_yolov9_fp16_accuracy.sh b/models/cv/detection/yolov9/igie/scripts/infer_yolov9_fp16_accuracy.sh new file mode 100644 index 00000000..2158b3bc --- /dev/null +++ b/models/cv/detection/yolov9/igie/scripts/infer_yolov9_fp16_accuracy.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="yolov9s.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input images:${batchsize},3,640,640 \ + --precision fp16 \ + --engine_path yolov9s_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine yolov9s_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name images \ + --datasets ${datasets_path} \ No newline at end of file diff --git a/models/cv/detection/yolov9/igie/scripts/infer_yolov9_fp16_performance.sh b/models/cv/detection/yolov9/igie/scripts/infer_yolov9_fp16_performance.sh new file mode 100644 index 00000000..59640316 --- /dev/null +++ b/models/cv/detection/yolov9/igie/scripts/infer_yolov9_fp16_performance.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="yolov9s.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input images:${batchsize},3,640,640 \ + --precision fp16 \ + --engine_path yolov9s_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine yolov9s_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name images \ + --datasets ${datasets_path} \ + --perf_only True \ No newline at end of file diff --git a/models/cv/detection/yolov9/igie/validator.py b/models/cv/detection/yolov9/igie/validator.py new file mode 100644 index 00000000..08085747 --- /dev/null +++ b/models/cv/detection/yolov9/igie/validator.py @@ -0,0 +1,89 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
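The validator below runs the engine at the fixed batch size it was compiled for, so a short final batch is padded up with `np.resize` (which repeats existing samples to fill the requested shape) and the extra results are discarded after inference. A minimal illustration of that pattern:

```python
import numpy as np

compiled_batch = 32
imgs = np.random.rand(20, 3, 640, 640).astype(np.float32)   # short final batch of 20

padded = np.resize(imgs, (compiled_batch, *imgs.shape[1:]))  # repeats samples to reach 32
# ... the engine runs on `padded` and returns `outputs` with a leading dimension of 32 ...
outputs = np.zeros((compiled_batch, 6), dtype=np.float32)    # stand-in for the real engine output
outputs = outputs[: len(imgs)]                               # keep only the 20 real predictions
```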
+ +import os +import tvm +import json +import torch +import numpy as np + +from tqdm import tqdm + +from ultralytics.models.yolo.detect import DetectionValidator +from ultralytics.data.utils import check_det_dataset +from ultralytics.utils.metrics import ConfusionMatrix +from ultralytics.data.converter import coco80_to_coco91_class + +class IGIE_Validator(DetectionValidator): + def __call__(self, engine, device): + self.data = check_det_dataset(self.args.data) + self.dataloader = self.get_dataloader(self.data.get(self.args.split), self.args.batch) + self.init_metrics() + + self.stats = {'tp': [], 'conf': [], 'pred_cls': [], 'target_cls': [], 'target_img': []} + + # wram up + for _ in range(3): + engine.run() + + for batch in tqdm(self.dataloader): + batch = self.preprocess(batch) + + imgs = batch['img'] + pad_batch = len(imgs) != self.args.batch + if pad_batch: + origin_size = len(imgs) + imgs = np.resize(imgs, (self.args.batch, *imgs.shape[1:])) + + engine.set_input(0, tvm.nd.array(imgs, device)) + + engine.run() + + outputs = engine.get_output(0).asnumpy() + + if pad_batch: + outputs = outputs[:origin_size] + + outputs = torch.from_numpy(outputs) + + preds = self.postprocess([outputs]) + + self.update_metrics(preds, batch) + + stats = self.get_stats() + + if self.args.save_json and self.jdict: + with open(str(self.save_dir / 'predictions.json'), 'w') as f: + print(f'Saving {f.name} ...') + json.dump(self.jdict, f) # flatten and save + + stats = self.eval_json(stats) + + return stats + + def init_metrics(self): + """Initialize evaluation metrics for YOLO.""" + val = self.data.get(self.args.split, '') # validation path + self.is_coco = isinstance(val, str) and 'coco' in val and val.endswith(f'{os.sep}val2017.txt') # is COCO + self.class_map = coco80_to_coco91_class() if self.is_coco else list(range(1000)) + self.args.save_json |= self.is_coco and not self.training # run on final val if training COCO + self.names = self.data['names'] + self.nc = len(self.names) + self.metrics.names = self.names + self.confusion_matrix = ConfusionMatrix(nc=80) + self.seen = 0 + self.jdict = [] + self.stats = [] + -- Gitee From aa200a08807df836f7052b12320d35650681721a Mon Sep 17 00:00:00 2001 From: YoungPeng Date: Sat, 12 Oct 2024 10:54:13 +0800 Subject: [PATCH 13/18] Add: yolov10 inference script. 
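For the performance scripts in this series, throughput is derived from the mean latency reported by `time_evaluator`: `fps = batchsize * 1000 / mean_latency_ms`. As an illustrative sanity check (the latency below is an example value, not a measurement):

```python
batchsize = 32
mean_latency_ms = 39.3                 # example per-run latency in milliseconds

fps = batchsize * 1000 / mean_latency_ms
print(f"{fps:.1f} FPS")                # ~814 FPS, the same order as the README numbers
```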
--- models/cv/detection/yolov10/igie/README.md | 53 ++++++++ .../cv/detection/yolov10/igie/build_engine.py | 73 +++++++++++ models/cv/detection/yolov10/igie/coco.yaml | 97 +++++++++++++++ models/cv/detection/yolov10/igie/export.py | 43 +++++++ models/cv/detection/yolov10/igie/inference.py | 113 ++++++++++++++++++ .../scripts/infer_yolov10_fp16_accuracy.sh | 35 ++++++ .../scripts/infer_yolov10_fp16_performance.sh | 36 ++++++ models/cv/detection/yolov10/igie/validator.py | 89 ++++++++++++++ 8 files changed, 539 insertions(+) create mode 100644 models/cv/detection/yolov10/igie/README.md create mode 100644 models/cv/detection/yolov10/igie/build_engine.py create mode 100644 models/cv/detection/yolov10/igie/coco.yaml create mode 100644 models/cv/detection/yolov10/igie/export.py create mode 100644 models/cv/detection/yolov10/igie/inference.py create mode 100644 models/cv/detection/yolov10/igie/scripts/infer_yolov10_fp16_accuracy.sh create mode 100644 models/cv/detection/yolov10/igie/scripts/infer_yolov10_fp16_performance.sh create mode 100644 models/cv/detection/yolov10/igie/validator.py diff --git a/models/cv/detection/yolov10/igie/README.md b/models/cv/detection/yolov10/igie/README.md new file mode 100644 index 00000000..088b0907 --- /dev/null +++ b/models/cv/detection/yolov10/igie/README.md @@ -0,0 +1,53 @@ +# YOLOv10 + +## Description +YOLOv10, built on the Ultralytics Python package by researchers at Tsinghua University, introduces a new approach to real-time object detection, addressing both the post-processing and model architecture deficiencies found in previous YOLO versions. By eliminating non-maximum suppression (NMS) and optimizing various model components, YOLOv10 achieves state-of-the-art performance with significantly reduced computational overhead. Extensive experiments demonstrate its superior accuracy-latency trade-offs across multiple model scales. + +## Setup + +### Install + +```bash +pip3 install tqdm +pip3 install huggingface_hub==0.25.2 +``` + +### Download + +Pretrained model: + +## Model Conversion +```bash +git clone https://github.com/THU-MIG/yolov10.git +cd yolov10 +pip3 install -e . +cd .. + +python3 export.py --weight yolov10s.pt --batch 32 + +``` + +## Inference + +```bash +export DATASETS_DIR=/Path/to/coco/ +``` + +### FP16 + +```bash +# Accuracy +bash scripts/infer_yolov10_fp16_accuracy.sh +# Performance +bash scripts/infer_yolov10_fp16_performance.sh +``` + +## Results + +Model |BatchSize |Precision |FPS |IOU@0.5 |IOU@0.5:0.95 | +--------|-----------|----------|----------|----------|---------------| +YOLOv10 | 32 | FP16 | 810.97 | 0.629 | 0.461 | + +## Reference + +YOLOv10: diff --git a/models/cv/detection/yolov10/igie/build_engine.py b/models/cv/detection/yolov10/igie/build_engine.py new file mode 100644 index 00000000..d3626ae7 --- /dev/null +++ b/models/cv/detection/yolov10/igie/build_engine.py @@ -0,0 +1,73 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +import tvm +import argparse +from tvm import relay +from tvm.relay.import_model import import_model_to_igie + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--model_path", + type=str, + required=True, + help="original model path.") + + parser.add_argument("--engine_path", + type=str, + required=True, + help="igie export engine path.") + + parser.add_argument("--input", + type=str, + required=True, + help=""" + input info of the model, format should be: + input_name:input_shape + eg: --input input:1,3,224,224. + """) + + parser.add_argument("--precision", + type=str, + choices=["fp32", "fp16", "int8"], + required=True, + help="model inference precision.") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + # get input valueinfo + input_name, input_shape = args.input.split(":") + shape = tuple([int(s) for s in input_shape.split(",")]) + input_dict = {input_name: shape} + + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + + mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie") + + # build engine + lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision) + + # export engine + lib.export_library(args.engine_path) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/detection/yolov10/igie/coco.yaml b/models/cv/detection/yolov10/igie/coco.yaml new file mode 100644 index 00000000..66731c6c --- /dev/null +++ b/models/cv/detection/yolov10/igie/coco.yaml @@ -0,0 +1,97 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license +# COCO 2017 dataset http://cocodataset.org by Microsoft +# Example usage: yolo train data=coco.yaml +# parent +# ├── ultralytics +# └── datasets +# └── coco ← downloads here (20.1 GB) + + +# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
+path: ../../../../data/datasets/coco/ # dataset root dir +train: train2017.txt # train images (relative to 'path') 118287 images +val: val2017.txt # val images (relative to 'path') 5000 images +test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794 + +# Classes +names: + 0: person + 1: bicycle + 2: car + 3: motorcycle + 4: airplane + 5: bus + 6: train + 7: truck + 8: boat + 9: traffic light + 10: fire hydrant + 11: stop sign + 12: parking meter + 13: bench + 14: bird + 15: cat + 16: dog + 17: horse + 18: sheep + 19: cow + 20: elephant + 21: bear + 22: zebra + 23: giraffe + 24: backpack + 25: umbrella + 26: handbag + 27: tie + 28: suitcase + 29: frisbee + 30: skis + 31: snowboard + 32: sports ball + 33: kite + 34: baseball bat + 35: baseball glove + 36: skateboard + 37: surfboard + 38: tennis racket + 39: bottle + 40: wine glass + 41: cup + 42: fork + 43: knife + 44: spoon + 45: bowl + 46: banana + 47: apple + 48: sandwich + 49: orange + 50: broccoli + 51: carrot + 52: hot dog + 53: pizza + 54: donut + 55: cake + 56: chair + 57: couch + 58: potted plant + 59: bed + 60: dining table + 61: toilet + 62: tv + 63: laptop + 64: mouse + 65: remote + 66: keyboard + 67: cell phone + 68: microwave + 69: oven + 70: toaster + 71: sink + 72: refrigerator + 73: book + 74: clock + 75: vase + 76: scissors + 77: teddy bear + 78: hair drier + 79: toothbrush diff --git a/models/cv/detection/yolov10/igie/export.py b/models/cv/detection/yolov10/igie/export.py new file mode 100644 index 00000000..eb56c772 --- /dev/null +++ b/models/cv/detection/yolov10/igie/export.py @@ -0,0 +1,43 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import argparse +from ultralytics import YOLOv10 + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--weight", + type=str, + required=True, + help="pytorch model weight.") + + parser.add_argument("--batch", + type=int, + required=True, + help="batchsize of the model.") + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + model = YOLOv10(args.weight).cpu() + + model.export(format='onnx', batch=args.batch, imgsz=(640, 640), optimize=True, simplify=True, opset=13) + +if __name__ == "__main__": + main() diff --git a/models/cv/detection/yolov10/igie/inference.py b/models/cv/detection/yolov10/igie/inference.py new file mode 100644 index 00000000..102d2f19 --- /dev/null +++ b/models/cv/detection/yolov10/igie/inference.py @@ -0,0 +1,113 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import argparse +import tvm +from tvm import relay + +import numpy as np +from pathlib import Path +from ultralytics import YOLO +from ultralytics.cfg import get_cfg +from ultralytics.utils import DEFAULT_CFG +from validator import IGIE_Validator + + + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--engine", + type=str, + required=True, + help="igie engine path.") + + parser.add_argument("--batchsize", + type=int, + required=True, + help="inference batch size.") + + parser.add_argument("--datasets", + type=str, + required=True, + help="datasets path.") + + parser.add_argument("--input_name", + type=str, + required=True, + help="input name of the model.") + + parser.add_argument("--warmup", + type=int, + default=3, + help="number of warmup before test.") + + parser.add_argument("--acc_target", + type=float, + default=None, + help="Model inference Accuracy target.") + + parser.add_argument("--fps_target", + type=float, + default=None, + help="Model inference FPS target.") + + parser.add_argument("--perf_only", + type=bool, + default=False, + help="Run performance test only") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + batch_size = args.batchsize + + # create iluvatar target & device + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + device = tvm.device(target.kind.name, 0) + + # load engine + lib = tvm.runtime.load_module(args.engine) + + # create runtime from engine + module = tvm.contrib.graph_executor.GraphModule(lib["default"](device)) + + # just run perf test + if args.perf_only: + ftimer = module.module.time_evaluator("run", device, number=100, repeat=1) + prof_res = np.array(ftimer().results) * 1000 + fps = batch_size * 1000 / np.mean(prof_res) + print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}") + else: + overrides = {} + overrides['mode'] = 'val' + + cfg_args = get_cfg(cfg=DEFAULT_CFG, overrides=overrides) + + cfg_args.batch = args.batchsize + cfg_args.data = './coco.yaml' + cfg_args.save_json = True + + validator = IGIE_Validator(args=cfg_args, save_dir=Path('.')) + validator.stride = 32 + + stats = validator(module, device) + +if __name__ == "__main__": + main() diff --git a/models/cv/detection/yolov10/igie/scripts/infer_yolov10_fp16_accuracy.sh b/models/cv/detection/yolov10/igie/scripts/infer_yolov10_fp16_accuracy.sh new file mode 100644 index 00000000..2b069afa --- /dev/null +++ b/models/cv/detection/yolov10/igie/scripts/infer_yolov10_fp16_accuracy.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="yolov10s.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input images:${batchsize},3,640,640 \ + --precision fp16 \ + --engine_path yolov10s_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine yolov10s_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name images \ + --datasets ${datasets_path} \ No newline at end of file diff --git a/models/cv/detection/yolov10/igie/scripts/infer_yolov10_fp16_performance.sh b/models/cv/detection/yolov10/igie/scripts/infer_yolov10_fp16_performance.sh new file mode 100644 index 00000000..4f4b997c --- /dev/null +++ b/models/cv/detection/yolov10/igie/scripts/infer_yolov10_fp16_performance.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +batchsize=32 +model_path="yolov10s.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input images:${batchsize},3,640,640 \ + --precision fp16 \ + --engine_path yolov10s_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine yolov10s_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name images \ + --datasets ${datasets_path} \ + --perf_only True \ No newline at end of file diff --git a/models/cv/detection/yolov10/igie/validator.py b/models/cv/detection/yolov10/igie/validator.py new file mode 100644 index 00000000..85ac9382 --- /dev/null +++ b/models/cv/detection/yolov10/igie/validator.py @@ -0,0 +1,89 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
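As in the YOLOv9 validator, the COCO evaluation below maps the contiguous 80-class indices predicted by the model back to the original 91-category COCO ids via `coco80_to_coco91_class()`, so that the saved `predictions.json` lines up with the official annotation category ids:

```python
from ultralytics.data.converter import coco80_to_coco91_class

class_map = coco80_to_coco91_class()
print(len(class_map))  # 80 entries, one per contiguous class index
print(class_map[0])    # 1 -> COCO category id for class 0 ("person")
```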
+ +import os +import tvm +import json +import torch +import numpy as np + +from tqdm import tqdm + +from ultralytics.models.yolov10 import YOLOv10DetectionValidator +from ultralytics.data.utils import check_det_dataset +from ultralytics.utils.metrics import ConfusionMatrix +from ultralytics.data.converter import coco80_to_coco91_class + +class IGIE_Validator(YOLOv10DetectionValidator): + def __call__(self, engine, device): + self.data = check_det_dataset(self.args.data) + self.dataloader = self.get_dataloader(self.data.get(self.args.split), self.args.batch) + self.init_metrics() + + self.stats = {'tp': [], 'conf': [], 'pred_cls': [], 'target_cls': []} + + # wram up + for _ in range(3): + engine.run() + + for batch in tqdm(self.dataloader): + batch = self.preprocess(batch) + + imgs = batch['img'] + pad_batch = len(imgs) != self.args.batch + if pad_batch: + origin_size = len(imgs) + imgs = np.resize(imgs, (self.args.batch, *imgs.shape[1:])) + + engine.set_input(0, tvm.nd.array(imgs, device)) + + engine.run() + + outputs = engine.get_output(0).asnumpy() + + if pad_batch: + outputs = outputs[:origin_size] + + outputs = torch.from_numpy(outputs) + + preds = self.postprocess([outputs]) + + self.update_metrics(preds, batch) + + stats = self.get_stats() + + if self.args.save_json and self.jdict: + with open(str(self.save_dir / 'predictions.json'), 'w') as f: + print(f'Saving {f.name} ...') + json.dump(self.jdict, f) # flatten and save + + stats = self.eval_json(stats) + + return stats + + def init_metrics(self): + """Initialize evaluation metrics for YOLO.""" + val = self.data.get(self.args.split, '') # validation path + self.is_coco = isinstance(val, str) and 'coco' in val and val.endswith(f'{os.sep}val2017.txt') # is COCO + self.class_map = coco80_to_coco91_class() if self.is_coco else list(range(1000)) + self.args.save_json |= self.is_coco and not self.training # run on final val if training COCO + self.names = self.data['names'] + self.nc = len(self.names) + self.metrics.names = self.names + self.confusion_matrix = ConfusionMatrix(nc=80) + self.seen = 0 + self.jdict = [] + self.stats = [] + -- Gitee From 5eea6a1537dea9b7e8843e614a679bb8b200bdb7 Mon Sep 17 00:00:00 2001 From: YoungPeng Date: Sat, 12 Oct 2024 14:43:12 +0800 Subject: [PATCH 14/18] Add: kie_layoutxlm inference script. --- models/cv/ocr/kie_layoutxlm/README.md | 66 ++++++++ models/cv/ocr/kie_layoutxlm/build_engine.py | 77 +++++++++ models/cv/ocr/kie_layoutxlm/inference.py | 157 ++++++++++++++++++ .../scripts/infer_kie_ser_fp16_accuracy.sh | 35 ++++ .../scripts/infer_kie_ser_fp16_performance.sh | 36 ++++ 5 files changed, 371 insertions(+) create mode 100644 models/cv/ocr/kie_layoutxlm/README.md create mode 100644 models/cv/ocr/kie_layoutxlm/build_engine.py create mode 100644 models/cv/ocr/kie_layoutxlm/inference.py create mode 100644 models/cv/ocr/kie_layoutxlm/scripts/infer_kie_ser_fp16_accuracy.sh create mode 100644 models/cv/ocr/kie_layoutxlm/scripts/infer_kie_ser_fp16_performance.sh diff --git a/models/cv/ocr/kie_layoutxlm/README.md b/models/cv/ocr/kie_layoutxlm/README.md new file mode 100644 index 00000000..8a4eed92 --- /dev/null +++ b/models/cv/ocr/kie_layoutxlm/README.md @@ -0,0 +1,66 @@ +# LayoutXLM +## Description +LayoutXLM is a groundbreaking multimodal pre-trained model for multilingual document understanding, achieving exceptional performance by integrating text, layout, and image data. 
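Unlike the single-input detection models earlier in this series, the exported SER model takes four input tensors (`x_0`–`x_3`, with the shapes used in the scripts below), so `build_engine.py` accepts several `--input` specs and `inference.py` takes a comma-separated `--input_name` list. A minimal sketch of how such specs map to an input dictionary, mirroring what the engine builder does (shapes taken from the fp16 scripts, batch size 8):

```python
specs = ["x_0:8,512", "x_1:8,512,4", "x_2:8,512", "x_3:8,512"]

input_dict = {}
for spec in specs:
    name, shape = spec.split(":")
    input_dict[name] = tuple(int(s) for s in shape.split(","))

print(input_dict)  # {'x_0': (8, 512), 'x_1': (8, 512, 4), 'x_2': (8, 512), 'x_3': (8, 512)}
```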
+ +## Setup +```shell +pip3 install tqdm +pip3 install onnx +pip3 install onnxsim +pip3 install Polygon3 +pip3 install paddlenlp==2.8.1 +pip3 install lanms-neo==1.0.2 +pip3 install paddleocr==2.6.0 +pip3 install paddle2onnx +pip3 install python-bidi +``` + +## Download +Pretrained model: + +Dataset: to download the XFUND_zh dataset. + +## Model Conversion +```shell + +tar -xf ser_vi_layoutxlm_xfund_pretrained.tar +tar -xf XFUND.tar + +git clone -b release/2.6 https://github.com/PaddlePaddle/PaddleOCR.git + +cd PaddleOCR +mkdir -p train_data/XFUND +cp ../XFUND/class_list_xfun.txt train_data/XFUND + +# Export the trained model into inference model +python3 tools/export_model.py -c configs/kie/vi_layoutxlm/ser_vi_layoutxlm_xfund_zh.yml -o Architecture.Backbone.checkpoints=../ser_vi_layoutxlm_xfund_pretrained/best_accuracy Global.save_inference_dir=./inference/ser_vi_layoutxlm + +# Export the inference model to onnx model +paddle2onnx --model_dir ./inference/ser_vi_layoutxlm --model_filename inference.pdmodel --params_filename inference.pdiparams --save_file ../kie_ser.onnx --opset_version 11 --enable_onnx_checker True + +cd .. + +# Use onnxsim optimize onnx model +onnxsim kie_ser.onnx kie_ser_opt.onnx +``` + +## Inference +```shell +export DATASETS_DIR=/Path/to/XFUND/ +``` +### FP16 +```shell +# Accuracy +bash scripts/infer_kie_ser_fp16_accuracy.sh +# Performance +bash scripts/infer_kie_ser_fp16_performance.sh +``` + +## Results + +Model |BatchSize |Precision |FPS |Hmean | +--------|-----------|----------|----------|----------| +Kie_ser | 8 | FP16 | 107.65 | 93.61% | + +## Reference +PaddleOCR: diff --git a/models/cv/ocr/kie_layoutxlm/build_engine.py b/models/cv/ocr/kie_layoutxlm/build_engine.py new file mode 100644 index 00000000..cf27aa83 --- /dev/null +++ b/models/cv/ocr/kie_layoutxlm/build_engine.py @@ -0,0 +1,77 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import tvm +import argparse +from tvm import relay +from tvm.relay.import_model import import_model_to_igie + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--model_path", + type=str, + required=True, + help="original model path.") + + parser.add_argument("--engine_path", + type=str, + required=True, + help="igie export engine path.") + + parser.add_argument("--input", + type=str, + required=True, + nargs='+', + help=""" + input info of the model, format should be: + input_name:input_shape + eg: --input input:1,3,224,224. 
+ """) + + parser.add_argument("--precision", + type=str, + choices=["fp32", "fp16", "int8"], + required=True, + help="model inference precision.") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + # get input valueinfo + input_dict = {} + + for input in args.input: + input_name, input_shape = input.split(":") + shape = tuple([int(s) for s in input_shape.split(",")]) + input_dict[input_name] = shape + + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + + mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie") + + # build engine + lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision) + + # export engine + lib.export_library(args.engine_path) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/ocr/kie_layoutxlm/inference.py b/models/cv/ocr/kie_layoutxlm/inference.py new file mode 100644 index 00000000..2154d5c8 --- /dev/null +++ b/models/cv/ocr/kie_layoutxlm/inference.py @@ -0,0 +1,157 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import argparse +import tvm +import yaml +import logging +import numpy as np +from tvm import relay +from tqdm import tqdm + +import paddle +from paddleocr import ppocr +from ppocr.data import build_dataloader +from ppocr.utils.logging import get_logger +from ppocr.postprocess import build_post_process + +class VQASerTokenMetric(object): + def __init__(self, main_indicator='hmean', **kwargs): + self.main_indicator = main_indicator + self.reset() + + def __call__(self, preds, batch, **kwargs): + preds, labels = preds + self.pred_list.extend(preds) + self.gt_list.extend(labels) + + def get_metric(self): + from seqeval.metrics import f1_score, precision_score, recall_score + metrics = { + "precision": precision_score(self.gt_list, self.pred_list), + "recall": recall_score(self.gt_list, self.pred_list), + "hmean": f1_score(self.gt_list, self.pred_list), + } + self.reset() + return metrics + + def reset(self): + self.pred_list = [] + self.gt_list = [] + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--engine", + type=str, + required=True, + help="igie engine path.") + + parser.add_argument("--batchsize", + type=int, + required=True, + help="inference batch size.") + + parser.add_argument("--datasets", + type=str, + required=True, + help="datasets path.") + + parser.add_argument("--input_name", + type=str, + required=True, + help="input name of the model.") + + parser.add_argument("--warmup", + type=int, + default=3, + help="number of warmup before test.") + + parser.add_argument("--perf_only", + type=bool, + default=False, + help="Run performance test only") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + logger = get_logger(log_level=logging.INFO) + + input_names = args.input_name.split(",") + config = 
yaml.load(open("ser_vi_layoutxlm_xfund_zh.yml", 'rb'), Loader=yaml.Loader) + config['Eval']['loader']['batch_size_per_card'] = args.batchsize + config['Eval']['dataset']['data_dir'] = os.path.join(args.datasets, "zh_val/image") + config['Eval']['dataset']['label_file_list'] = os.path.join(args.datasets, "zh_val/val.json") + config['Eval']['dataset']['transforms'][1]['VQATokenLabelEncode']['class_path'] = os.path.join(args.datasets, "class_list_xfun.txt") + config['PostProcess']['class_path'] = os.path.join(args.datasets, "class_list_xfun.txt") + + # build dataloader + config['Eval']['loader']['drop_last'] = True + valid_dataloder = build_dataloader(config, 'Eval', paddle.set_device("cpu"), logger) + + # build post process + post_process_class = build_post_process(config['PostProcess']) + + # build metric + eval_class = eval('VQASerTokenMetric')() + + # creat target and device + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + device = tvm.device(target.kind.name, 0) + + # load engine + lib = tvm.runtime.load_module(args.engine) + + # create runtime from engine + module = tvm.contrib.graph_executor.GraphModule(lib["default"](device)) + + # just run perf test + if args.perf_only: + ftimer = module.module.time_evaluator("run", device, number=100, repeat=1) + prof_res = np.array(ftimer().results) * 1000 + fps = args.batchsize * 1000 / np.mean(prof_res) + print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}") + else: + # warm up + for _ in range(args.warmup): + module.run() + + for batch in tqdm(valid_dataloder): + + for idx, input_name in enumerate(input_names): + module.set_input(input_name, tvm.nd.array(batch[idx], device)) + + module.run() + + outputs = module.get_output(0).asnumpy() + outputs = paddle.to_tensor(outputs) + + batch_numpy = [] + for item in batch: + batch_numpy.append(item.numpy()) + + post_result = post_process_class((outputs), batch_numpy) + eval_class(post_result, batch_numpy) + + metric = eval_class.get_metric() + print(metric) + +if __name__ == "__main__": + main() diff --git a/models/cv/ocr/kie_layoutxlm/scripts/infer_kie_ser_fp16_accuracy.sh b/models/cv/ocr/kie_layoutxlm/scripts/infer_kie_ser_fp16_accuracy.sh new file mode 100644 index 00000000..32282d8f --- /dev/null +++ b/models/cv/ocr/kie_layoutxlm/scripts/infer_kie_ser_fp16_accuracy.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +# License for the specific language governing permissions and limitations +# under the License.i + +batchsize=8 +model_path="kie_ser_opt.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input x_0:${batchsize},512 x_1:${batchsize},512,4 x_2:${batchsize},512 x_3:${batchsize},512 \ + --precision fp16 \ + --engine_path kie_ser_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine kie_ser_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name x_0,x_1,x_2,x_3 \ + --datasets ${datasets_path} \ No newline at end of file diff --git a/models/cv/ocr/kie_layoutxlm/scripts/infer_kie_ser_fp16_performance.sh b/models/cv/ocr/kie_layoutxlm/scripts/infer_kie_ser_fp16_performance.sh new file mode 100644 index 00000000..2205c987 --- /dev/null +++ b/models/cv/ocr/kie_layoutxlm/scripts/infer_kie_ser_fp16_performance.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License.i + +batchsize=8 +model_path="kie_ser_opt.onnx" +datasets_path=${DATASETS_DIR} + +# build engine +python3 build_engine.py \ + --model_path ${model_path} \ + --input x_0:${batchsize},512 x_1:${batchsize},512,4 x_2:${batchsize},512 x_3:${batchsize},512 \ + --precision fp16 \ + --engine_path kie_ser_bs_${batchsize}_fp16.so + + +# inference +python3 inference.py \ + --engine kie_ser_bs_${batchsize}_fp16.so \ + --batchsize ${batchsize} \ + --input_name x_0,x_1,x_2,x_3 \ + --datasets ${datasets_path} \ + --perf_only True \ No newline at end of file -- Gitee From c88f56a5f05cdeb2ffe951c913dce462384c342c Mon Sep 17 00:00:00 2001 From: YoungPeng Date: Sat, 12 Oct 2024 15:45:48 +0800 Subject: [PATCH 15/18] Add: rec_svtr inference script. --- models/cv/ocr/rec_svtr/README.md | 62 ++++++ models/cv/ocr/rec_svtr/build_engine.py | 77 +++++++ models/cv/ocr/rec_svtr/inference.py | 192 ++++++++++++++++++ .../rec_svtr_tiny_6local_6global_stn_en.yml | 132 ++++++++++++ .../scripts/infer_svtr_fp16_accuracy.sh | 35 ++++ .../scripts/infer_svtr_fp16_performance.sh | 36 ++++ 6 files changed, 534 insertions(+) create mode 100644 models/cv/ocr/rec_svtr/README.md create mode 100644 models/cv/ocr/rec_svtr/build_engine.py create mode 100644 models/cv/ocr/rec_svtr/inference.py create mode 100644 models/cv/ocr/rec_svtr/rec_svtr_tiny_6local_6global_stn_en.yml create mode 100644 models/cv/ocr/rec_svtr/scripts/infer_svtr_fp16_accuracy.sh create mode 100644 models/cv/ocr/rec_svtr/scripts/infer_svtr_fp16_performance.sh diff --git a/models/cv/ocr/rec_svtr/README.md b/models/cv/ocr/rec_svtr/README.md new file mode 100644 index 00000000..b986a545 --- /dev/null +++ b/models/cv/ocr/rec_svtr/README.md @@ -0,0 +1,62 @@ +# SVTR +## Description +SVTR proposes a single vision model for scene text recognition. This model completely abandons sequence modeling within the patch-wise image tokenization framework. 
Under the premise of competitive accuracy, the model has fewer parameters and faster speed. + +## Setup +```shell +pip3 install tqdm +pip3 install onnx +pip3 install onnxsim +pip3 install numpy==1.24.0 +pip3 install Polygon3 +pip3 install paddlenlp==2.8.1 +pip3 install lanms-neo==1.0.2 +pip3 install paddleocr==2.8.1 +pip3 install paddle2onnx +pip3 install python-bidi +``` + +## Download +Pretrained model: + +Dataset: to download the lmdb evaluation datasets. + +## Model Conversion +```shell +tar -xf rec_svtr_tiny_none_ctc_en_train.tar + +git clone -b release/2.6 https://github.com/PaddlePaddle/PaddleOCR.git + +cd PaddleOCR + +# Export the trained model into inference model +python3 tools/export_model.py -c ../rec_svtr_tiny_6local_6global_stn_en.yml -o Global.pretrained_model=../rec_svtr_tiny_none_ctc_en_train/best_accuracy Global.save_inference_dir=./inference/rec_svtr_tiny + +# Export the inference model to onnx model +paddle2onnx --model_dir ./inference/rec_svtr_tiny --model_filename inference.pdmodel --params_filename inference.pdiparams --save_file ../SVTR.onnx --opset_version 13 --enable_onnx_checker True + +cd .. + +# Use onnxsim optimize onnx model +onnxsim SVTR.onnx SVTR_opt.onnx +``` + +## Inference +```shell +export DATASETS_DIR=/Path/to/lmdb_evaluation/ +``` +### FP16 +```shell +# Accuracy +bash scripts/infer_svtr_fp16_accuracy.sh +# Performance +bash scripts/infer_svtr_fp16_performance.sh +``` + +## Results +Model |BatchSize |Precision |FPS |Acc | +--------|-----------|----------|----------|----------| +SVTR | 32 | FP16 | 4936.47 | 88.29% | + +## Reference +PaddleOCR: https://github.com/PaddlePaddle/PaddleOCR/blob/main/docs/algorithm/text_recognition/algorithm_rec_svtr.md diff --git a/models/cv/ocr/rec_svtr/build_engine.py b/models/cv/ocr/rec_svtr/build_engine.py new file mode 100644 index 00000000..cf27aa83 --- /dev/null +++ b/models/cv/ocr/rec_svtr/build_engine.py @@ -0,0 +1,77 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import tvm +import argparse +from tvm import relay +from tvm.relay.import_model import import_model_to_igie + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--model_path", + type=str, + required=True, + help="original model path.") + + parser.add_argument("--engine_path", + type=str, + required=True, + help="igie export engine path.") + + parser.add_argument("--input", + type=str, + required=True, + nargs='+', + help=""" + input info of the model, format should be: + input_name:input_shape + eg: --input input:1,3,224,224. 
+ """) + + parser.add_argument("--precision", + type=str, + choices=["fp32", "fp16", "int8"], + required=True, + help="model inference precision.") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + # get input valueinfo + input_dict = {} + + for input in args.input: + input_name, input_shape = input.split(":") + shape = tuple([int(s) for s in input_shape.split(",")]) + input_dict[input_name] = shape + + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + + mod, params = import_model_to_igie(args.model_path, input_dict, backend="igie") + + # build engine + lib = tvm.relay.build(mod, target=target, params=params, precision=args.precision) + + # export engine + lib.export_library(args.engine_path) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/ocr/rec_svtr/inference.py b/models/cv/ocr/rec_svtr/inference.py new file mode 100644 index 00000000..b91c4699 --- /dev/null +++ b/models/cv/ocr/rec_svtr/inference.py @@ -0,0 +1,192 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import argparse +import tvm +import yaml +import logging +import numpy as np +from tvm import relay +from tqdm import tqdm + +import paddle +from paddleocr import ppocr +from ppocr.data import build_dataloader +from ppocr.utils.logging import get_logger +from ppocr.postprocess import build_post_process + +import string +from rapidfuzz.distance import Levenshtein + + +class RecMetric(object): + def __init__(self, + main_indicator='acc', + is_filter=False, + ignore_space=True, + **kwargs): + self.main_indicator = main_indicator + self.is_filter = is_filter + self.ignore_space = ignore_space + self.eps = 1e-5 + self.reset() + + def _normalize_text(self, text): + text = ''.join( + filter(lambda x: x in (string.digits + string.ascii_letters), text)) + return text.lower() + + def __call__(self, pred_label, *args, **kwargs): + preds, labels = pred_label + correct_num = 0 + all_num = 0 + norm_edit_dis = 0.0 + for (pred, pred_conf), (target, _) in zip(preds, labels): + if self.ignore_space: + pred = pred.replace(" ", "") + target = target.replace(" ", "") + if self.is_filter: + pred = self._normalize_text(pred) + target = self._normalize_text(target) + norm_edit_dis += Levenshtein.normalized_distance(pred, target) + if pred == target: + correct_num += 1 + all_num += 1 + self.correct_num += correct_num + self.all_num += all_num + self.norm_edit_dis += norm_edit_dis + return { + 'acc': correct_num / (all_num + self.eps), + 'norm_edit_dis': 1 - norm_edit_dis / (all_num + self.eps) + } + + def get_metric(self): + """ + return metrics { + 'acc': 0, + 'norm_edit_dis': 0, + } + """ + acc = 1.0 * self.correct_num / (self.all_num + self.eps) + norm_edit_dis = 1 - self.norm_edit_dis / (self.all_num + self.eps) + self.reset() + return {'acc': acc, 'norm_edit_dis': norm_edit_dis} + + def reset(self): + self.correct_num = 0 + 
self.all_num = 0 + self.norm_edit_dis = 0 + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--engine", + type=str, + required=True, + help="igie engine path.") + + parser.add_argument("--batchsize", + type=int, + required=True, + help="inference batch size.") + + parser.add_argument("--datasets", + type=str, + required=True, + help="datasets path.") + + parser.add_argument("--input_name", + type=str, + required=True, + help="input name of the model.") + + parser.add_argument("--warmup", + type=int, + default=3, + help="number of warmup before test.") + + parser.add_argument("--perf_only", + type=bool, + default=False, + help="Run performance test only") + + args = parser.parse_args() + + return args + +def main(): + args = parse_args() + + logger = get_logger(log_level=logging.INFO) + + config = yaml.load(open("rec_svtr_tiny_6local_6global_stn_en.yml", 'rb'), Loader=yaml.Loader) + config['Eval']['loader']['batch_size_per_card'] = args.batchsize + config['Eval']['dataset']['data_dir'] = os.path.join(args.datasets) + + # build dataloader + config['Eval']['loader']['drop_last'] = True + valid_dataloder = build_dataloader(config, 'Eval', paddle.set_device("cpu"), logger) + + # build post process + global_config = config['Global'] + post_process_class = build_post_process(config['PostProcess'], global_config) + + # build metric + eval_class = eval('RecMetric')() + + # creat target and device + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + device = tvm.device(target.kind.name, 0) + + # load engine + lib = tvm.runtime.load_module(args.engine) + + # create runtime from engine + module = tvm.contrib.graph_executor.GraphModule(lib["default"](device)) + + # just run perf test + if args.perf_only: + ftimer = module.module.time_evaluator("run", device, number=100, repeat=1) + prof_res = np.array(ftimer().results) * 1000 + fps = args.batchsize * 1000 / np.mean(prof_res) + print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}") + else: + # warm up + for _ in range(args.warmup): + module.run() + + for batch in tqdm(valid_dataloder): + images = batch[0] + + module.set_input(args.input_name, tvm.nd.array(images, device)) + + module.run() + + outputs = module.get_output(0).asnumpy() + outputs = paddle.to_tensor(outputs) + + batch_numpy = [] + for item in batch: + batch_numpy.append(item.numpy()) + + post_result = post_process_class((outputs), batch_numpy[1]) + eval_class(post_result, batch_numpy) + + metric = eval_class.get_metric() + print(metric) + +if __name__ == "__main__": + main() diff --git a/models/cv/ocr/rec_svtr/rec_svtr_tiny_6local_6global_stn_en.yml b/models/cv/ocr/rec_svtr/rec_svtr_tiny_6local_6global_stn_en.yml new file mode 100644 index 00000000..3c87f9af --- /dev/null +++ b/models/cv/ocr/rec_svtr/rec_svtr_tiny_6local_6global_stn_en.yml @@ -0,0 +1,132 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
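The config that follows mirrors the PaddleOCR SVTR tiny recipe; for this benchmark only the `Eval` section is exercised, and `inference.py` above overrides the evaluation batch size and data directory at runtime, so the `train_data/...` defaults below do not have to exist on disk. The override amounts to (same logic as in the script; paths illustrative):

```python
import yaml

with open("rec_svtr_tiny_6local_6global_stn_en.yml", "rb") as f:
    config = yaml.load(f, Loader=yaml.Loader)

config["Eval"]["loader"]["batch_size_per_card"] = 32                  # from --batchsize
config["Eval"]["dataset"]["data_dir"] = "/path/to/lmdb_evaluation/"   # from --datasets
```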
+ +Global: + use_gpu: True + epoch_num: 20 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: ./output/rec/rec_svtr_tiny_en/ + save_epoch_step: 1 + # evaluation is run every 2000 iterations after the 0th iteration + eval_batch_step: [0, 2000] + cal_metric_during_train: True + pretrained_model: + checkpoints: + save_inference_dir: + use_visualdl: False + infer_img: doc/imgs_words_en/word_10.png + # for data or label process + character_dict_path: + character_type: en + max_text_length: 25 + infer_mode: False + use_space_char: False + save_res_path: ./output/rec/predicts_svtr_tiny.txt + + +Optimizer: + name: AdamW + beta1: 0.9 + beta2: 0.99 + epsilon: 0.00000008 + weight_decay: 0.05 + no_weight_decay_name: norm pos_embed + one_dim_param_no_weight_decay: true + lr: + name: Cosine + learning_rate: 0.0005 + warmup_epoch: 2 + +Architecture: + model_type: rec + algorithm: SVTR + Transform: + name: STN_ON + tps_inputsize: [32, 64] + tps_outputsize: [32, 100] + num_control_points: 20 + tps_margins: [0.05,0.05] + stn_activation: none + Backbone: + name: SVTRNet + img_size: [32, 100] + out_char_num: 25 + out_channels: 192 + patch_merging: 'Conv' + embed_dim: [64, 128, 256] + depth: [3, 6, 3] + num_heads: [2, 4, 8] + mixer: ['Local','Local','Local','Local','Local','Local','Global','Global','Global','Global','Global','Global'] + local_mixer: [[7, 11], [7, 11], [7, 11]] + last_stage: True + prenorm: false + Neck: + name: SequenceEncoder + encoder_type: reshape + Head: + name: CTCHead + +Loss: + name: CTCLoss + +PostProcess: + name: CTCLabelDecode + +Metric: + name: RecMetric + main_indicator: acc + +Train: + dataset: + name: LMDBDataSet + data_dir: ./train_data/data_lmdb_release/training/ + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - CTCLabelEncode: # Class handling label + - SVTRRecResizeImg: + character_dict_path: + image_shape: [3, 64, 256] + padding: False + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: True + batch_size_per_card: 512 + drop_last: True + num_workers: 4 + +Eval: + dataset: + name: LMDBDataSet + data_dir: ./train_data/data_lmdb_release/evaluation/ + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - CTCLabelEncode: # Class handling label + - SVTRRecResizeImg: + character_dict_path: + image_shape: [3, 64, 256] + padding: False + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: False + drop_last: False + batch_size_per_card: 256 + num_workers: 2 diff --git a/models/cv/ocr/rec_svtr/scripts/infer_svtr_fp16_accuracy.sh b/models/cv/ocr/rec_svtr/scripts/infer_svtr_fp16_accuracy.sh new file mode 100644 index 00000000..e195990e --- /dev/null +++ b/models/cv/ocr/rec_svtr/scripts/infer_svtr_fp16_accuracy.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+batchsize=32
+model_path="SVTR_opt.onnx"
+datasets_path=${DATASETS_DIR}
+
+# build engine
+python3 build_engine.py \
+    --model_path ${model_path} \
+    --input x:${batchsize},3,64,256 \
+    --precision fp16 \
+    --engine_path svtr_bs_${batchsize}_fp16.so
+
+
+# inference
+python3 inference.py \
+    --engine svtr_bs_${batchsize}_fp16.so \
+    --batchsize ${batchsize} \
+    --input_name x \
+    --datasets ${datasets_path}
\ No newline at end of file
diff --git a/models/cv/ocr/rec_svtr/scripts/infer_svtr_fp16_performance.sh b/models/cv/ocr/rec_svtr/scripts/infer_svtr_fp16_performance.sh
new file mode 100644
index 00000000..34484ee8
--- /dev/null
+++ b/models/cv/ocr/rec_svtr/scripts/infer_svtr_fp16_performance.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+batchsize=32
+model_path="SVTR_opt.onnx"
+datasets_path=${DATASETS_DIR}
+
+# build engine
+python3 build_engine.py \
+    --model_path ${model_path} \
+    --input x:${batchsize},3,64,256 \
+    --precision fp16 \
+    --engine_path svtr_bs_${batchsize}_fp16.so
+
+
+# inference
+python3 inference.py \
+    --engine svtr_bs_${batchsize}_fp16.so \
+    --batchsize ${batchsize} \
+    --input_name x \
+    --datasets ${datasets_path} \
+    --perf_only True
\ No newline at end of file
--
Gitee

From a915537505404c6821783a847266e6617a3c99be Mon Sep 17 00:00:00 2001
From: YoungPeng
Date: Mon, 14 Oct 2024 10:34:09 +0800
Subject: [PATCH 16/18] Update: delete coco.yaml file in yolov9/yolov10.

---
 models/cv/detection/yolov10/igie/README.md    |  2 +-
 models/cv/detection/yolov10/igie/coco.yaml    | 97 ------------------
 models/cv/detection/yolov10/igie/inference.py | 29 +++++-
 models/cv/detection/yolov10/igie/validator.py |  4 +-
 models/cv/detection/yolov9/igie/coco.yaml     | 97 ------------------
 models/cv/detection/yolov9/igie/inference.py  | 27 +++++-
 models/cv/detection/yolov9/igie/validator.py  |  2 +-
 7 files changed, 56 insertions(+), 202 deletions(-)
 delete mode 100644 models/cv/detection/yolov10/igie/coco.yaml
 delete mode 100644 models/cv/detection/yolov9/igie/coco.yaml

diff --git a/models/cv/detection/yolov10/igie/README.md b/models/cv/detection/yolov10/igie/README.md
index 088b0907..6fa83cac 100644
--- a/models/cv/detection/yolov10/igie/README.md
+++ b/models/cv/detection/yolov10/igie/README.md
@@ -20,7 +20,7 @@ Pretrained model: 
Date: Mon, 14 Oct 2024 17:00:16 +0800
Subject: [PATCH 17/18] Update: update some models config file.
#IAWH75 --- models/cv/detection/atss/igie/README.md | 11 +- .../atss/igie/atss_r50_fpn_1x_coco.py | 10 +- models/cv/detection/atss/igie/inference.py | 4 +- ...ernet_r18-dcnv2_8xb16-crop512-140e_coco.py | 151 -------- .../centernet/igie/base/coco_detection.py | 75 ---- .../centernet/igie/base/default_runtime.py | 39 -- .../centernet/igie/base/schedule_1x.py | 43 --- .../centernet_r18_8xb16-crop512-140e_coco.py | 5 +- .../cv/detection/centernet/igie/inference.py | 6 +- models/cv/detection/fcos/igie/README.md | 10 +- .../fcos_r50_caffe_fpn_gn-head_1x_coco.py | 21 +- models/cv/detection/fcos/igie/inference.py | 4 +- .../foveabox/igie/base/coco_detection.py | 75 ---- .../foveabox/igie/base/default_runtime.py | 39 -- .../foveabox/igie/base/schedule_1x.py | 43 --- .../igie/fovea_r50_fpn_4xb4-1x_coco.py | 4 +- .../cv/detection/foveabox/igie/inference.py | 8 +- models/cv/detection/fsaf/igie/README.md | 10 +- .../igie/base/retinanet_r50_fpn_1x_coco.py | 83 ----- .../fsaf/igie/fsaf_r50_fpn_1x_coco.py | 8 +- models/cv/detection/fsaf/igie/inference.py | 4 +- .../hrnet/igie/base/coco_detection.py | 75 ---- .../hrnet/igie/base/default_runtime.py | 39 -- .../fcos_hrnetv2p-w32-gn-head_4xb4-1x_coco.py | 58 --- .../fcos_r50-caffe_fpn_gn-head_1x_coco.py | 90 ----- ...fcos_r50-caffe_fpn_gn-head_4xb4-1x_coco.py | 19 - .../detection/hrnet/igie/base/schedule_1x.py | 43 --- .../fcos_hrnetv2p-w18-gn-head_4xb4-1x_coco.py | 13 +- models/cv/detection/hrnet/igie/inference.py | 7 +- models/cv/detection/paa/igie/README.md | 2 +- .../detection/paa/igie/base/coco_detection.py | 75 ---- .../paa/igie/base/default_runtime.py | 39 -- .../cv/detection/paa/igie/base/schedule_1x.py | 43 --- .../detection/paa/igie/paa_r50_fpn_1x_coco.py | 335 ++++++++++++++---- .../cv/detection/retinanet/igie/inference.py | 6 +- .../igie/retinanet_r50_fpn_1x_coco.py | 11 +- models/cv/detection/rtmdet/igie/README.md | 13 +- models/cv/detection/rtmdet/igie/inference.py | 4 +- .../igie/rtmdet_nano_320-8xb32_coco-person.py | 20 +- .../cv/pose_estimation/rtmpose/igie/README.md | 13 +- .../pose_estimation/rtmpose/igie/inference.py | 2 +- .../rtmpose-m_8xb256-420e_coco-256x192.py | 6 +- 42 files changed, 391 insertions(+), 1175 deletions(-) delete mode 100644 models/cv/detection/centernet/igie/base/centernet_r18-dcnv2_8xb16-crop512-140e_coco.py delete mode 100644 models/cv/detection/centernet/igie/base/coco_detection.py delete mode 100644 models/cv/detection/centernet/igie/base/default_runtime.py delete mode 100644 models/cv/detection/centernet/igie/base/schedule_1x.py delete mode 100644 models/cv/detection/foveabox/igie/base/coco_detection.py delete mode 100644 models/cv/detection/foveabox/igie/base/default_runtime.py delete mode 100644 models/cv/detection/foveabox/igie/base/schedule_1x.py delete mode 100644 models/cv/detection/fsaf/igie/base/retinanet_r50_fpn_1x_coco.py delete mode 100644 models/cv/detection/hrnet/igie/base/coco_detection.py delete mode 100644 models/cv/detection/hrnet/igie/base/default_runtime.py delete mode 100644 models/cv/detection/hrnet/igie/base/fcos_hrnetv2p-w32-gn-head_4xb4-1x_coco.py delete mode 100644 models/cv/detection/hrnet/igie/base/fcos_r50-caffe_fpn_gn-head_1x_coco.py delete mode 100644 models/cv/detection/hrnet/igie/base/fcos_r50-caffe_fpn_gn-head_4xb4-1x_coco.py delete mode 100644 models/cv/detection/hrnet/igie/base/schedule_1x.py delete mode 100644 models/cv/detection/paa/igie/base/coco_detection.py delete mode 100644 models/cv/detection/paa/igie/base/default_runtime.py delete mode 100644 
models/cv/detection/paa/igie/base/schedule_1x.py diff --git a/models/cv/detection/atss/igie/README.md b/models/cv/detection/atss/igie/README.md index c64581d4..c28719ce 100644 --- a/models/cv/detection/atss/igie/README.md +++ b/models/cv/detection/atss/igie/README.md @@ -55,6 +55,11 @@ bash scripts/infer_atss_fp16_performance.sh ## Results -| Model | BatchSize | Input Shape | Precision | FPS | mAP@0.5(%) | -| :-------: | :-------: | :---------: | :-------: | :-------: | :--------: | -| ATSS | 32 | 800x800 | FP16 | 81.671 | 0.541 | \ No newline at end of file +Model |BatchSize |Precision |FPS |IOU@0.5 |IOU@0.5:0.95 | +-------|-----------|----------|----------|----------|---------------| +ATSS | 32 | FP16 | 81.671 | 0.541 | 0.367 | + + +## Reference + +mmdetection: \ No newline at end of file diff --git a/models/cv/detection/atss/igie/atss_r50_fpn_1x_coco.py b/models/cv/detection/atss/igie/atss_r50_fpn_1x_coco.py index 0378cf0b..3fc51df9 100755 --- a/models/cv/detection/atss/igie/atss_r50_fpn_1x_coco.py +++ b/models/cv/detection/atss/igie/atss_r50_fpn_1x_coco.py @@ -130,7 +130,7 @@ model = dict( pos_weight=-1), type='ATSS') optim_wrapper = dict( - optimizer=dict(lr=0.02, momentum=0.9, type='SGD', weight_decay=0.0001), + optimizer=dict(lr=0.01, momentum=0.9, type='SGD', weight_decay=0.0001), type='OptimWrapper') param_scheduler = [ dict( @@ -154,7 +154,7 @@ test_dataloader = dict( ann_file='annotations/instances_val2017.json', backend_args=None, data_prefix=dict(img='images/val2017/'), - data_root='/root/.igie_cache/modelzoo_data/datasets/coco/', + data_root='data/coco/', pipeline=[ dict(backend_args=None, type='LoadImageFromFile'), dict(keep_ratio=False, scale=( @@ -179,8 +179,7 @@ test_dataloader = dict( persistent_workers=True, sampler=dict(shuffle=False, type='DefaultSampler')) test_evaluator = dict( - ann_file= - '/root/.igie_cache/modelzoo_data/datasets/coco/annotations/instances_val2017.json', + ann_file='data/coco/annotations/instances_val2017.json', backend_args=None, format_only=False, metric='bbox', @@ -281,5 +280,4 @@ visualizer = dict( type='DetLocalVisualizer', vis_backends=[ dict(type='LocalVisBackend'), - ]) -work_dir = './' + ]) \ No newline at end of file diff --git a/models/cv/detection/atss/igie/inference.py b/models/cv/detection/atss/igie/inference.py index cc1a2b86..8bf78903 100644 --- a/models/cv/detection/atss/igie/inference.py +++ b/models/cv/detection/atss/igie/inference.py @@ -20,7 +20,7 @@ import torch import numpy as np from tvm import relay from tqdm import tqdm -from mmpose.registry import RUNNERS +from mmdet.registry import RUNNERS from mmengine.config import Config def parse_args(): @@ -99,7 +99,7 @@ def main(): # runner config cfg = Config.fromfile("atss_r50_fpn_1x_coco.py") - cfg.work_dir = "./" + cfg.work_dir = "./workspace" cfg['test_dataloader']['batch_size'] = batch_size cfg['test_dataloader']['dataset']['data_root'] = args.datasets cfg['test_dataloader']['dataset']['data_prefix']['img'] = 'images/val2017/' diff --git a/models/cv/detection/centernet/igie/base/centernet_r18-dcnv2_8xb16-crop512-140e_coco.py b/models/cv/detection/centernet/igie/base/centernet_r18-dcnv2_8xb16-crop512-140e_coco.py deleted file mode 100644 index 894e4b4f..00000000 --- a/models/cv/detection/centernet/igie/base/centernet_r18-dcnv2_8xb16-crop512-140e_coco.py +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - - -_base_ = [ - 'coco_detection.py', - 'schedule_1x.py', 'default_runtime.py', -] - -dataset_type = 'CocoDataset' -data_root = 'data/coco/' - -# model settings -model = dict( - type='CenterNet', - data_preprocessor=dict( - type='DetDataPreprocessor', - mean=[123.675, 116.28, 103.53], - std=[58.395, 57.12, 57.375], - bgr_to_rgb=True), - backbone=dict( - type='ResNet', - depth=18, - norm_eval=False, - norm_cfg=dict(type='BN'), - init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18')), - neck=dict( - type='CTResNetNeck', - in_channels=512, - num_deconv_filters=(256, 128, 64), - num_deconv_kernels=(4, 4, 4), - use_dcn=True), - bbox_head=dict( - type='CenterNetHead', - num_classes=80, - in_channels=64, - feat_channels=64, - loss_center_heatmap=dict(type='GaussianFocalLoss', loss_weight=1.0), - loss_wh=dict(type='L1Loss', loss_weight=0.1), - loss_offset=dict(type='L1Loss', loss_weight=1.0)), - train_cfg=None, - test_cfg=dict(topk=100, local_maximum_kernel=3, max_per_img=100)) - -train_pipeline = [ - dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), - dict(type='LoadAnnotations', with_bbox=True), - dict( - type='PhotoMetricDistortion', - brightness_delta=32, - contrast_range=(0.5, 1.5), - saturation_range=(0.5, 1.5), - hue_delta=18), - dict( - type='RandomCenterCropPad', - # The cropped images are padded into squares during training, - # but may be less than crop_size. - crop_size=(512, 512), - ratios=(0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3), - mean=[0, 0, 0], - std=[1, 1, 1], - to_rgb=True, - test_pad_mode=None), - # Make sure the output is always crop_size. 
- dict(type='Resize', scale=(512, 512), keep_ratio=True), - dict(type='RandomFlip', prob=0.5), - dict(type='PackDetInputs') -] -test_pipeline = [ - dict( - type='LoadImageFromFile', - backend_args={{_base_.backend_args}}, - to_float32=True), - # don't need Resize - dict( - type='RandomCenterCropPad', - ratios=None, - border=None, - mean=[0, 0, 0], - std=[1, 1, 1], - to_rgb=True, - test_mode=True, - test_pad_mode=['logical_or', 31], - test_pad_add_pix=1), - dict(type='LoadAnnotations', with_bbox=True), - dict( - type='PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'border')) -] - -# Use RepeatDataset to speed up training -train_dataloader = dict( - batch_size=16, - num_workers=4, - persistent_workers=True, - sampler=dict(type='DefaultSampler', shuffle=True), - dataset=dict( - _delete_=True, - type='RepeatDataset', - times=5, - dataset=dict( - type=dataset_type, - data_root=data_root, - ann_file='annotations/instances_train2017.json', - data_prefix=dict(img='train2017/'), - filter_cfg=dict(filter_empty_gt=True, min_size=32), - pipeline=train_pipeline, - backend_args={{_base_.backend_args}}, - ))) - -val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) -test_dataloader = val_dataloader - -# optimizer -# Based on the default settings of modern detectors, the SGD effect is better -# than the Adam in the source code, so we use SGD default settings and -# if you use adam+lr5e-4, the map is 29.1. -optim_wrapper = dict(clip_grad=dict(max_norm=35, norm_type=2)) - -max_epochs = 28 -# learning policy -# Based on the default settings of modern detectors, we added warmup settings. -param_scheduler = [ - dict( - type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, - end=1000), - dict( - type='MultiStepLR', - begin=0, - end=max_epochs, - by_epoch=True, - milestones=[18, 24], # the real step is [18*5, 24*5] - gamma=0.1) -] -train_cfg = dict(max_epochs=max_epochs) # the real epoch is 28*5=140 - -# NOTE: `auto_scale_lr` is for automatically scaling LR, -# USER SHOULD NOT CHANGE ITS VALUES. -# base_batch_size = (8 GPUs) x (16 samples per GPU) -auto_scale_lr = dict(base_batch_size=128) diff --git a/models/cv/detection/centernet/igie/base/coco_detection.py b/models/cv/detection/centernet/igie/base/coco_detection.py deleted file mode 100644 index f58fe67b..00000000 --- a/models/cv/detection/centernet/igie/base/coco_detection.py +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' - -backend_args = None - -train_pipeline = [ - dict(type='LoadImageFromFile', backend_args=backend_args), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', prob=0.5), - dict(type='PackDetInputs') -] -test_pipeline = [ - dict(type='LoadImageFromFile', backend_args=backend_args), - dict(type='Resize', scale=(1333, 800), keep_ratio=True), - # If you don't have a gt annotation, delete the pipeline - dict(type='LoadAnnotations', with_bbox=True), - dict( - type='PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', - 'scale_factor')) -] -train_dataloader = dict( - batch_size=2, - num_workers=2, - persistent_workers=True, - sampler=dict(type='DefaultSampler', shuffle=True), - batch_sampler=dict(type='AspectRatioBatchSampler'), - dataset=dict( - type=dataset_type, - data_root=data_root, - ann_file='annotations/instances_train2017.json', - data_prefix=dict(img='train2017/'), - filter_cfg=dict(filter_empty_gt=True, min_size=32), - pipeline=train_pipeline, - backend_args=backend_args)) -val_dataloader = dict( - batch_size=1, - num_workers=2, - persistent_workers=True, - drop_last=False, - sampler=dict(type='DefaultSampler', shuffle=False), - dataset=dict( - type=dataset_type, - data_root=data_root, - ann_file='annotations/instances_val2017.json', - data_prefix=dict(img='val2017/'), - test_mode=True, - pipeline=test_pipeline, - backend_args=backend_args)) -test_dataloader = val_dataloader - -val_evaluator = dict( - type='CocoMetric', - ann_file=data_root + 'annotations/instances_val2017.json', - metric='bbox', - format_only=False, - backend_args=backend_args) -test_evaluator = val_evaluator \ No newline at end of file diff --git a/models/cv/detection/centernet/igie/base/default_runtime.py b/models/cv/detection/centernet/igie/base/default_runtime.py deleted file mode 100644 index 609d8037..00000000 --- a/models/cv/detection/centernet/igie/base/default_runtime.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -default_scope = 'mmdet' - -default_hooks = dict( - timer=dict(type='IterTimerHook'), - logger=dict(type='LoggerHook', interval=50), - param_scheduler=dict(type='ParamSchedulerHook'), - checkpoint=dict(type='CheckpointHook', interval=1), - sampler_seed=dict(type='DistSamplerSeedHook'), - visualization=dict(type='DetVisualizationHook')) - -env_cfg = dict( - cudnn_benchmark=False, - mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), - dist_cfg=dict(backend='nccl'), -) - -vis_backends = [dict(type='LocalVisBackend')] -visualizer = dict( - type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer') -log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True) - -log_level = 'INFO' -load_from = None -resume = False diff --git a/models/cv/detection/centernet/igie/base/schedule_1x.py b/models/cv/detection/centernet/igie/base/schedule_1x.py deleted file mode 100644 index 9b16d80c..00000000 --- a/models/cv/detection/centernet/igie/base/schedule_1x.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -# training schedule for 1x -train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=12, val_interval=1) -val_cfg = dict(type='ValLoop') -test_cfg = dict(type='TestLoop') - -# learning rate -param_scheduler = [ - dict( - type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500), - dict( - type='MultiStepLR', - begin=0, - end=12, - by_epoch=True, - milestones=[8, 11], - gamma=0.1) -] - -# optimizer -optim_wrapper = dict( - type='OptimWrapper', - optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)) - -# Default setting for scaling LR automatically -# - `enable` means enable scaling LR automatically -# or not by default. -# - `base_batch_size` = (8 GPUs) x (2 samples per GPU). 
-auto_scale_lr = dict(enable=False, base_batch_size=16) diff --git a/models/cv/detection/centernet/igie/centernet_r18_8xb16-crop512-140e_coco.py b/models/cv/detection/centernet/igie/centernet_r18_8xb16-crop512-140e_coco.py index 81a21421..d2156d8f 100644 --- a/models/cv/detection/centernet/igie/centernet_r18_8xb16-crop512-140e_coco.py +++ b/models/cv/detection/centernet/igie/centernet_r18_8xb16-crop512-140e_coco.py @@ -105,7 +105,7 @@ test_dataloader = dict( ann_file='annotations/instances_val2017.json', backend_args=None, data_prefix=dict(img='images/val2017/'), - data_root='/home/peng.yang/Datasets/coco', + data_root='data/coco/', pipeline=[ dict(backend_args=None, to_float32=True, type='LoadImageFromFile'), dict( @@ -147,7 +147,7 @@ test_dataloader = dict( persistent_workers=True, sampler=dict(shuffle=False, type='DefaultSampler')) test_evaluator = dict( - ann_file='/home/peng.yang/Datasets/coco/annotations/instances_val2017.json', + ann_file='data/coco/annotations/instances_val2017.json', backend_args=None, format_only=False, metric='bbox', @@ -366,4 +366,3 @@ visualizer = dict( vis_backends=[ dict(type='LocalVisBackend'), ]) -work_dir = './' diff --git a/models/cv/detection/centernet/igie/inference.py b/models/cv/detection/centernet/igie/inference.py index d3417486..85138c16 100644 --- a/models/cv/detection/centernet/igie/inference.py +++ b/models/cv/detection/centernet/igie/inference.py @@ -97,15 +97,17 @@ def main(): for _ in range(args.warmup): module.run() - # Runner config + # runner config cfg = Config.fromfile("centernet_r18_8xb16-crop512-140e_coco.py") - cfg.work_dir = "./" + cfg.work_dir = "./workspace" cfg['test_dataloader']['batch_size'] = batch_size cfg['test_dataloader']['dataset']['data_root'] = args.datasets cfg['test_dataloader']['dataset']['data_prefix']['img'] = 'images/val2017/' cfg['test_evaluator']['ann_file'] = os.path.join(args.datasets, 'annotations/instances_val2017.json') + cfg['log_level'] = 'ERROR' + # build runner runner = RUNNERS.build(cfg) for input_data in tqdm(runner.test_dataloader): diff --git a/models/cv/detection/fcos/igie/README.md b/models/cv/detection/fcos/igie/README.md index f10cc742..693e6e26 100644 --- a/models/cv/detection/fcos/igie/README.md +++ b/models/cv/detection/fcos/igie/README.md @@ -55,6 +55,10 @@ bash scripts/infer_fcos_fp16_performance.sh ## Results -| Model | BatchSize | Input Shape | Precision | FPS | mAP@0.5(%) | -| :-------: | :-------: | :---------: | :-------: | :-------: | :--------: | -| FCOS | 32 | 800x800 | FP16 | 83.09 | 0.522 | \ No newline at end of file +Model |BatchSize |Precision |FPS |IOU@0.5 |IOU@0.5:0.95 | +-------|-----------|----------|----------|----------|---------------| +FCOS | 32 | FP16 | 83.09 | 0.522 | 0.339 | + +## Reference + +mmdetection: \ No newline at end of file diff --git a/models/cv/detection/fcos/igie/fcos_r50_caffe_fpn_gn-head_1x_coco.py b/models/cv/detection/fcos/igie/fcos_r50_caffe_fpn_gn-head_1x_coco.py index 04941d97..d90cc0e4 100755 --- a/models/cv/detection/fcos/igie/fcos_r50_caffe_fpn_gn-head_1x_coco.py +++ b/models/cv/detection/fcos/igie/fcos_r50_caffe_fpn_gn-head_1x_coco.py @@ -29,7 +29,6 @@ env_cfg = dict( cudnn_benchmark=False, dist_cfg=dict(backend='nccl'), mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0)) -evaluation = dict(interval=1, metric='bbox') load_from = None log_level = 'ERROR' log_processor = dict(by_epoch=True, type='LogProcessor', window_size=50) @@ -108,11 +107,17 @@ model = dict( score_thr=0.05), type='FCOS') optim_wrapper = dict( - optimizer=dict(lr=0.02, 
momentum=0.9, type='SGD', weight_decay=0.0001), + clip_grad=dict(max_norm=35, norm_type=2), + optimizer=dict(lr=0.01, momentum=0.9, type='SGD', weight_decay=0.0001), + paramwise_cfg=dict(bias_decay_mult=0.0, bias_lr_mult=2.0), type='OptimWrapper') param_scheduler = [ dict( - begin=0, by_epoch=False, end=500, start_factor=0.001, type='LinearLR'), + begin=0, + by_epoch=False, + end=500, + factor=0.3333333333333333, + type='ConstantLR'), dict( begin=0, by_epoch=True, @@ -132,7 +137,7 @@ test_dataloader = dict( ann_file='annotations/instances_val2017.json', backend_args=None, data_prefix=dict(img='images/val2017/'), - data_root='/root/.igie_cache/modelzoo_data/datasets/coco/', + data_root='data/coco', pipeline=[ dict(backend_args=None, type='LoadImageFromFile'), dict(keep_ratio=True, scale=( @@ -157,15 +162,14 @@ test_dataloader = dict( persistent_workers=True, sampler=dict(shuffle=False, type='DefaultSampler')) test_evaluator = dict( - ann_file= - '/root/.igie_cache/modelzoo_data/datasets/coco/annotations/instances_val2017.json', + ann_file='data/coco/annotations/instances_val2017.json', backend_args=None, format_only=False, metric='bbox', type='CocoMetric') test_pipeline = [ dict(backend_args=None, type='LoadImageFromFile'), - dict(keep_ratio=True, scale=( + dict(keep_ratio=False, scale=( 800, 800, ), type='Resize'), @@ -224,7 +228,7 @@ val_dataloader = dict( data_root='data/coco/', pipeline=[ dict(backend_args=None, type='LoadImageFromFile'), - dict(keep_ratio=True, scale=( + dict(keep_ratio=False, scale=( 800, 800, ), type='Resize'), @@ -260,4 +264,3 @@ visualizer = dict( vis_backends=[ dict(type='LocalVisBackend'), ]) -work_dir = './' diff --git a/models/cv/detection/fcos/igie/inference.py b/models/cv/detection/fcos/igie/inference.py index e0517124..95b18899 100644 --- a/models/cv/detection/fcos/igie/inference.py +++ b/models/cv/detection/fcos/igie/inference.py @@ -20,7 +20,7 @@ import torch import numpy as np from tvm import relay from tqdm import tqdm -from mmpose.registry import RUNNERS +from mmdet.registry import RUNNERS from mmengine.config import Config def parse_args(): @@ -99,7 +99,7 @@ def main(): # runner config cfg = Config.fromfile("fcos_r50_caffe_fpn_gn-head_1x_coco.py") - cfg.work_dir = "./" + cfg.work_dir = "./workspace" cfg['test_dataloader']['batch_size'] = batch_size cfg['test_dataloader']['dataset']['data_root'] = args.datasets cfg['test_dataloader']['dataset']['data_prefix']['img'] = 'images/val2017/' diff --git a/models/cv/detection/foveabox/igie/base/coco_detection.py b/models/cv/detection/foveabox/igie/base/coco_detection.py deleted file mode 100644 index f58fe67b..00000000 --- a/models/cv/detection/foveabox/igie/base/coco_detection.py +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' - -backend_args = None - -train_pipeline = [ - dict(type='LoadImageFromFile', backend_args=backend_args), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', prob=0.5), - dict(type='PackDetInputs') -] -test_pipeline = [ - dict(type='LoadImageFromFile', backend_args=backend_args), - dict(type='Resize', scale=(1333, 800), keep_ratio=True), - # If you don't have a gt annotation, delete the pipeline - dict(type='LoadAnnotations', with_bbox=True), - dict( - type='PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', - 'scale_factor')) -] -train_dataloader = dict( - batch_size=2, - num_workers=2, - persistent_workers=True, - sampler=dict(type='DefaultSampler', shuffle=True), - batch_sampler=dict(type='AspectRatioBatchSampler'), - dataset=dict( - type=dataset_type, - data_root=data_root, - ann_file='annotations/instances_train2017.json', - data_prefix=dict(img='train2017/'), - filter_cfg=dict(filter_empty_gt=True, min_size=32), - pipeline=train_pipeline, - backend_args=backend_args)) -val_dataloader = dict( - batch_size=1, - num_workers=2, - persistent_workers=True, - drop_last=False, - sampler=dict(type='DefaultSampler', shuffle=False), - dataset=dict( - type=dataset_type, - data_root=data_root, - ann_file='annotations/instances_val2017.json', - data_prefix=dict(img='val2017/'), - test_mode=True, - pipeline=test_pipeline, - backend_args=backend_args)) -test_dataloader = val_dataloader - -val_evaluator = dict( - type='CocoMetric', - ann_file=data_root + 'annotations/instances_val2017.json', - metric='bbox', - format_only=False, - backend_args=backend_args) -test_evaluator = val_evaluator \ No newline at end of file diff --git a/models/cv/detection/foveabox/igie/base/default_runtime.py b/models/cv/detection/foveabox/igie/base/default_runtime.py deleted file mode 100644 index 609d8037..00000000 --- a/models/cv/detection/foveabox/igie/base/default_runtime.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -default_scope = 'mmdet' - -default_hooks = dict( - timer=dict(type='IterTimerHook'), - logger=dict(type='LoggerHook', interval=50), - param_scheduler=dict(type='ParamSchedulerHook'), - checkpoint=dict(type='CheckpointHook', interval=1), - sampler_seed=dict(type='DistSamplerSeedHook'), - visualization=dict(type='DetVisualizationHook')) - -env_cfg = dict( - cudnn_benchmark=False, - mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), - dist_cfg=dict(backend='nccl'), -) - -vis_backends = [dict(type='LocalVisBackend')] -visualizer = dict( - type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer') -log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True) - -log_level = 'INFO' -load_from = None -resume = False diff --git a/models/cv/detection/foveabox/igie/base/schedule_1x.py b/models/cv/detection/foveabox/igie/base/schedule_1x.py deleted file mode 100644 index 9b16d80c..00000000 --- a/models/cv/detection/foveabox/igie/base/schedule_1x.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -# training schedule for 1x -train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=12, val_interval=1) -val_cfg = dict(type='ValLoop') -test_cfg = dict(type='TestLoop') - -# learning rate -param_scheduler = [ - dict( - type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500), - dict( - type='MultiStepLR', - begin=0, - end=12, - by_epoch=True, - milestones=[8, 11], - gamma=0.1) -] - -# optimizer -optim_wrapper = dict( - type='OptimWrapper', - optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)) - -# Default setting for scaling LR automatically -# - `enable` means enable scaling LR automatically -# or not by default. -# - `base_batch_size` = (8 GPUs) x (2 samples per GPU). 
-auto_scale_lr = dict(enable=False, base_batch_size=16) diff --git a/models/cv/detection/foveabox/igie/fovea_r50_fpn_4xb4-1x_coco.py b/models/cv/detection/foveabox/igie/fovea_r50_fpn_4xb4-1x_coco.py index 79329f8d..5be9e50a 100644 --- a/models/cv/detection/foveabox/igie/fovea_r50_fpn_4xb4-1x_coco.py +++ b/models/cv/detection/foveabox/igie/fovea_r50_fpn_4xb4-1x_coco.py @@ -157,7 +157,7 @@ test_dataloader = dict( ann_file='annotations/instances_val2017.json', backend_args=None, data_prefix=dict(img='images/val2017/'), - data_root='/home/peng.yang/Datasets/coco', + data_root='data/coco/', pipeline=[ dict(backend_args=None, type='LoadImageFromFile'), dict(keep_ratio=True, scale=( @@ -182,7 +182,7 @@ test_dataloader = dict( persistent_workers=True, sampler=dict(shuffle=False, type='DefaultSampler')) test_evaluator = dict( - ann_file='/home/peng.yang/Datasets/coco/annotations/instances_val2017.json', + ann_file='data/coco/annotations/instances_val2017.json', backend_args=None, format_only=False, metric='bbox', diff --git a/models/cv/detection/foveabox/igie/inference.py b/models/cv/detection/foveabox/igie/inference.py index 6b31d898..ba4c846f 100644 --- a/models/cv/detection/foveabox/igie/inference.py +++ b/models/cv/detection/foveabox/igie/inference.py @@ -97,15 +97,17 @@ def main(): for _ in range(args.warmup): module.run() - # Runner config + # runner config cfg = Config.fromfile("fovea_r50_fpn_4xb4-1x_coco.py") - cfg.work_dir = "./" + cfg.work_dir = "./workspace" cfg['test_dataloader']['batch_size'] = batch_size cfg['test_dataloader']['dataset']['data_root'] = args.datasets cfg['test_dataloader']['dataset']['data_prefix']['img'] = 'images/val2017/' cfg['test_evaluator']['ann_file'] = os.path.join(args.datasets, 'annotations/instances_val2017.json') - + cfg['log_level'] = 'ERROR' + + # build runner runner = RUNNERS.build(cfg) for input_data in tqdm(runner.test_dataloader): diff --git a/models/cv/detection/fsaf/igie/README.md b/models/cv/detection/fsaf/igie/README.md index 14d56932..5f8a4d4f 100644 --- a/models/cv/detection/fsaf/igie/README.md +++ b/models/cv/detection/fsaf/igie/README.md @@ -55,6 +55,10 @@ bash scripts/infer_fsaf_fp16_performance.sh ## Results -| Model | BatchSize | Input Shape | Precision | FPS | mAP@0.5(%) | -| :-------: | :-------: | :---------: | :-------: | :-------: | :--------: | -| FSAF | 32 | 800x800 | FP16 | 122.35 | 0.530 | \ No newline at end of file +Model |BatchSize |Precision |FPS |IOU@0.5 |IOU@0.5:0.95 | +-------|-----------|----------|----------|----------|---------------| +FSAF | 32 | FP16 | 122.35 | 0.530 | 0.345 | + +## Reference + +mmdetection: diff --git a/models/cv/detection/fsaf/igie/base/retinanet_r50_fpn_1x_coco.py b/models/cv/detection/fsaf/igie/base/retinanet_r50_fpn_1x_coco.py deleted file mode 100644 index 425d3d32..00000000 --- a/models/cv/detection/fsaf/igie/base/retinanet_r50_fpn_1x_coco.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the -# License for the specific language governing permissions and limitations -# under the License. - -# model settings -model = dict( - type='RetinaNet', - data_preprocessor=dict( - type='DetDataPreprocessor', - mean=[123.675, 116.28, 103.53], - std=[58.395, 57.12, 57.375], - bgr_to_rgb=True, - pad_size_divisor=32), - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - norm_eval=True, - style='pytorch', - init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - start_level=1, - add_extra_convs='on_input', - num_outs=5), - bbox_head=dict( - type='RetinaHead', - num_classes=80, - in_channels=256, - stacked_convs=4, - feat_channels=256, - anchor_generator=dict( - type='AnchorGenerator', - octave_base_scale=4, - scales_per_octave=3, - ratios=[0.5, 1.0, 2.0], - strides=[8, 16, 32, 64, 128]), - bbox_coder=dict( - type='DeltaXYWHBBoxCoder', - target_means=[.0, .0, .0, .0], - target_stds=[1.0, 1.0, 1.0, 1.0]), - loss_cls=dict( - type='FocalLoss', - use_sigmoid=True, - gamma=2.0, - alpha=0.25, - loss_weight=1.0), - loss_bbox=dict(type='L1Loss', loss_weight=1.0)), - # model training and testing settings - train_cfg=dict( - assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.5, - neg_iou_thr=0.4, - min_pos_iou=0, - ignore_iof_thr=-1), - sampler=dict( - type='PseudoSampler'), # Focal loss should use PseudoSampler - allowed_border=-1, - pos_weight=-1, - debug=False), - test_cfg=dict( - nms_pre=1000, - min_bbox_size=0, - score_thr=0.05, - nms=dict(type='nms', iou_threshold=0.5), - max_per_img=100)) \ No newline at end of file diff --git a/models/cv/detection/fsaf/igie/fsaf_r50_fpn_1x_coco.py b/models/cv/detection/fsaf/igie/fsaf_r50_fpn_1x_coco.py index 33c2df60..d511321f 100755 --- a/models/cv/detection/fsaf/igie/fsaf_r50_fpn_1x_coco.py +++ b/models/cv/detection/fsaf/igie/fsaf_r50_fpn_1x_coco.py @@ -147,7 +147,7 @@ test_dataloader = dict( ann_file='annotations/instances_val2017.json', backend_args=None, data_prefix=dict(img='images/val2017/'), - data_root='/root/.igie_cache/modelzoo_data/datasets/coco/', + data_root='data/coco/', pipeline=[ dict(backend_args=None, type='LoadImageFromFile'), dict(keep_ratio=False, scale=( @@ -172,8 +172,7 @@ test_dataloader = dict( persistent_workers=True, sampler=dict(shuffle=False, type='DefaultSampler')) test_evaluator = dict( - ann_file= - '/root/.igie_cache/modelzoo_data/datasets/coco/annotations/instances_val2017.json', + ann_file='data/coco/annotations/instances_val2017.json', backend_args=None, format_only=False, metric='bbox', @@ -274,5 +273,4 @@ visualizer = dict( type='DetLocalVisualizer', vis_backends=[ dict(type='LocalVisBackend'), - ]) -work_dir = './' + ]) \ No newline at end of file diff --git a/models/cv/detection/fsaf/igie/inference.py b/models/cv/detection/fsaf/igie/inference.py index 058a5343..7d128c3d 100644 --- a/models/cv/detection/fsaf/igie/inference.py +++ b/models/cv/detection/fsaf/igie/inference.py @@ -20,7 +20,7 @@ import torch import numpy as np from tvm import relay from tqdm import tqdm -from mmpose.registry import RUNNERS +from mmdet.registry import RUNNERS from mmengine.config import Config def parse_args(): @@ -99,7 +99,7 @@ def main(): # runner config cfg = Config.fromfile("fsaf_r50_fpn_1x_coco.py") - cfg.work_dir = "./" + cfg.work_dir = "./workspace" cfg['test_dataloader']['batch_size'] = batch_size 
cfg['test_dataloader']['dataset']['data_root'] = args.datasets cfg['test_dataloader']['dataset']['data_prefix']['img'] = 'images/val2017/' diff --git a/models/cv/detection/hrnet/igie/base/coco_detection.py b/models/cv/detection/hrnet/igie/base/coco_detection.py deleted file mode 100644 index f58fe67b..00000000 --- a/models/cv/detection/hrnet/igie/base/coco_detection.py +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' - -backend_args = None - -train_pipeline = [ - dict(type='LoadImageFromFile', backend_args=backend_args), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', prob=0.5), - dict(type='PackDetInputs') -] -test_pipeline = [ - dict(type='LoadImageFromFile', backend_args=backend_args), - dict(type='Resize', scale=(1333, 800), keep_ratio=True), - # If you don't have a gt annotation, delete the pipeline - dict(type='LoadAnnotations', with_bbox=True), - dict( - type='PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', - 'scale_factor')) -] -train_dataloader = dict( - batch_size=2, - num_workers=2, - persistent_workers=True, - sampler=dict(type='DefaultSampler', shuffle=True), - batch_sampler=dict(type='AspectRatioBatchSampler'), - dataset=dict( - type=dataset_type, - data_root=data_root, - ann_file='annotations/instances_train2017.json', - data_prefix=dict(img='train2017/'), - filter_cfg=dict(filter_empty_gt=True, min_size=32), - pipeline=train_pipeline, - backend_args=backend_args)) -val_dataloader = dict( - batch_size=1, - num_workers=2, - persistent_workers=True, - drop_last=False, - sampler=dict(type='DefaultSampler', shuffle=False), - dataset=dict( - type=dataset_type, - data_root=data_root, - ann_file='annotations/instances_val2017.json', - data_prefix=dict(img='val2017/'), - test_mode=True, - pipeline=test_pipeline, - backend_args=backend_args)) -test_dataloader = val_dataloader - -val_evaluator = dict( - type='CocoMetric', - ann_file=data_root + 'annotations/instances_val2017.json', - metric='bbox', - format_only=False, - backend_args=backend_args) -test_evaluator = val_evaluator \ No newline at end of file diff --git a/models/cv/detection/hrnet/igie/base/default_runtime.py b/models/cv/detection/hrnet/igie/base/default_runtime.py deleted file mode 100644 index 609d8037..00000000 --- a/models/cv/detection/hrnet/igie/base/default_runtime.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -default_scope = 'mmdet' - -default_hooks = dict( - timer=dict(type='IterTimerHook'), - logger=dict(type='LoggerHook', interval=50), - param_scheduler=dict(type='ParamSchedulerHook'), - checkpoint=dict(type='CheckpointHook', interval=1), - sampler_seed=dict(type='DistSamplerSeedHook'), - visualization=dict(type='DetVisualizationHook')) - -env_cfg = dict( - cudnn_benchmark=False, - mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), - dist_cfg=dict(backend='nccl'), -) - -vis_backends = [dict(type='LocalVisBackend')] -visualizer = dict( - type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer') -log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True) - -log_level = 'INFO' -load_from = None -resume = False diff --git a/models/cv/detection/hrnet/igie/base/fcos_hrnetv2p-w32-gn-head_4xb4-1x_coco.py b/models/cv/detection/hrnet/igie/base/fcos_hrnetv2p-w32-gn-head_4xb4-1x_coco.py deleted file mode 100644 index 17b1a876..00000000 --- a/models/cv/detection/hrnet/igie/base/fcos_hrnetv2p-w32-gn-head_4xb4-1x_coco.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -_base_ = 'fcos_r50-caffe_fpn_gn-head_4xb4-1x_coco.py' -model = dict( - data_preprocessor=dict( - mean=[103.53, 116.28, 123.675], - std=[57.375, 57.12, 58.395], - bgr_to_rgb=False), - backbone=dict( - _delete_=True, - type='HRNet', - extra=dict( - stage1=dict( - num_modules=1, - num_branches=1, - block='BOTTLENECK', - num_blocks=(4, ), - num_channels=(64, )), - stage2=dict( - num_modules=1, - num_branches=2, - block='BASIC', - num_blocks=(4, 4), - num_channels=(32, 64)), - stage3=dict( - num_modules=4, - num_branches=3, - block='BASIC', - num_blocks=(4, 4, 4), - num_channels=(32, 64, 128)), - stage4=dict( - num_modules=3, - num_branches=4, - block='BASIC', - num_blocks=(4, 4, 4, 4), - num_channels=(32, 64, 128, 256))), - init_cfg=dict( - type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w32')), - neck=dict( - _delete_=True, - type='HRFPN', - in_channels=[32, 64, 128, 256], - out_channels=256, - stride=2, - num_outs=5)) diff --git a/models/cv/detection/hrnet/igie/base/fcos_r50-caffe_fpn_gn-head_1x_coco.py b/models/cv/detection/hrnet/igie/base/fcos_r50-caffe_fpn_gn-head_1x_coco.py deleted file mode 100644 index 9e04ad5f..00000000 --- a/models/cv/detection/hrnet/igie/base/fcos_r50-caffe_fpn_gn-head_1x_coco.py +++ /dev/null @@ -1,90 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -_base_ = [ - 'coco_detection.py', - 'schedule_1x.py', 'default_runtime.py' -] - -# model settings -model = dict( - type='FCOS', - data_preprocessor=dict( - type='DetDataPreprocessor', - mean=[102.9801, 115.9465, 122.7717], - std=[1.0, 1.0, 1.0], - bgr_to_rgb=False, - pad_size_divisor=32), - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=False), - norm_eval=True, - style='caffe', - init_cfg=dict( - type='Pretrained', - checkpoint='open-mmlab://detectron/resnet50_caffe')), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - start_level=1, - add_extra_convs='on_output', # use P5 - num_outs=5, - relu_before_extra_convs=True), - bbox_head=dict( - type='FCOSHead', - num_classes=80, - in_channels=256, - stacked_convs=4, - feat_channels=256, - strides=[8, 16, 32, 64, 128], - loss_cls=dict( - type='FocalLoss', - use_sigmoid=True, - gamma=2.0, - alpha=0.25, - loss_weight=1.0), - loss_bbox=dict(type='IoULoss', loss_weight=1.0), - loss_centerness=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)), - # testing settings - test_cfg=dict( - nms_pre=1000, - min_bbox_size=0, - score_thr=0.05, - nms=dict(type='nms', iou_threshold=0.5), - max_per_img=100)) - -# learning rate -param_scheduler = [ - dict(type='ConstantLR', factor=1.0 / 3, by_epoch=False, begin=0, end=500), - dict( - type='MultiStepLR', - begin=0, - end=12, - by_epoch=True, - milestones=[8, 11], - gamma=0.1) -] - -# optimizer -optim_wrapper = dict( - optimizer=dict(lr=0.01), - paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.), - clip_grad=dict(max_norm=35, norm_type=2)) diff --git a/models/cv/detection/hrnet/igie/base/fcos_r50-caffe_fpn_gn-head_4xb4-1x_coco.py b/models/cv/detection/hrnet/igie/base/fcos_r50-caffe_fpn_gn-head_4xb4-1x_coco.py deleted file mode 100644 index 3f9af61a..00000000 --- a/models/cv/detection/hrnet/igie/base/fcos_r50-caffe_fpn_gn-head_4xb4-1x_coco.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -_base_ = 'fcos_r50-caffe_fpn_gn-head_1x_coco.py' - -# dataset settings -train_dataloader = dict(batch_size=4, num_workers=4) diff --git a/models/cv/detection/hrnet/igie/base/schedule_1x.py b/models/cv/detection/hrnet/igie/base/schedule_1x.py deleted file mode 100644 index 9b16d80c..00000000 --- a/models/cv/detection/hrnet/igie/base/schedule_1x.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -# training schedule for 1x -train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=12, val_interval=1) -val_cfg = dict(type='ValLoop') -test_cfg = dict(type='TestLoop') - -# learning rate -param_scheduler = [ - dict( - type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500), - dict( - type='MultiStepLR', - begin=0, - end=12, - by_epoch=True, - milestones=[8, 11], - gamma=0.1) -] - -# optimizer -optim_wrapper = dict( - type='OptimWrapper', - optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)) - -# Default setting for scaling LR automatically -# - `enable` means enable scaling LR automatically -# or not by default. -# - `base_batch_size` = (8 GPUs) x (2 samples per GPU). -auto_scale_lr = dict(enable=False, base_batch_size=16) diff --git a/models/cv/detection/hrnet/igie/fcos_hrnetv2p-w18-gn-head_4xb4-1x_coco.py b/models/cv/detection/hrnet/igie/fcos_hrnetv2p-w18-gn-head_4xb4-1x_coco.py index 010dee7d..cf81f4fc 100644 --- a/models/cv/detection/hrnet/igie/fcos_hrnetv2p-w18-gn-head_4xb4-1x_coco.py +++ b/models/cv/detection/hrnet/igie/fcos_hrnetv2p-w18-gn-head_4xb4-1x_coco.py @@ -30,7 +30,7 @@ env_cfg = dict( dist_cfg=dict(backend='nccl'), mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0)) load_from = None -log_level = 'INFO' +log_level = 'ERROR' log_processor = dict(by_epoch=True, type='LogProcessor', window_size=50) model = dict( backbone=dict( @@ -171,7 +171,7 @@ test_dataloader = dict( ann_file='annotations/instances_val2017.json', backend_args=None, data_prefix=dict(img='images/val2017/'), - data_root='/home/peng.yang/Datasets/coco', + data_root='data/coco/', pipeline=[ dict(backend_args=None, type='LoadImageFromFile'), dict(keep_ratio=True, scale=( @@ -196,7 +196,7 @@ test_dataloader = dict( persistent_workers=True, sampler=dict(shuffle=False, type='DefaultSampler')) test_evaluator = dict( - ann_file='/home/peng.yang/Datasets/coco/annotations/instances_val2017.json', + ann_file='data/coco/annotations/instances_val2017.json', backend_args=None, format_only=False, metric='bbox', @@ -232,7 +232,7 @@ train_dataloader = dict( dict(backend_args=None, type='LoadImageFromFile'), dict(type='LoadAnnotations', with_bbox=True), dict(keep_ratio=True, scale=( - 800, + 1333, 800, ), type='Resize'), dict(prob=0.5, type='RandomFlip'), @@ -246,7 +246,7 @@ train_pipeline = [ dict(backend_args=None, type='LoadImageFromFile'), dict(type='LoadAnnotations', with_bbox=True), dict(keep_ratio=True, scale=( - 800, + 1333, 800, ), type='Resize'), dict(prob=0.5, 
type='RandomFlip'), @@ -297,5 +297,4 @@ visualizer = dict( type='DetLocalVisualizer', vis_backends=[ dict(type='LocalVisBackend'), - ]) -work_dir = './' + ]) \ No newline at end of file diff --git a/models/cv/detection/hrnet/igie/inference.py b/models/cv/detection/hrnet/igie/inference.py index 310eaaef..601c7a18 100644 --- a/models/cv/detection/hrnet/igie/inference.py +++ b/models/cv/detection/hrnet/igie/inference.py @@ -85,7 +85,7 @@ def main(): # create runtime from engine module = tvm.contrib.graph_executor.GraphModule(lib["default"](device)) - + # just run perf test if args.perf_only: ftimer = module.module.time_evaluator("run", device, number=100, repeat=1) @@ -99,12 +99,13 @@ def main(): # Runner config cfg = Config.fromfile("fcos_hrnetv2p-w18-gn-head_4xb4-1x_coco.py") - cfg.work_dir = "./" - + + cfg.work_dir = "./workspace" cfg['test_dataloader']['batch_size'] = batch_size cfg['test_dataloader']['dataset']['data_root'] = args.datasets cfg['test_dataloader']['dataset']['data_prefix']['img'] = 'images/val2017/' cfg['test_evaluator']['ann_file'] = os.path.join(args.datasets, 'annotations/instances_val2017.json') + cfg['log_level'] = 'ERROR' runner = RUNNERS.build(cfg) diff --git a/models/cv/detection/paa/igie/README.md b/models/cv/detection/paa/igie/README.md index bc9e0b3e..fb701eeb 100644 --- a/models/cv/detection/paa/igie/README.md +++ b/models/cv/detection/paa/igie/README.md @@ -58,7 +58,7 @@ bash scripts/infer_paa_fp16_performance.sh Model |BatchSize |Precision |FPS |IOU@0.5 |IOU@0.5:0.95 | -------|-----------|----------|----------|----------|---------------| -PAA | 32 | FP16 | 138.414 | 0.551 | 0.377 | +PAA | 32 | FP16 | 138.414 | 0.555 | 0.381 | ## Reference diff --git a/models/cv/detection/paa/igie/base/coco_detection.py b/models/cv/detection/paa/igie/base/coco_detection.py deleted file mode 100644 index 9abe9e69..00000000 --- a/models/cv/detection/paa/igie/base/coco_detection.py +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' - -backend_args = None - -train_pipeline = [ - dict(type='LoadImageFromFile', backend_args=backend_args), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', prob=0.5), - dict(type='PackDetInputs') -] -test_pipeline = [ - dict(type='LoadImageFromFile', backend_args=backend_args), - dict(type='Resize', scale=(800, 800), keep_ratio=False), - # If you don't have a gt annotation, delete the pipeline - dict(type='LoadAnnotations', with_bbox=True), - dict( - type='PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', - 'scale_factor')) -] -train_dataloader = dict( - batch_size=2, - num_workers=2, - persistent_workers=True, - sampler=dict(type='DefaultSampler', shuffle=True), - batch_sampler=dict(type='AspectRatioBatchSampler'), - dataset=dict( - type=dataset_type, - data_root=data_root, - ann_file='annotations/instances_train2017.json', - data_prefix=dict(img='train2017/'), - filter_cfg=dict(filter_empty_gt=True, min_size=32), - pipeline=train_pipeline, - backend_args=backend_args)) -val_dataloader = dict( - batch_size=1, - num_workers=2, - persistent_workers=True, - drop_last=False, - sampler=dict(type='DefaultSampler', shuffle=False), - dataset=dict( - type=dataset_type, - data_root=data_root, - ann_file='annotations/instances_val2017.json', - data_prefix=dict(img='val2017/'), - test_mode=True, - pipeline=test_pipeline, - backend_args=backend_args)) -test_dataloader = val_dataloader - -val_evaluator = dict( - type='CocoMetric', - ann_file=data_root + 'annotations/instances_val2017.json', - metric='bbox', - format_only=False, - backend_args=backend_args) -test_evaluator = val_evaluator \ No newline at end of file diff --git a/models/cv/detection/paa/igie/base/default_runtime.py b/models/cv/detection/paa/igie/base/default_runtime.py deleted file mode 100644 index 609d8037..00000000 --- a/models/cv/detection/paa/igie/base/default_runtime.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -default_scope = 'mmdet' - -default_hooks = dict( - timer=dict(type='IterTimerHook'), - logger=dict(type='LoggerHook', interval=50), - param_scheduler=dict(type='ParamSchedulerHook'), - checkpoint=dict(type='CheckpointHook', interval=1), - sampler_seed=dict(type='DistSamplerSeedHook'), - visualization=dict(type='DetVisualizationHook')) - -env_cfg = dict( - cudnn_benchmark=False, - mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), - dist_cfg=dict(backend='nccl'), -) - -vis_backends = [dict(type='LocalVisBackend')] -visualizer = dict( - type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer') -log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True) - -log_level = 'INFO' -load_from = None -resume = False diff --git a/models/cv/detection/paa/igie/base/schedule_1x.py b/models/cv/detection/paa/igie/base/schedule_1x.py deleted file mode 100644 index 9b16d80c..00000000 --- a/models/cv/detection/paa/igie/base/schedule_1x.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -# training schedule for 1x -train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=12, val_interval=1) -val_cfg = dict(type='ValLoop') -test_cfg = dict(type='TestLoop') - -# learning rate -param_scheduler = [ - dict( - type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500), - dict( - type='MultiStepLR', - begin=0, - end=12, - by_epoch=True, - milestones=[8, 11], - gamma=0.1) -] - -# optimizer -optim_wrapper = dict( - type='OptimWrapper', - optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)) - -# Default setting for scaling LR automatically -# - `enable` means enable scaling LR automatically -# or not by default. -# - `base_batch_size` = (8 GPUs) x (2 samples per GPU). -auto_scale_lr = dict(enable=False, base_batch_size=16) diff --git a/models/cv/detection/paa/igie/paa_r50_fpn_1x_coco.py b/models/cv/detection/paa/igie/paa_r50_fpn_1x_coco.py index a625e801..2797e347 100644 --- a/models/cv/detection/paa/igie/paa_r50_fpn_1x_coco.py +++ b/models/cv/detection/paa/igie/paa_r50_fpn_1x_coco.py @@ -1,80 +1,291 @@ -_base_ = [ - 'base/coco_detection.py', - 'base/schedule_1x.py', 'base/default_runtime.py' -] +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
-# model settings +auto_scale_lr = dict(base_batch_size=16, enable=False) +backend_args = None +data_root = 'data/coco/' +dataset_type = 'CocoDataset' +default_hooks = dict( + checkpoint=dict(interval=1, type='CheckpointHook'), + logger=dict(interval=50, type='LoggerHook'), + param_scheduler=dict(type='ParamSchedulerHook'), + sampler_seed=dict(type='DistSamplerSeedHook'), + timer=dict(type='IterTimerHook'), + visualization=dict(type='DetVisualizationHook')) +default_scope = 'mmdet' +env_cfg = dict( + cudnn_benchmark=False, + dist_cfg=dict(backend='nccl'), + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0)) +load_from = None +log_level = 'ERROR' +log_processor = dict(by_epoch=True, type='LogProcessor', window_size=50) model = dict( - type='PAA', - data_preprocessor=dict( - type='DetDataPreprocessor', - mean=[123.675, 116.28, 103.53], - std=[58.395, 57.12, 57.375], - bgr_to_rgb=True, - pad_size_divisor=32), backbone=dict( - type='ResNet', depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), + init_cfg=dict(checkpoint='torchvision://resnet50', type='Pretrained'), + norm_cfg=dict(requires_grad=True, type='BN'), norm_eval=True, + num_stages=4, + out_indices=( + 0, + 1, + 2, + 3, + ), style='pytorch', - init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), - neck=dict( - type='FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - start_level=1, - add_extra_convs='on_output', - num_outs=5), + type='ResNet'), bbox_head=dict( - type='PAAHead', - reg_decoded_bbox=True, - score_voting=True, - topk=9, - num_classes=80, - in_channels=256, - stacked_convs=4, - feat_channels=256, anchor_generator=dict( - type='AnchorGenerator', - ratios=[1.0], octave_base_scale=8, + ratios=[ + 1.0, + ], scales_per_octave=1, - strides=[8, 16, 32, 64, 128]), + strides=[ + 8, + 16, + 32, + 64, + 128, + ], + type='AnchorGenerator'), bbox_coder=dict( - type='DeltaXYWHBBoxCoder', - target_means=[.0, .0, .0, .0], - target_stds=[0.1, 0.1, 0.2, 0.2]), + target_means=[ + 0.0, + 0.0, + 0.0, + 0.0, + ], + target_stds=[ + 0.1, + 0.1, + 0.2, + 0.2, + ], + type='DeltaXYWHBBoxCoder'), + feat_channels=256, + in_channels=256, + loss_bbox=dict(loss_weight=1.3, type='GIoULoss'), + loss_centerness=dict( + loss_weight=0.5, type='CrossEntropyLoss', use_sigmoid=True), loss_cls=dict( - type='FocalLoss', - use_sigmoid=True, - gamma=2.0, alpha=0.25, - loss_weight=1.0), - loss_bbox=dict(type='GIoULoss', loss_weight=1.3), - loss_centerness=dict( - type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.5)), - # training and testing settings + gamma=2.0, + loss_weight=1.0, + type='FocalLoss', + use_sigmoid=True), + num_classes=80, + reg_decoded_bbox=True, + score_voting=True, + stacked_convs=4, + topk=9, + type='PAAHead'), + data_preprocessor=dict( + bgr_to_rgb=True, + mean=[ + 123.675, + 116.28, + 103.53, + ], + pad_size_divisor=32, + std=[ + 58.395, + 57.12, + 57.375, + ], + type='DetDataPreprocessor'), + neck=dict( + add_extra_convs='on_output', + in_channels=[ + 256, + 512, + 1024, + 2048, + ], + num_outs=5, + out_channels=256, + start_level=1, + type='FPN'), + test_cfg=dict( + max_per_img=100, + min_bbox_size=0, + nms=dict(iou_threshold=0.6, type='nms'), + nms_pre=1000, + score_thr=0.05), train_cfg=dict( + allowed_border=-1, assigner=dict( - type='MaxIoUAssigner', - pos_iou_thr=0.1, - neg_iou_thr=0.1, + ignore_iof_thr=-1, min_pos_iou=0, - ignore_iof_thr=-1), - allowed_border=-1, - pos_weight=-1, - debug=False), - test_cfg=dict( - 
nms_pre=1000, - min_bbox_size=0, - score_thr=0.05, - nms=dict(type='nms', iou_threshold=0.6), - max_per_img=100)) -# optimizer + neg_iou_thr=0.1, + pos_iou_thr=0.1, + type='MaxIoUAssigner'), + debug=False, + pos_weight=-1), + type='PAA') optim_wrapper = dict( - type='OptimWrapper', - optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)) \ No newline at end of file + optimizer=dict(lr=0.01, momentum=0.9, type='SGD', weight_decay=0.0001), + type='OptimWrapper') +param_scheduler = [ + dict( + begin=0, by_epoch=False, end=500, start_factor=0.001, type='LinearLR'), + dict( + begin=0, + by_epoch=True, + end=12, + gamma=0.1, + milestones=[ + 8, + 11, + ], + type='MultiStepLR'), +] +resume = False +test_cfg = dict(type='TestLoop') +test_dataloader = dict( + batch_size=32, + dataset=dict( + ann_file='annotations/instances_val2017.json', + backend_args=None, + data_prefix=dict(img='images/val2017/'), + data_root='data/coco/', + pipeline=[ + dict(backend_args=None, type='LoadImageFromFile'), + dict(keep_ratio=True, scale=( + 800, + 800, + ), type='Resize'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + meta_keys=( + 'img_id', + 'img_path', + 'ori_shape', + 'img_shape', + 'scale_factor', + ), + type='PackDetInputs'), + ], + test_mode=True, + type='CocoDataset'), + drop_last=False, + num_workers=2, + persistent_workers=True, + sampler=dict(shuffle=False, type='DefaultSampler')) +test_evaluator = dict( + ann_file='data/coco/annotations/instances_val2017.json', + backend_args=None, + format_only=False, + metric='bbox', + type='CocoMetric') +test_pipeline = [ + dict(backend_args=None, type='LoadImageFromFile'), + dict(keep_ratio=True, scale=( + 800, + 800, + ), type='Resize'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + meta_keys=( + 'img_id', + 'img_path', + 'ori_shape', + 'img_shape', + 'scale_factor', + ), + type='PackDetInputs'), +] +train_cfg = dict(max_epochs=12, type='EpochBasedTrainLoop', val_interval=1) +train_dataloader = dict( + batch_sampler=dict(type='AspectRatioBatchSampler'), + batch_size=2, + dataset=dict( + ann_file='annotations/instances_train2017.json', + backend_args=None, + data_prefix=dict(img='train2017/'), + data_root='data/coco/', + filter_cfg=dict(filter_empty_gt=True, min_size=32), + pipeline=[ + dict(backend_args=None, type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(keep_ratio=True, scale=( + 1333, + 800, + ), type='Resize'), + dict(prob=0.5, type='RandomFlip'), + dict(type='PackDetInputs'), + ], + type='CocoDataset'), + num_workers=2, + persistent_workers=True, + sampler=dict(shuffle=True, type='DefaultSampler')) +train_pipeline = [ + dict(backend_args=None, type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(keep_ratio=True, scale=( + 1333, + 800, + ), type='Resize'), + dict(prob=0.5, type='RandomFlip'), + dict(type='PackDetInputs'), +] +val_cfg = dict(type='ValLoop') +val_dataloader = dict( + batch_size=1, + dataset=dict( + ann_file='annotations/instances_val2017.json', + backend_args=None, + data_prefix=dict(img='val2017/'), + data_root='data/coco/', + pipeline=[ + dict(backend_args=None, type='LoadImageFromFile'), + dict(keep_ratio=True, scale=( + 800, + 800, + ), type='Resize'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + meta_keys=( + 'img_id', + 'img_path', + 'ori_shape', + 'img_shape', + 'scale_factor', + ), + type='PackDetInputs'), + ], + test_mode=True, + type='CocoDataset'), + drop_last=False, + num_workers=2, + persistent_workers=True, + 
sampler=dict(shuffle=False, type='DefaultSampler')) +val_evaluator = dict( + ann_file='data/coco/annotations/instances_val2017.json', + backend_args=None, + format_only=False, + metric='bbox', + type='CocoMetric') +vis_backends = [ + dict(type='LocalVisBackend'), +] +visualizer = dict( + name='visualizer', + type='DetLocalVisualizer', + vis_backends=[ + dict(type='LocalVisBackend'), + ]) diff --git a/models/cv/detection/retinanet/igie/inference.py b/models/cv/detection/retinanet/igie/inference.py index 7e47529e..75922190 100644 --- a/models/cv/detection/retinanet/igie/inference.py +++ b/models/cv/detection/retinanet/igie/inference.py @@ -97,15 +97,17 @@ def main(): for _ in range(args.warmup): module.run() - # Runner config + # runner config cfg = Config.fromfile("retinanet_r50_fpn_1x_coco.py") - cfg.work_dir = "./" + cfg.work_dir = "./workspace" cfg['test_dataloader']['batch_size'] = batch_size cfg['test_dataloader']['dataset']['data_root'] = args.datasets cfg['test_dataloader']['dataset']['data_prefix']['img'] = 'images/val2017/' cfg['test_evaluator']['ann_file'] = os.path.join(args.datasets, 'annotations/instances_val2017.json') + cfg['log_level'] = 'ERROR' + # build runner runner = RUNNERS.build(cfg) for input_data in tqdm(runner.test_dataloader): diff --git a/models/cv/detection/retinanet/igie/retinanet_r50_fpn_1x_coco.py b/models/cv/detection/retinanet/igie/retinanet_r50_fpn_1x_coco.py index 07570f1b..d176b02d 100644 --- a/models/cv/detection/retinanet/igie/retinanet_r50_fpn_1x_coco.py +++ b/models/cv/detection/retinanet/igie/retinanet_r50_fpn_1x_coco.py @@ -165,7 +165,7 @@ test_dataloader = dict( ann_file='annotations/instances_val2017.json', backend_args=None, data_prefix=dict(img='images/val2017/'), - data_root='/root/.igie_cache/data/datasets/coco/', + data_root='data/coco/', pipeline=[ dict(backend_args=None, type='LoadImageFromFile'), dict(keep_ratio=False, scale=( @@ -190,8 +190,7 @@ test_dataloader = dict( persistent_workers=True, sampler=dict(shuffle=False, type='DefaultSampler')) test_evaluator = dict( - ann_file= - '/root/.igie_cache/data/datasets/coco/annotations/instances_val2017.json', + ann_file='data/coco/annotations/instances_val2017.json', backend_args=None, format_only=False, metric='bbox', @@ -220,7 +219,7 @@ val_dataloader = dict( ann_file='annotations/instances_val2017.json', backend_args=None, data_prefix=dict(img='images/val2017/'), - data_root='/root/.igie_cache/data/datasets/coco/', + data_root='data/coco/', pipeline=[ dict(backend_args=None, type='LoadImageFromFile'), dict(keep_ratio=False, scale=( @@ -245,10 +244,8 @@ val_dataloader = dict( persistent_workers=True, sampler=dict(shuffle=False, type='DefaultSampler')) val_evaluator = dict( - ann_file= - '/root/.igie_cache/data/datasets/coco/annotations/instances_val2017.json', + ann_file='data/coco/annotations/instances_val2017.json', backend_args=None, format_only=False, metric='bbox', type='CocoMetric') -work_dir = './' diff --git a/models/cv/detection/rtmdet/igie/README.md b/models/cv/detection/rtmdet/igie/README.md index 737d203c..3d25cec4 100644 --- a/models/cv/detection/rtmdet/igie/README.md +++ b/models/cv/detection/rtmdet/igie/README.md @@ -19,13 +19,12 @@ pip3 install onnx pip3 install tqdm pip3 install onnxsim pip3 install mmdet==3.3.0 -pip3 install mmpose==1.3.1 pip3 install mmdeploy==1.3.1 pip3 install mmengine==0.10.4 ``` ### Download -Pretrained model: https://download.openmmlab.com/mmpose/v1/projects/rtmpose/rtmdet_nano_8xb32-100e_coco-obj365-person-05d8511e.pth +Pretrained model: Dataset: to 
download the validation dataset. @@ -55,7 +54,11 @@ bash scripts/infer_rtmdet_fp16_performance.sh ``` ## Results +Model |BatchSize |Precision |FPS |IOU@0.5 |IOU@0.5:0.95 | +----------|-----------|----------|----------|----------|---------------| +RTMDet | 32 | FP16 | 2627.15 | 0.619 | 0.403 | -| Model | BatchSize | Input Shape | Precision | FPS | mAP@0.5(%) | -| :-------: | :-------: | :---------: | :-------: | :-------: | :--------: | -| RTMDet | 32 | 320x320 | FP16 | 2627.15 | 0.619 | \ No newline at end of file + +## Reference + +mmdetection: diff --git a/models/cv/detection/rtmdet/igie/inference.py b/models/cv/detection/rtmdet/igie/inference.py index 4c89904b..43099007 100644 --- a/models/cv/detection/rtmdet/igie/inference.py +++ b/models/cv/detection/rtmdet/igie/inference.py @@ -20,7 +20,7 @@ import torch import numpy as np from tvm import relay from tqdm import tqdm -from mmpose.registry import RUNNERS +from mmdet.registry import RUNNERS from mmengine.config import Config def parse_args(): @@ -99,7 +99,7 @@ def main(): # runner config cfg = Config.fromfile("rtmdet_nano_320-8xb32_coco-person.py") - cfg.work_dir = "./" + cfg.work_dir = "./workspace" cfg['test_dataloader']['batch_size'] = batch_size cfg['test_dataloader']['dataset']['data_root'] = args.datasets cfg['test_dataloader']['dataset']['data_prefix']['img'] = 'images/val2017/' diff --git a/models/cv/detection/rtmdet/igie/rtmdet_nano_320-8xb32_coco-person.py b/models/cv/detection/rtmdet/igie/rtmdet_nano_320-8xb32_coco-person.py index 6e71fb66..1b790e55 100644 --- a/models/cv/detection/rtmdet/igie/rtmdet_nano_320-8xb32_coco-person.py +++ b/models/cv/detection/rtmdet/igie/rtmdet_nano_320-8xb32_coco-person.py @@ -1,3 +1,18 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ auto_scale_lr = dict(base_batch_size=16, enable=False) backend_args = None base_lr = 0.004 @@ -192,7 +207,7 @@ test_dataloader = dict( ann_file='annotations/instances_val2017.json', backend_args=None, data_prefix=dict(img='images/val2017/'), - data_root='/root/.igie_cache/data/datasets/coco', + data_root='/data/coco', metainfo=dict(classes=('person', )), pipeline=[ dict(type='LoadImageFromFile'), @@ -229,8 +244,7 @@ test_dataloader = dict( sampler=dict(_scope_='mmdet', shuffle=False, type='DefaultSampler')) test_evaluator = dict( _scope_='mmdet', - ann_file= - '/root/.igie_cache/data/datasets/coco/annotations/person_keypoints_val2017.json', + ann_file='data/coco/annotations/person_keypoints_val2017.json', backend_args=None, format_only=False, metric='bbox', diff --git a/models/cv/pose_estimation/rtmpose/igie/README.md b/models/cv/pose_estimation/rtmpose/igie/README.md index 2b7fb929..d85e1cf1 100644 --- a/models/cv/pose_estimation/rtmpose/igie/README.md +++ b/models/cv/pose_estimation/rtmpose/igie/README.md @@ -25,7 +25,7 @@ pip3 install mmengine==0.10.4 ``` ### Download -Pretrained model: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-aic-coco_pt-aic-coco_420e-256x192-63eb25f7_20230126.pth +Pretrained model: Dataset: to download the validation dataset. @@ -56,6 +56,11 @@ bash scripts/infer_rtmpose_fp16_performance.sh ## Results -| Model | BatchSize | Input Shape | Precision | FPS | mAP@0.5(%) | -| :-------: | :-------: | :---------: | :-------: | :-------: | :--------: | -| RTMPose | 32 | 252x196 | FP16 | 2313.33 | 0.936 | +Model |BatchSize |Precision |FPS |IOU@0.5 |IOU@0.5:0.95 | +----------|-----------|----------|----------|----------|---------------| +RTMPose | 32 | FP16 | 2313.33 | 0.936 | 0.773 | + + +## Reference + +mmpose: \ No newline at end of file diff --git a/models/cv/pose_estimation/rtmpose/igie/inference.py b/models/cv/pose_estimation/rtmpose/igie/inference.py index d01502e2..6138fab6 100644 --- a/models/cv/pose_estimation/rtmpose/igie/inference.py +++ b/models/cv/pose_estimation/rtmpose/igie/inference.py @@ -99,7 +99,7 @@ def main(): # runner config cfg = Config.fromfile("rtmpose-m_8xb256-420e_coco-256x192.py") - cfg.work_dir = "./" + cfg.work_dir = "./workspace" cfg['test_dataloader']['batch_size'] = batch_size cfg['test_dataloader']['dataset']['data_root'] = args.datasets cfg['test_dataloader']['dataset']['data_prefix']['img'] = 'images/val2017/' diff --git a/models/cv/pose_estimation/rtmpose/igie/rtmpose-m_8xb256-420e_coco-256x192.py b/models/cv/pose_estimation/rtmpose/igie/rtmpose-m_8xb256-420e_coco-256x192.py index 9428b7fe..cd116625 100644 --- a/models/cv/pose_estimation/rtmpose/igie/rtmpose-m_8xb256-420e_coco-256x192.py +++ b/models/cv/pose_estimation/rtmpose/igie/rtmpose-m_8xb256-420e_coco-256x192.py @@ -235,7 +235,7 @@ test_dataloader = dict( ann_file='annotations/person_keypoints_val2017.json', data_mode='topdown', data_prefix=dict(img='images/val2017/'), - data_root='/root/.igie_cache/data/datasets/coco', + data_root='data/coco/', pipeline=[ dict(backend_args=dict(backend='local'), type='LoadImage'), dict(type='GetBBoxCenterScale'), @@ -252,8 +252,7 @@ test_dataloader = dict( persistent_workers=True, sampler=dict(round_up=False, shuffle=False, type='DefaultSampler')) test_evaluator = dict( - ann_file= - '/root/.igie_cache/data/datasets/coco/annotations/person_keypoints_val2017.json', + ann_file='data/coco/annotations/person_keypoints_val2017.json', type='CocoMetric') train_batch_size = 256 train_cfg = dict(by_epoch=True, 
max_epochs=420, val_interval=10) @@ -462,4 +461,3 @@ visualizer = dict( vis_backends=[ dict(type='LocalVisBackend'), ]) -work_dir = './' -- Gitee From e135208fbca20b05e53a48d9706c146f9a07ef1c Mon Sep 17 00:00:00 2001 From: YoungPeng Date: Tue, 15 Oct 2024 14:17:18 +0800 Subject: [PATCH 18/18] Add: kie_layoutxlm config file. --- .../ser_vi_layoutxlm_xfund_zh.yml | 151 ++++++++++++++++++ 1 file changed, 151 insertions(+) create mode 100644 models/cv/ocr/kie_layoutxlm/ser_vi_layoutxlm_xfund_zh.yml diff --git a/models/cv/ocr/kie_layoutxlm/ser_vi_layoutxlm_xfund_zh.yml b/models/cv/ocr/kie_layoutxlm/ser_vi_layoutxlm_xfund_zh.yml new file mode 100644 index 00000000..5f2f6345 --- /dev/null +++ b/models/cv/ocr/kie_layoutxlm/ser_vi_layoutxlm_xfund_zh.yml @@ -0,0 +1,151 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +Global: + use_gpu: True + epoch_num: &epoch_num 200 + log_smooth_window: 10 + print_batch_step: 10 + save_model_dir: ./output/ser_vi_layoutxlm_xfund_zh + save_epoch_step: 2000 + # evaluation is run every 10 iterations after the 0th iteration + eval_batch_step: [ 0, 19 ] + cal_metric_during_train: False + save_inference_dir: + use_visualdl: False + seed: 2022 + infer_img: ppstructure/docs/kie/input/zh_val_42.jpg + # if you want to predict using the groundtruth ocr info, + # you can use the following config + # infer_img: train_data/XFUND/zh_val/val.json + # infer_mode: False + + save_res_path: ./output/ser/xfund_zh/res + kie_rec_model_dir: + kie_det_model_dir: + +Architecture: + model_type: kie + algorithm: &algorithm "LayoutXLM" + Transform: + Backbone: + name: LayoutXLMForSer + pretrained: True + checkpoints: + # one of base or vi + mode: vi + num_classes: &num_classes 7 + +Loss: + name: VQASerTokenLayoutLMLoss + num_classes: *num_classes + key: "backbone_out" + +Optimizer: + name: AdamW + beta1: 0.9 + beta2: 0.999 + lr: + name: Linear + learning_rate: 0.00005 + epochs: *epoch_num + warmup_epoch: 2 + regularizer: + name: L2 + factor: 0.00000 + +PostProcess: + name: VQASerTokenLayoutLMPostProcess + class_path: &class_path train_data/XFUND/class_list_xfun.txt + +Metric: + name: VQASerTokenMetric + main_indicator: hmean + +Train: + dataset: + name: SimpleDataSet + data_dir: train_data/XFUND/zh_train/image + label_file_list: + - train_data/XFUND/zh_train/train.json + ratio_list: [ 1.0 ] + transforms: + - DecodeImage: # load image + img_mode: RGB + channel_first: False + - VQATokenLabelEncode: # Class handling label + contains_re: False + algorithm: *algorithm + class_path: *class_path + use_textline_bbox_info: &use_textline_bbox_info True + # one of [None, "tb-yx"] + order_method: &order_method "tb-yx" + - VQATokenPad: + max_seq_len: &max_seq_len 512 + return_attention_mask: True + - VQASerTokenChunk: + max_seq_len: *max_seq_len + - Resize: + size: [224,224] + - NormalizeImage: + scale: 1 + mean: [ 123.675, 116.28, 103.53 ] + std: [ 58.395, 57.12, 57.375 ] + order: 'hwc' + - ToCHWImage: + - 
KeepKeys: + keep_keys: [ 'input_ids', 'bbox', 'attention_mask', 'token_type_ids', 'image', 'labels'] # dataloader will return list in this order + loader: + shuffle: True + drop_last: False + batch_size_per_card: 8 + num_workers: 4 + +Eval: + dataset: + name: SimpleDataSet + data_dir: train_data/XFUND/zh_val/image + label_file_list: + - train_data/XFUND/zh_val/val.json + transforms: + - DecodeImage: # load image + img_mode: RGB + channel_first: False + - VQATokenLabelEncode: # Class handling label + contains_re: False + algorithm: *algorithm + class_path: *class_path + use_textline_bbox_info: *use_textline_bbox_info + order_method: *order_method + - VQATokenPad: + max_seq_len: *max_seq_len + return_attention_mask: True + - VQASerTokenChunk: + max_seq_len: *max_seq_len + - Resize: + size: [224,224] + - NormalizeImage: + scale: 1 + mean: [ 123.675, 116.28, 103.53 ] + std: [ 58.395, 57.12, 57.375 ] + order: 'hwc' + - ToCHWImage: + - KeepKeys: + keep_keys: [ 'input_ids', 'bbox', 'attention_mask', 'token_type_ids', 'image', 'labels'] # dataloader will return list in this order + loader: + shuffle: False + drop_last: False + batch_size_per_card: 8 + num_workers: 4 -- Gitee
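
---

The `ser_vi_layoutxlm_xfund_zh.yml` file added in the final patch is a PaddleOCR-style training/eval config and is not run on its own; it is passed to PaddleOCR's KIE tooling. Below is a minimal usage sketch, assuming a standard PaddleOCR checkout and a downloaded XFUND-zh SER checkpoint; the tool paths, checkpoint location, and output directory are assumptions for illustration and are not part of this patch series.

```bash
# Sketch only: run from a PaddleOCR checkout with this config copied or referenced in place.
# The checkpoint path below (ser_vi_layoutxlm_xfund_pretrained/best_accuracy) is an assumed download location.

# Evaluate the SER model defined by this config on the XFUND-zh validation set.
python3 tools/eval.py \
    -c ser_vi_layoutxlm_xfund_zh.yml \
    -o Architecture.Backbone.checkpoints=./pretrained_model/ser_vi_layoutxlm_xfund_pretrained/best_accuracy

# Export a static inference model for downstream conversion (e.g. to ONNX via paddle2onnx);
# the save_inference_dir value is an assumption.
python3 tools/export_model.py \
    -c ser_vi_layoutxlm_xfund_zh.yml \
    -o Architecture.Backbone.checkpoints=./pretrained_model/ser_vi_layoutxlm_xfund_pretrained/best_accuracy \
       Global.save_inference_dir=./inference/ser_vi_layoutxlm
```

The `-o` overrides mirror the fields left blank in the config (`Architecture.Backbone.checkpoints`, `Global.save_inference_dir`), so the YAML itself can stay checkout-agnostic while paths are supplied at invocation time.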