From db92284367eb223aa639e4d7e98cf9450ee92748 Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Tue, 7 Jan 2025 16:17:42 +0800 Subject: [PATCH 01/35] init ixrt ci --- .../classification/alexnet/ixrt/ci/prepare.sh | 29 +- .../convnext_small/ixrt/README.md | 8 +- .../convnext_small/ixrt/ci/prepare.sh | 29 + .../convnext_small/ixrt/inference.py | 4 + .../convnext_small/ixrt/requirements.txt | 7 + .../cspdarknet53/ixrt/README.md | 13 +- .../cspdarknet53/ixrt/ci/prepare.sh | 34 ++ .../cspdarknet53/ixrt/inference.py | 4 + .../cspdarknet53/ixrt/requirements.txt | 6 + .../classification/cspresnet50/ixrt/README.md | 12 +- .../cspresnet50/ixrt/ci/prepare.sh | 34 ++ .../cspresnet50/ixrt/inference.py | 5 +- .../cspresnet50/ixrt/requirements.txt | 7 + .../densenet121/ixrt/ci/prepare.sh | 27 + .../classification/densenet161/ixrt/README.md | 6 +- .../densenet161/ixrt/ci/prepare.sh | 30 + .../densenet161/ixrt/inference.py | 4 + .../densenet161/ixrt/requirements.txt | 5 + .../classification/densenet169/ixrt/README.md | 6 +- .../densenet169/ixrt/ci/prepare.sh | 30 + .../densenet169/ixrt/inference.py | 4 + .../densenet169/ixrt/requirements.txt | 5 + .../efficientnet_b0/ixrt/ci/prepare.sh | 29 +- .../efficientnet_b1/ixrt/README.md | 5 +- .../efficientnet_b1/ixrt/ci/prepare.sh | 33 ++ .../efficientnet_b1/ixrt/inference.py | 5 +- .../efficientnet_b1/ixrt/requirements.txt | 4 + .../efficientnet_b2/ixrt/README.md | 9 +- .../efficientnet_b2/ixrt/ci/prepare.sh | 30 + .../efficientnet_b2/ixrt/inference.py | 4 + .../efficientnet_b2/ixrt/requirements.txt | 4 + ...=> infer_efficientnet_b2_fp16_accuracy.sh} | 0 ...infer_efficientnet_b2_fp16_performance.sh} | 0 .../efficientnet_v2/ixrt/ci/prepare.sh | 27 + .../googlenet/ixrt/ci/prepare.sh | 27 + .../classification/hrnet_w18/ixrt/README.md | 8 +- .../hrnet_w18/ixrt/ci/prepare.sh | 32 ++ .../hrnet_w18/ixrt/inference.py | 5 +- .../hrnet_w18/ixrt/requirements.txt | 7 + .../inception_v3/ixrt/ci/prepare.sh | 27 + .../inceptionresnetv2/ixrt/README.md | 12 +- .../inceptionresnetv2/ixrt/ci/prepare.sh | 32 ++ .../inceptionresnetv2/ixrt/requirements.txt | 11 + .../mobilenet_v2/ixrt/ci/prepare.sh | 27 + .../mobilenet_v3/ixrt/ci/prepare.sh | 27 + .../cv/classification/repvgg/ixrt/README.md | 12 +- .../classification/repvgg/ixrt/ci/prepare.sh | 34 ++ .../classification/repvgg/ixrt/inference.py | 5 +- .../repvgg/ixrt/requirements.txt | 7 + .../res2net50/ixrt/ci/prepare.sh | 27 + .../classification/resnet101/ixrt/README.md | 5 +- .../resnet101/ixrt/ci/prepare.sh | 32 ++ .../resnet101/ixrt/inference.py | 5 +- .../resnet101/ixrt/requirements.txt | 4 + .../resnet18/ixrt/ci/prepare.sh | 27 + .../cv/classification/resnet34/ixrt/README.md | 5 +- .../resnet34/ixrt/ci/prepare.sh | 32 ++ .../classification/resnet34/ixrt/inference.py | 4 + .../resnet34/ixrt/requirements.txt | 4 + .../resnet50/ixrt/ci/prepare.sh | 27 + .../classification/resnetv1d50/ixrt/README.md | 10 +- .../resnetv1d50/ixrt/ci/prepare.sh | 32 ++ ...ESNET_V1_D50_CONFIG => RESNETV1D50_CONFIG} | 0 .../resnetv1d50/ixrt/inference.py | 5 +- .../resnetv1d50/ixrt/requirements.txt | 7 + .../resnext50_32x4d/ixrt/README.md | 9 +- .../resnext50_32x4d/ixrt/ci/prepare.sh | 29 + .../resnext50_32x4d/ixrt/inference.py | 4 + .../resnext50_32x4d/ixrt/requirements.txt | 8 + .../shufflenet_v1/ixrt/README.md | 4 +- .../shufflenet_v1/ixrt/ci/prepare.sh | 15 +- .../ixrt/README.md | 14 +- .../ixrt/build_engine.py | 0 .../ixrt/calibration_dataset.py | 0 .../squeezenet_v1_0/ixrt/ci/prepare.sh | 30 + .../ixrt/common.py | 0 
.../ixrt/config/SQUEEZENET_V1_0_CONFIG} | 0 .../ixrt/export_onnx.py | 0 .../ixrt/inference.py | 5 +- .../ixrt/modify_batchsize.py | 0 .../ixrt/quant.py | 0 .../ixrt/refine_model.py | 0 .../squeezenet_v1_0/ixrt/requirements.txt | 3 + .../infer_squeezenet_v1_0_fp16_accuracy.sh} | 0 ...infer_squeezenet_v1_0_fp16_performance.sh} | 0 .../infer_squeezenet_v1_0_int8_accuracy.sh} | 0 ...infer_squeezenet_v1_0_int8_performance.sh} | 0 .../ixrt/simplify_model.py | 0 .../squeezenet_v1_1/ixrt/ci/prepare.sh | 27 + .../swin_transformer_large/ixrt/README.md | 11 +- .../swin_transformer_large/ixrt/ci/prepare.sh | 30 + .../ixrt/requirements.txt | 10 + .../classification/vgg16/ixrt/ci/prepare.sh | 27 + .../wide_resnet50/ixrt/README.md | 3 +- .../wide_resnet50/ixrt/ci/prepare.sh | 30 + .../wide_resnet50/ixrt/inference.py | 4 + .../wide_resnet50/ixrt/requirements.txt | 2 + .../ixrt/datasets/transformations.py | 0 tests/models_ixrt.yaml | 507 ++++++++++++++++ tests/run_ixrt.py | 541 ++++++++++++++++++ 100 files changed, 2139 insertions(+), 132 deletions(-) create mode 100644 models/cv/classification/convnext_small/ixrt/ci/prepare.sh create mode 100644 models/cv/classification/convnext_small/ixrt/requirements.txt create mode 100644 models/cv/classification/cspdarknet53/ixrt/ci/prepare.sh create mode 100644 models/cv/classification/cspdarknet53/ixrt/requirements.txt create mode 100644 models/cv/classification/cspresnet50/ixrt/ci/prepare.sh create mode 100644 models/cv/classification/cspresnet50/ixrt/requirements.txt create mode 100644 models/cv/classification/densenet161/ixrt/ci/prepare.sh create mode 100644 models/cv/classification/densenet161/ixrt/requirements.txt create mode 100644 models/cv/classification/densenet169/ixrt/ci/prepare.sh create mode 100644 models/cv/classification/densenet169/ixrt/requirements.txt create mode 100644 models/cv/classification/efficientnet_b1/ixrt/ci/prepare.sh create mode 100644 models/cv/classification/efficientnet_b1/ixrt/requirements.txt create mode 100644 models/cv/classification/efficientnet_b2/ixrt/ci/prepare.sh create mode 100644 models/cv/classification/efficientnet_b2/ixrt/requirements.txt rename models/cv/classification/efficientnet_b2/ixrt/scripts/{infer_efficientnet_b1_fp16_accuracy.sh => infer_efficientnet_b2_fp16_accuracy.sh} (100%) rename models/cv/classification/efficientnet_b2/ixrt/scripts/{infer_efficientnet_b1_fp16_performance.sh => infer_efficientnet_b2_fp16_performance.sh} (100%) create mode 100644 models/cv/classification/hrnet_w18/ixrt/ci/prepare.sh create mode 100644 models/cv/classification/hrnet_w18/ixrt/requirements.txt create mode 100644 models/cv/classification/inceptionresnetv2/ixrt/ci/prepare.sh create mode 100644 models/cv/classification/inceptionresnetv2/ixrt/requirements.txt create mode 100644 models/cv/classification/repvgg/ixrt/ci/prepare.sh create mode 100644 models/cv/classification/repvgg/ixrt/requirements.txt create mode 100644 models/cv/classification/resnet101/ixrt/ci/prepare.sh create mode 100644 models/cv/classification/resnet101/ixrt/requirements.txt create mode 100644 models/cv/classification/resnet34/ixrt/ci/prepare.sh create mode 100644 models/cv/classification/resnet34/ixrt/requirements.txt create mode 100644 models/cv/classification/resnetv1d50/ixrt/ci/prepare.sh rename models/cv/classification/resnetv1d50/ixrt/config/{RESNET_V1_D50_CONFIG => RESNETV1D50_CONFIG} (100%) create mode 100644 models/cv/classification/resnetv1d50/ixrt/requirements.txt create mode 100644 models/cv/classification/resnext50_32x4d/ixrt/ci/prepare.sh create 
mode 100644 models/cv/classification/resnext50_32x4d/ixrt/requirements.txt rename models/cv/classification/{squeezenet_1.0 => squeezenet_v1_0}/ixrt/README.md (79%) rename models/cv/classification/{squeezenet_1.0 => squeezenet_v1_0}/ixrt/build_engine.py (100%) rename models/cv/classification/{squeezenet_1.0 => squeezenet_v1_0}/ixrt/calibration_dataset.py (100%) create mode 100644 models/cv/classification/squeezenet_v1_0/ixrt/ci/prepare.sh rename models/cv/classification/{squeezenet_1.0 => squeezenet_v1_0}/ixrt/common.py (100%) rename models/cv/classification/{squeezenet_1.0/ixrt/config/SQUEEZENET_V10_CONFIG => squeezenet_v1_0/ixrt/config/SQUEEZENET_V1_0_CONFIG} (100%) rename models/cv/classification/{squeezenet_1.0 => squeezenet_v1_0}/ixrt/export_onnx.py (100%) rename models/cv/classification/{squeezenet_1.0 => squeezenet_v1_0}/ixrt/inference.py (97%) rename models/cv/classification/{squeezenet_1.0 => squeezenet_v1_0}/ixrt/modify_batchsize.py (100%) rename models/cv/classification/{squeezenet_1.0 => squeezenet_v1_0}/ixrt/quant.py (100%) rename models/cv/classification/{squeezenet_1.0 => squeezenet_v1_0}/ixrt/refine_model.py (100%) create mode 100644 models/cv/classification/squeezenet_v1_0/ixrt/requirements.txt rename models/cv/classification/{squeezenet_1.0/ixrt/scripts/infer_squeezenet_v10_fp16_accuracy.sh => squeezenet_v1_0/ixrt/scripts/infer_squeezenet_v1_0_fp16_accuracy.sh} (100%) rename models/cv/classification/{squeezenet_1.0/ixrt/scripts/infer_squeezenet_v10_fp16_performance.sh => squeezenet_v1_0/ixrt/scripts/infer_squeezenet_v1_0_fp16_performance.sh} (100%) rename models/cv/classification/{squeezenet_1.0/ixrt/scripts/infer_squeezenet_v10_int8_accuracy.sh => squeezenet_v1_0/ixrt/scripts/infer_squeezenet_v1_0_int8_accuracy.sh} (100%) rename models/cv/classification/{squeezenet_1.0/ixrt/scripts/infer_squeezenet_v10_int8_performance.sh => squeezenet_v1_0/ixrt/scripts/infer_squeezenet_v1_0_int8_performance.sh} (100%) rename models/cv/classification/{squeezenet_1.0 => squeezenet_v1_0}/ixrt/simplify_model.py (100%) create mode 100644 models/cv/classification/swin_transformer_large/ixrt/ci/prepare.sh create mode 100644 models/cv/classification/swin_transformer_large/ixrt/requirements.txt create mode 100644 models/cv/classification/wide_resnet50/ixrt/ci/prepare.sh create mode 100644 models/cv/classification/wide_resnet50/ixrt/requirements.txt rename models/cv/pose_estimation/{lightweightopenpose => lightweight_openpose}/ixrt/datasets/transformations.py (100%) create mode 100644 tests/models_ixrt.yaml create mode 100644 tests/run_ixrt.py diff --git a/models/cv/classification/alexnet/ixrt/ci/prepare.sh b/models/cv/classification/alexnet/ixrt/ci/prepare.sh index 065a9d03..7795e04d 100644 --- a/models/cv/classification/alexnet/ixrt/ci/prepare.sh +++ b/models/cv/classification/alexnet/ixrt/ci/prepare.sh @@ -1,3 +1,30 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + pip install -r requirements.txt mkdir checkpoints -python3 export_onnx.py --origin_model /root/data/checkpoints/alexnet.pth --output_model checkpoints/alexnet.onnx \ No newline at end of file +python3 export_onnx.py --origin_model /root/data/checkpoints/alexnet-owt-7be5be79.pth --output_model checkpoints/alexnet.onnx \ No newline at end of file diff --git a/models/cv/classification/convnext_small/ixrt/README.md b/models/cv/classification/convnext_small/ixrt/README.md index c6550180..2e9f8521 100644 --- a/models/cv/classification/convnext_small/ixrt/README.md +++ b/models/cv/classification/convnext_small/ixrt/README.md @@ -15,13 +15,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install tqdm -pip3 install onnx -pip3 install onnxsim -pip3 install tabulate -pip3 install ppq -pip3 install tqdm -pip3 install cuda-python +pip3 install -r requirements.txt ``` ### Download diff --git a/models/cv/classification/convnext_small/ixrt/ci/prepare.sh b/models/cv/classification/convnext_small/ixrt/ci/prepare.sh new file mode 100644 index 00000000..a6336683 --- /dev/null +++ b/models/cv/classification/convnext_small/ixrt/ci/prepare.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r requirements.txt +python3 export.py --weight /root/data/checkpoints/convnext_small-0c510722.pth --output convnext_small.onnx \ No newline at end of file diff --git a/models/cv/classification/convnext_small/ixrt/inference.py b/models/cv/classification/convnext_small/ixrt/inference.py index 22f1644c..3d3cf572 100644 --- a/models/cv/classification/convnext_small/ixrt/inference.py +++ b/models/cv/classification/convnext_small/ixrt/inference.py @@ -82,6 +82,7 @@ def main(config): total_sample = 0 acc_top1, acc_top5 = 0, 0 + start_time = time.time() with tqdm(total= len(dataloader)) as _tqdm: for idx, (batch_data, batch_label) in enumerate(dataloader): batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) @@ -116,6 +117,9 @@ def main(config): _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), acc_5='{:.4f}'.format(acc_top5/total_sample)) _tqdm.update(1) + end_time = time.time() + end2end_time = end_time - start_time + print(F"E2E time : {end2end_time:.3f} seconds") err, = cudart.cudaFree(inputs[0]["allocation"]) assert err == cudart.cudaError_t.cudaSuccess err, = cudart.cudaFree(outputs[0]["allocation"]) diff --git a/models/cv/classification/convnext_small/ixrt/requirements.txt b/models/cv/classification/convnext_small/ixrt/requirements.txt new file mode 100644 index 00000000..520130b7 --- /dev/null +++ b/models/cv/classification/convnext_small/ixrt/requirements.txt @@ -0,0 +1,7 @@ +tqdm +onnx +onnxsim +tabulate +ppq +tqdm +cuda-python \ No newline at end of file diff --git a/models/cv/classification/cspdarknet53/ixrt/README.md b/models/cv/classification/cspdarknet53/ixrt/README.md index 4b191542..b8fc83ae 100644 --- a/models/cv/classification/cspdarknet53/ixrt/README.md +++ b/models/cv/classification/cspdarknet53/ixrt/README.md @@ -15,12 +15,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install onnx -pip3 install tqdm -pip3 install onnxsim -pip3 install ppq -pip3 install mmcv==1.5.3 -pip3 install mmcls +pip3 install -r requirements.txt ``` ### Download @@ -42,8 +37,8 @@ wget -O cspdarknet53_3rdparty_8xb32_in1k_20220329-bd275287.pth https://download. python3 export.py --cfg mmpretrain/configs/cspnet/cspdarknet50_8xb32_in1k.py --weight cspdarknet53_3rdparty_8xb32_in1k_20220329-bd275287.pth --output cspdarknet53.onnx # Use onnxsim optimize onnx model -mkdir -p data/checkpoints/cspdarknet53_ckpt -onnxsim cspdarknet5.onnx data/checkpoints/cspdarknet53_ckpt/cspdarknet53_sim.onnx +mkdir -p checkpoints +onnxsim cspdarknet53.onnx checkpoints/cspdarknet53_sim.onnx ``` @@ -51,7 +46,7 @@ onnxsim cspdarknet5.onnx data/checkpoints/cspdarknet53_ckpt/cspdarknet53_sim.onn ```bash export DATASETS_DIR=/Path/to/imagenet_val/ -export CHECKPOINTS_DIR=/Path/to/data/checkpoints/cspdarknet53_ckpt +export CHECKPOINTS_DIR=/Path/to/checkpoints/ export CONFIG_DIR=./config/CSPDARKNET53_CONFIG ``` diff --git a/models/cv/classification/cspdarknet53/ixrt/ci/prepare.sh b/models/cv/classification/cspdarknet53/ixrt/ci/prepare.sh new file mode 100644 index 00000000..891f9f55 --- /dev/null +++ b/models/cv/classification/cspdarknet53/ixrt/ci/prepare.sh @@ -0,0 +1,34 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved.
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r requirements.txt +unzip -q /root/data/repos/mmpretrain-0.24.0.zip -d ./ + +python3 export.py --cfg mmpretrain/configs/cspnet/cspdarknet50_8xb32_in1k.py --weight /root/data/checkpoints/cspdarknet53_3rdparty_8xb32_in1k_20220329-bd275287.pth --output cspdarknet53.onnx + +mkdir -p checkpoints +onnxsim cspdarknet53.onnx checkpoints/cspdarknet53_sim.onnx \ No newline at end of file diff --git a/models/cv/classification/cspdarknet53/ixrt/inference.py b/models/cv/classification/cspdarknet53/ixrt/inference.py index 56b7f51c..360b0cf0 100755 --- a/models/cv/classification/cspdarknet53/ixrt/inference.py +++ b/models/cv/classification/cspdarknet53/ixrt/inference.py @@ -83,6 +83,7 @@ def main(config): total_sample = 0 acc_top1, acc_top5 = 0, 0 + start_time = time.time() with tqdm(total= len(dataloader)) as _tqdm: for idx, (batch_data, batch_label) in enumerate(dataloader): batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) @@ -107,6 +108,9 @@ def main(config): acc_5='{:.4f}'.format(acc_top5/total_sample)) _tqdm.update(1) + end_time = time.time() + e2e_time = end_time - start_time + print(F"E2E time : {e2e_time:.3f} seconds") print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") acc1 = acc_top1/total_sample diff --git a/models/cv/classification/cspdarknet53/ixrt/requirements.txt b/models/cv/classification/cspdarknet53/ixrt/requirements.txt new file mode 100644 index 00000000..40d37d5a --- /dev/null +++ b/models/cv/classification/cspdarknet53/ixrt/requirements.txt @@ -0,0 +1,6 @@ +onnx +tqdm +onnxsim +ppq +mmcv==1.5.3 +mmcls \ No newline at end of file diff --git a/models/cv/classification/cspresnet50/ixrt/README.md b/models/cv/classification/cspresnet50/ixrt/README.md index 9a5d01a8..a95c9a4c 100644 --- a/models/cv/classification/cspresnet50/ixrt/README.md +++ b/models/cv/classification/cspresnet50/ixrt/README.md @@ -16,13 +16,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install tqdm -pip3 install tabulate -pip3 install onnx -pip3 install onnxsim -pip3 install opencv-python==4.6.0.66 -pip3 install mmcls==0.24.0 -pip3 install mmcv==1.5.3 +pip3 install -r requirements.txt ``` ### Download @@ -33,12 +27,10 @@ Dataset: to download the validation dat ```bash mkdir checkpoints -cd checkpoints git clone -b v0.24.0 https://github.com/open-mmlab/mmpretrain.git -cd ..
python3 export_onnx.py \ - --config_file ./checkpoints/mmpretrain/configs/cspnet/cspresnet50_8xb32_in1k.py \ + --config_file ./mmpretrain/configs/cspnet/cspresnet50_8xb32_in1k.py \ --checkpoint_file https://download.openmmlab.com/mmclassification/v0/cspnet/cspresnet50_3rdparty_8xb32_in1k_20220329-dd6dddfb.pth \ --output_model ./checkpoints/cspresnet50.onnx ``` diff --git a/models/cv/classification/cspresnet50/ixrt/ci/prepare.sh b/models/cv/classification/cspresnet50/ixrt/ci/prepare.sh new file mode 100644 index 00000000..d3e995c4 --- /dev/null +++ b/models/cv/classification/cspresnet50/ixrt/ci/prepare.sh @@ -0,0 +1,34 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r requirements.txt +unzip -q /root/data/repos/mmpretrain-0.24.0.zip -d ./ +mkdir -p checkpoints +python3 export_onnx.py \ + --config_file ./mmpretrain/configs/cspnet/cspresnet50_8xb32_in1k.py \ + --checkpoint_file /root/data/checkpoints/cspresnet50_3rdparty_8xb32_in1k_20220329-dd6dddfb.pth \ + --output_model ./checkpoints/cspresnet50.onnx diff --git a/models/cv/classification/cspresnet50/ixrt/inference.py b/models/cv/classification/cspresnet50/ixrt/inference.py index 1ec56b4a..11a90c79 100644 --- a/models/cv/classification/cspresnet50/ixrt/inference.py +++ b/models/cv/classification/cspresnet50/ixrt/inference.py @@ -84,6 +84,7 @@ def main(config): total_sample = 0 acc_top1, acc_top5 = 0, 0 + start_time = time.time() with tqdm(total= len(dataloader)) as _tqdm: for idx, (batch_data, batch_label) in enumerate(dataloader): batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) @@ -105,7 +106,9 @@ def main(config): _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), acc_5='{:.4f}'.format(acc_top5/total_sample)) _tqdm.update(1) - + end_time = time.time() + e2e_time = end_time - start_time + print(F"E2E time : {e2e_time:.3f} seconds") print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") acc1 = acc_top1/total_sample diff --git a/models/cv/classification/cspresnet50/ixrt/requirements.txt b/models/cv/classification/cspresnet50/ixrt/requirements.txt new file mode 100644 index 00000000..fdd84a5d --- /dev/null +++ b/models/cv/classification/cspresnet50/ixrt/requirements.txt @@ -0,0 +1,7 @@ +tqdm +tabulate +onnx +onnxsim +opencv-python==4.6.0.66 +mmcls==0.24.0 +mmcv==1.5.3 \ No newline at end of file diff --git a/models/cv/classification/densenet121/ixrt/ci/prepare.sh b/models/cv/classification/densenet121/ixrt/ci/prepare.sh index 4892448b..c3103b01 100644 --- a/models/cv/classification/densenet121/ixrt/ci/prepare.sh +++ b/models/cv/classification/densenet121/ixrt/ci/prepare.sh @@ -1,3 
+1,30 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + pip install -r requirements.txt mkdir checkpoints mkdir -p /root/.cache/torch/hub/checkpoints/ diff --git a/models/cv/classification/densenet161/ixrt/README.md b/models/cv/classification/densenet161/ixrt/README.md index cb65f64a..c1d5a157 100644 --- a/models/cv/classification/densenet161/ixrt/README.md +++ b/models/cv/classification/densenet161/ixrt/README.md @@ -15,11 +15,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install tqdm -pip3 install onnx -pip3 install onnxsim -pip3 install tabulate -pip3 install cuda-python +pip3 install -r requirements.txt ``` ### Download diff --git a/models/cv/classification/densenet161/ixrt/ci/prepare.sh b/models/cv/classification/densenet161/ixrt/ci/prepare.sh new file mode 100644 index 00000000..79d6d753 --- /dev/null +++ b/models/cv/classification/densenet161/ixrt/ci/prepare.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r requirements.txt + +python3 export.py --weight /root/data/checkpoints/densenet161-8d451a50.pth --output densenet161.onnx \ No newline at end of file diff --git a/models/cv/classification/densenet161/ixrt/inference.py b/models/cv/classification/densenet161/ixrt/inference.py index 22f1644c..e7102e50 100644 --- a/models/cv/classification/densenet161/ixrt/inference.py +++ b/models/cv/classification/densenet161/ixrt/inference.py @@ -82,6 +82,7 @@ def main(config): total_sample = 0 acc_top1, acc_top5 = 0, 0 + start_time = time.time() with tqdm(total= len(dataloader)) as _tqdm: for idx, (batch_data, batch_label) in enumerate(dataloader): batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) @@ -116,6 +117,9 @@ def main(config): _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), acc_5='{:.4f}'.format(acc_top5/total_sample)) _tqdm.update(1) + end_time = time.time() + e2e_time = end_time - start_time + print(F"E2E time : {e2e_time:.3f} seconds") err, = cudart.cudaFree(inputs[0]["allocation"]) assert err == cudart.cudaError_t.cudaSuccess err, = cudart.cudaFree(outputs[0]["allocation"]) diff --git a/models/cv/classification/densenet161/ixrt/requirements.txt b/models/cv/classification/densenet161/ixrt/requirements.txt new file mode 100644 index 00000000..4805e251 --- /dev/null +++ b/models/cv/classification/densenet161/ixrt/requirements.txt @@ -0,0 +1,5 @@ +tqdm +onnx +onnxsim +tabulate +cuda-python \ No newline at end of file diff --git a/models/cv/classification/densenet169/ixrt/README.md b/models/cv/classification/densenet169/ixrt/README.md index 6abd12c0..79c0b0a4 100644 --- a/models/cv/classification/densenet169/ixrt/README.md +++ b/models/cv/classification/densenet169/ixrt/README.md @@ -15,11 +15,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install tqdm -pip3 install onnx -pip3 install onnxsim -pip3 install tabulate -pip3 install cuda-python +pip3 install -r requirements.txt ``` ### Download diff --git a/models/cv/classification/densenet169/ixrt/ci/prepare.sh b/models/cv/classification/densenet169/ixrt/ci/prepare.sh new file mode 100644 index 00000000..21091147 --- /dev/null +++ b/models/cv/classification/densenet169/ixrt/ci/prepare.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r requirements.txt + +python3 export.py --weight /root/data/checkpoints/densenet169-b2777c0a.pth --output densenet169.onnx \ No newline at end of file diff --git a/models/cv/classification/densenet169/ixrt/inference.py b/models/cv/classification/densenet169/ixrt/inference.py index 22f1644c..e7102e50 100644 --- a/models/cv/classification/densenet169/ixrt/inference.py +++ b/models/cv/classification/densenet169/ixrt/inference.py @@ -82,6 +82,7 @@ def main(config): total_sample = 0 acc_top1, acc_top5 = 0, 0 + start_time = time.time() with tqdm(total= len(dataloader)) as _tqdm: for idx, (batch_data, batch_label) in enumerate(dataloader): batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) @@ -116,6 +117,9 @@ def main(config): _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), acc_5='{:.4f}'.format(acc_top5/total_sample)) _tqdm.update(1) + end_time = time.time() + e2e_time = end_time - start_time + print(F"E2E time : {e2e_time:.3f} seconds") err, = cudart.cudaFree(inputs[0]["allocation"]) assert err == cudart.cudaError_t.cudaSuccess err, = cudart.cudaFree(outputs[0]["allocation"]) diff --git a/models/cv/classification/densenet169/ixrt/requirements.txt b/models/cv/classification/densenet169/ixrt/requirements.txt new file mode 100644 index 00000000..4805e251 --- /dev/null +++ b/models/cv/classification/densenet169/ixrt/requirements.txt @@ -0,0 +1,5 @@ +tqdm +onnx +onnxsim +tabulate +cuda-python \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b0/ixrt/ci/prepare.sh b/models/cv/classification/efficientnet_b0/ixrt/ci/prepare.sh index aba93afd..ca66169b 100644 --- a/models/cv/classification/efficientnet_b0/ixrt/ci/prepare.sh +++ b/models/cv/classification/efficientnet_b0/ixrt/ci/prepare.sh @@ -1,3 +1,30 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + pip install -r requirements.txt mkdir checkpoints -python3 export_onnx.py --origin_model /root/data/checkpoints/efficientnet_b0.pth --output_model checkpoints/efficientnet_b0.onnx \ No newline at end of file +python3 export_onnx.py --origin_model /root/data/checkpoints/efficientnet_b0_rwightman-3dd342df.pth --output_model checkpoints/efficientnet_b0.onnx \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b1/ixrt/README.md b/models/cv/classification/efficientnet_b1/ixrt/README.md index 3e09969a..282b6440 100644 --- a/models/cv/classification/efficientnet_b1/ixrt/README.md +++ b/models/cv/classification/efficientnet_b1/ixrt/README.md @@ -15,10 +15,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install tqdm -pip3 install onnx -pip3 install onnxsim -pip3 install tabulate +pip3 install -r requirements.txt ``` ### Download diff --git a/models/cv/classification/efficientnet_b1/ixrt/ci/prepare.sh b/models/cv/classification/efficientnet_b1/ixrt/ci/prepare.sh new file mode 100644 index 00000000..7131b5f0 --- /dev/null +++ b/models/cv/classification/efficientnet_b1/ixrt/ci/prepare.sh @@ -0,0 +1,33 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r requirements.txt + +mkdir checkpoints +mkdir -p /root/.cache/torch/hub/checkpoints/ +ln -s /root/data/checkpoints/efficientnet_b1-c27df63c.pth /root/.cache/torch/hub/checkpoints/efficientnet_b1-c27df63c.pth +python3 export_onnx.py --output_model checkpoints/efficientnet-b1.onnx \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b1/ixrt/inference.py b/models/cv/classification/efficientnet_b1/ixrt/inference.py index 1ec56b4a..11a90c79 100644 --- a/models/cv/classification/efficientnet_b1/ixrt/inference.py +++ b/models/cv/classification/efficientnet_b1/ixrt/inference.py @@ -84,6 +84,7 @@ def main(config): total_sample = 0 acc_top1, acc_top5 = 0, 0 + start_time = time.time() with tqdm(total= len(dataloader)) as _tqdm: for idx, (batch_data, batch_label) in enumerate(dataloader): batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) @@ -105,7 +106,9 @@ def main(config): _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), acc_5='{:.4f}'.format(acc_top5/total_sample)) _tqdm.update(1) - + end_time = time.time() + e2e_time = end_time - start_time + print(F"E2E time : {e2e_time:.3f} seconds") print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") acc1 = acc_top1/total_sample diff --git a/models/cv/classification/efficientnet_b1/ixrt/requirements.txt b/models/cv/classification/efficientnet_b1/ixrt/requirements.txt new file mode 100644 index 00000000..e1eda59c --- /dev/null +++ b/models/cv/classification/efficientnet_b1/ixrt/requirements.txt @@ -0,0 +1,4 @@ +tqdm +onnx +onnxsim +tabulate \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b2/ixrt/README.md b/models/cv/classification/efficientnet_b2/ixrt/README.md index 1eed0f48..cc225491 100644 --- a/models/cv/classification/efficientnet_b2/ixrt/README.md +++ b/models/cv/classification/efficientnet_b2/ixrt/README.md @@ -15,10 +15,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install tqdm -pip3 install onnx -pip3 install onnxsim -pip3 install tabulate +pip3 install -r requirements.txt ``` ### Download @@ -43,9 +40,9 @@ export DATASETS_DIR=/Path/to/imagenet_val/ ```bash # Accuracy -bash scripts/infer_efficientnet_b1_fp16_accuracy.sh +bash scripts/infer_efficientnet_b2_fp16_accuracy.sh # Performance -bash scripts/infer_efficientnet_b1_fp16_performance.sh +bash scripts/infer_efficientnet_b2_fp16_performance.sh ``` ## Results diff --git a/models/cv/classification/efficientnet_b2/ixrt/ci/prepare.sh b/models/cv/classification/efficientnet_b2/ixrt/ci/prepare.sh new file mode 100644 index 00000000..9ec3aff1 --- /dev/null +++ b/models/cv/classification/efficientnet_b2/ixrt/ci/prepare.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r requirements.txt + +python3 export.py --weight /root/data/checkpoints/efficientnet_b2_rwightman-c35c1473.pth --output efficientnet_b2.onnx \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b2/ixrt/inference.py b/models/cv/classification/efficientnet_b2/ixrt/inference.py index 4afba6bc..e5a03525 100644 --- a/models/cv/classification/efficientnet_b2/ixrt/inference.py +++ b/models/cv/classification/efficientnet_b2/ixrt/inference.py @@ -83,6 +83,7 @@ def main(config): total_sample = 0 acc_top1, acc_top5 = 0, 0 + start_time = time.time() with tqdm(total= len(dataloader)) as _tqdm: for idx, (batch_data, batch_label) in enumerate(dataloader): batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) @@ -117,6 +118,9 @@ def main(config): _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), acc_5='{:.4f}'.format(acc_top5/total_sample)) _tqdm.update(1) + end_time = time.time() + e2e_time = end_time - start_time + print(F"E2E time : {e2e_time:.3f} seconds") err, = cudart.cudaFree(inputs[0]["allocation"]) assert err == cudart.cudaError_t.cudaSuccess err, = cudart.cudaFree(outputs[0]["allocation"]) diff --git a/models/cv/classification/efficientnet_b2/ixrt/requirements.txt b/models/cv/classification/efficientnet_b2/ixrt/requirements.txt new file mode 100644 index 00000000..e1eda59c --- /dev/null +++ b/models/cv/classification/efficientnet_b2/ixrt/requirements.txt @@ -0,0 +1,4 @@ +tqdm +onnx +onnxsim +tabulate \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b2/ixrt/scripts/infer_efficientnet_b1_fp16_accuracy.sh b/models/cv/classification/efficientnet_b2/ixrt/scripts/infer_efficientnet_b2_fp16_accuracy.sh similarity index 100% rename from models/cv/classification/efficientnet_b2/ixrt/scripts/infer_efficientnet_b1_fp16_accuracy.sh rename to models/cv/classification/efficientnet_b2/ixrt/scripts/infer_efficientnet_b2_fp16_accuracy.sh diff --git a/models/cv/classification/efficientnet_b2/ixrt/scripts/infer_efficientnet_b1_fp16_performance.sh b/models/cv/classification/efficientnet_b2/ixrt/scripts/infer_efficientnet_b2_fp16_performance.sh similarity index 100% rename from models/cv/classification/efficientnet_b2/ixrt/scripts/infer_efficientnet_b1_fp16_performance.sh rename to models/cv/classification/efficientnet_b2/ixrt/scripts/infer_efficientnet_b2_fp16_performance.sh diff --git a/models/cv/classification/efficientnet_v2/ixrt/ci/prepare.sh b/models/cv/classification/efficientnet_v2/ixrt/ci/prepare.sh index 00824ca4..89b3d7e0 100644 --- a/models/cv/classification/efficientnet_v2/ixrt/ci/prepare.sh +++ b/models/cv/classification/efficientnet_v2/ixrt/ci/prepare.sh @@ -1,3 +1,30 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + pip install -r requirements.txt mkdir -p checkpoints unzip /root/data/repos/pytorch-image-models-a852318b636a8.zip -d ./ diff --git a/models/cv/classification/googlenet/ixrt/ci/prepare.sh b/models/cv/classification/googlenet/ixrt/ci/prepare.sh index d1753c6a..8a8c7769 100644 --- a/models/cv/classification/googlenet/ixrt/ci/prepare.sh +++ b/models/cv/classification/googlenet/ixrt/ci/prepare.sh @@ -1,3 +1,30 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + pip install -r requirements.txt mkdir checkpoints python3 export_onnx.py --origin_model /root/data/checkpoints/googlenet.pth --output_model checkpoints/googlenet.onnx \ No newline at end of file diff --git a/models/cv/classification/hrnet_w18/ixrt/README.md b/models/cv/classification/hrnet_w18/ixrt/README.md index 278d5427..2d38c4d4 100644 --- a/models/cv/classification/hrnet_w18/ixrt/README.md +++ b/models/cv/classification/hrnet_w18/ixrt/README.md @@ -15,13 +15,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install tqdm -pip3 install onnx -pip3 install onnxsim -pip3 install tabulate -pip3 install ppq -pip3 install mmpretrain -pip3 install mmcv-lite +pip3 install -r requirements.txt ``` ### Download diff --git a/models/cv/classification/hrnet_w18/ixrt/ci/prepare.sh b/models/cv/classification/hrnet_w18/ixrt/ci/prepare.sh new file mode 100644 index 00000000..dbb45d9a --- /dev/null +++ b/models/cv/classification/hrnet_w18/ixrt/ci/prepare.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r requirements.txt +mkdir checkpoints +mkdir -p /root/.cache/torch/hub/checkpoints/ +ln -s /root/data/checkpoints/hrnet-w18_3rdparty_8xb32_in1k_20220120-0c10b180.pth /root/.cache/torch/hub/checkpoints/hrnet-w18_3rdparty_8xb32_in1k_20220120-0c10b180.pth +python3 export_onnx.py --output_model checkpoints/hrnet-w18.onnx \ No newline at end of file diff --git a/models/cv/classification/hrnet_w18/ixrt/inference.py b/models/cv/classification/hrnet_w18/ixrt/inference.py index 86f0cdf2..47a3f640 100644 --- a/models/cv/classification/hrnet_w18/ixrt/inference.py +++ b/models/cv/classification/hrnet_w18/ixrt/inference.py @@ -83,6 +83,7 @@ def main(config): total_sample = 0 acc_top1, acc_top5 = 0, 0 + start_time = time.time() with tqdm(total= len(dataloader)) as _tqdm: for idx, (batch_data, batch_label) in enumerate(dataloader): batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) @@ -104,7 +105,9 @@ def main(config): _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), acc_5='{:.4f}'.format(acc_top5/total_sample)) _tqdm.update(1) - + end_time = time.time() + e2e_time = end_time - start_time + print(F"E2E time : {e2e_time:.3f} seconds") print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") acc1 = acc_top1/total_sample diff --git a/models/cv/classification/hrnet_w18/ixrt/requirements.txt b/models/cv/classification/hrnet_w18/ixrt/requirements.txt new file mode 100644 index 00000000..84b43c5d --- /dev/null +++ b/models/cv/classification/hrnet_w18/ixrt/requirements.txt @@ -0,0 +1,7 @@ +tqdm +onnx +onnxsim +tabulate +ppq +mmpretrain +mmcv-lite \ No newline at end of file diff --git a/models/cv/classification/inception_v3/ixrt/ci/prepare.sh b/models/cv/classification/inception_v3/ixrt/ci/prepare.sh index 82636b36..a9b110fd 100644 --- a/models/cv/classification/inception_v3/ixrt/ci/prepare.sh +++ b/models/cv/classification/inception_v3/ixrt/ci/prepare.sh @@ -1,3 +1,30 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + pip install -r requirements.txt mkdir checkpoints python3 export_onnx.py --origin_model /root/data/checkpoints/inception_v3.pth --output_model checkpoints/inception_v3.onnx \ No newline at end of file diff --git a/models/cv/classification/inceptionresnetv2/ixrt/README.md b/models/cv/classification/inceptionresnetv2/ixrt/README.md index 64690193..e55a13a2 100755 --- a/models/cv/classification/inceptionresnetv2/ixrt/README.md +++ b/models/cv/classification/inceptionresnetv2/ixrt/README.md @@ -15,17 +15,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install tqdm -pip3 install onnx -pip3 install onnxsim -pip3 install ultralytics -pip3 install pycocotools -pip3 install Pillow -pip3 install tabulate -pip3 install pycuda -pip3 install opencv-python==4.6.0.66 -pip3 install ppq -pip3 install protobuf==3.20.0 +pip3 install -r requirements.txt ``` ### Download diff --git a/models/cv/classification/inceptionresnetv2/ixrt/ci/prepare.sh b/models/cv/classification/inceptionresnetv2/ixrt/ci/prepare.sh new file mode 100644 index 00000000..2221e9b3 --- /dev/null +++ b/models/cv/classification/inceptionresnetv2/ixrt/ci/prepare.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r requirements.txt +mkdir checkpoints +mkdir -p /root/.cache/torch/hub/checkpoints/ +ln -s /root/data/checkpoints/inceptionresnetv2-520b38e4.pth /root/.cache/torch/hub/checkpoints/inceptionresnetv2-520b38e4.pth +python3 export_model.py --output_model ./checkpoints/inceptionresnetv2.onnx \ No newline at end of file diff --git a/models/cv/classification/inceptionresnetv2/ixrt/requirements.txt b/models/cv/classification/inceptionresnetv2/ixrt/requirements.txt new file mode 100644 index 00000000..7d96aa09 --- /dev/null +++ b/models/cv/classification/inceptionresnetv2/ixrt/requirements.txt @@ -0,0 +1,11 @@ +tqdm +onnx +onnxsim +ultralytics +pycocotools +Pillow +tabulate +pycuda +opencv-python==4.6.0.66 +ppq +protobuf==3.20.0 \ No newline at end of file diff --git a/models/cv/classification/mobilenet_v2/ixrt/ci/prepare.sh b/models/cv/classification/mobilenet_v2/ixrt/ci/prepare.sh index aa0cae6e..277cd41a 100644 --- a/models/cv/classification/mobilenet_v2/ixrt/ci/prepare.sh +++ b/models/cv/classification/mobilenet_v2/ixrt/ci/prepare.sh @@ -1,3 +1,30 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + pip install -r requirements.txt mkdir checkpoints python3 export_onnx.py --origin_model /root/data/checkpoints/mobilenet_v2.pth --output_model checkpoints/mobilenet_v2.onnx \ No newline at end of file diff --git a/models/cv/classification/mobilenet_v3/ixrt/ci/prepare.sh b/models/cv/classification/mobilenet_v3/ixrt/ci/prepare.sh index 34932cee..31817b28 100644 --- a/models/cv/classification/mobilenet_v3/ixrt/ci/prepare.sh +++ b/models/cv/classification/mobilenet_v3/ixrt/ci/prepare.sh @@ -1,3 +1,30 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + pip install -r requirements.txt mkdir checkpoints python3 export_onnx.py --origin_model /root/data/checkpoints/mobilenet_v3.pth --output_model checkpoints/mobilenet_v3.onnx \ No newline at end of file diff --git a/models/cv/classification/repvgg/ixrt/README.md b/models/cv/classification/repvgg/ixrt/README.md index 37bbbcd4..e93c626a 100644 --- a/models/cv/classification/repvgg/ixrt/README.md +++ b/models/cv/classification/repvgg/ixrt/README.md @@ -16,13 +16,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install tqdm -pip3 install tabulate -pip3 install onnx -pip3 install onnxsim -pip3 install opencv-python==4.6.0.66 -pip3 install mmcls==0.24.0 -pip3 install mmcv==1.5.3 +pip3 install -r requirements.txt ``` ### Download @@ -33,12 +27,10 @@ Dataset: to download the validation dat ```bash mkdir checkpoints -cd checkpoints git clone -b v0.24.0 https://github.com/open-mmlab/mmpretrain.git -cd .. 
python3 export_onnx.py \ - --config_file ./checkpoints/mmpretrain/configs/repvgg/repvgg-A0_4xb64-coslr-120e_in1k.py \ + --config_file ./mmpretrain/configs/repvgg/repvgg-A0_4xb64-coslr-120e_in1k.py \ --checkpoint_file https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-A0_3rdparty_4xb64-coslr-120e_in1k_20210909-883ab98c.pth \ --output_model ./checkpoints/repvgg_A0.onnx ``` diff --git a/models/cv/classification/repvgg/ixrt/ci/prepare.sh b/models/cv/classification/repvgg/ixrt/ci/prepare.sh new file mode 100644 index 00000000..6c43cd6e --- /dev/null +++ b/models/cv/classification/repvgg/ixrt/ci/prepare.sh @@ -0,0 +1,34 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r requirements.txt +unzip -q /root/data/repos/mmpretrain-0.24.0.zip -d ./ +mkdir -p checkpoints +python3 export_onnx.py \ + --config_file ./mmpretrain/configs/repvgg/repvgg-A0_4xb64-coslr-120e_in1k.py \ + --checkpoint_file /root/data/checkpoints/repvgg-A0_3rdparty_4xb64-coslr-120e_in1k_20210909-883ab98c.pth \ + --output_model ./checkpoints/repvgg_A0.onnx \ No newline at end of file diff --git a/models/cv/classification/repvgg/ixrt/inference.py b/models/cv/classification/repvgg/ixrt/inference.py index 1ec56b4a..11a90c79 100644 --- a/models/cv/classification/repvgg/ixrt/inference.py +++ b/models/cv/classification/repvgg/ixrt/inference.py @@ -84,6 +84,7 @@ def main(config): total_sample = 0 acc_top1, acc_top5 = 0, 0 + start_time = time.time() with tqdm(total= len(dataloader)) as _tqdm: for idx, (batch_data, batch_label) in enumerate(dataloader): batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) @@ -105,7 +106,9 @@ def main(config): _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), acc_5='{:.4f}'.format(acc_top5/total_sample)) _tqdm.update(1) - + end_time = time.time() + e2e_time = end_time - start_time + print(F"E2E time : {e2e_time:.3f} seconds") print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") acc1 = acc_top1/total_sample diff --git a/models/cv/classification/repvgg/ixrt/requirements.txt b/models/cv/classification/repvgg/ixrt/requirements.txt new file mode 100644 index 00000000..fdd84a5d --- /dev/null +++ b/models/cv/classification/repvgg/ixrt/requirements.txt @@ -0,0 +1,7 @@ +tqdm +tabulate +onnx +onnxsim +opencv-python==4.6.0.66 +mmcls==0.24.0 +mmcv==1.5.3 \ No newline at end of file diff --git a/models/cv/classification/res2net50/ixrt/ci/prepare.sh b/models/cv/classification/res2net50/ixrt/ci/prepare.sh index d9c00227..9725f366 100644 --- a/models/cv/classification/res2net50/ixrt/ci/prepare.sh +++
b/models/cv/classification/res2net50/ixrt/ci/prepare.sh @@ -1,3 +1,30 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + pip install -r requirements.txt mkdir checkpoints python3 export_onnx.py --origin_model /root/data/checkpoints/res2net50.pth --output_model checkpoints/res2net50.onnx \ No newline at end of file diff --git a/models/cv/classification/resnet101/ixrt/README.md b/models/cv/classification/resnet101/ixrt/README.md index 92d6603e..d85e3120 100644 --- a/models/cv/classification/resnet101/ixrt/README.md +++ b/models/cv/classification/resnet101/ixrt/README.md @@ -15,10 +15,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install tqdm -pip3 install onnx -pip3 install onnxsim -pip3 install tabulate +pip3 install -r requirements.txt ``` ### Download diff --git a/models/cv/classification/resnet101/ixrt/ci/prepare.sh b/models/cv/classification/resnet101/ixrt/ci/prepare.sh new file mode 100644 index 00000000..701c19ee --- /dev/null +++ b/models/cv/classification/resnet101/ixrt/ci/prepare.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r requirements.txt +mkdir checkpoints +mkdir -p /root/.cache/torch/hub/checkpoints/ +ln -s /root/data/checkpoints/resnet101-63fe2227.pth /root/.cache/torch/hub/checkpoints/resnet101-63fe2227.pth +python3 export_onnx.py --output_model checkpoints/resnet101.onnx \ No newline at end of file diff --git a/models/cv/classification/resnet101/ixrt/inference.py b/models/cv/classification/resnet101/ixrt/inference.py index 1ec56b4a..11a90c79 100644 --- a/models/cv/classification/resnet101/ixrt/inference.py +++ b/models/cv/classification/resnet101/ixrt/inference.py @@ -84,6 +84,7 @@ def main(config): total_sample = 0 acc_top1, acc_top5 = 0, 0 + start_time = time.time() with tqdm(total= len(dataloader)) as _tqdm: for idx, (batch_data, batch_label) in enumerate(dataloader): batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) @@ -105,7 +106,9 @@ def main(config): _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), acc_5='{:.4f}'.format(acc_top5/total_sample)) _tqdm.update(1) - + end_time = time.time() + e2e_time = end_time - start_time + print(F"E2E time : {e2e_time:.3f} seconds") print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") acc1 = acc_top1/total_sample diff --git a/models/cv/classification/resnet101/ixrt/requirements.txt b/models/cv/classification/resnet101/ixrt/requirements.txt new file mode 100644 index 00000000..e1eda59c --- /dev/null +++ b/models/cv/classification/resnet101/ixrt/requirements.txt @@ -0,0 +1,4 @@ +tqdm +onnx +onnxsim +tabulate \ No newline at end of file diff --git a/models/cv/classification/resnet18/ixrt/ci/prepare.sh b/models/cv/classification/resnet18/ixrt/ci/prepare.sh index bfa46b49..eadb2c8f 100644 --- a/models/cv/classification/resnet18/ixrt/ci/prepare.sh +++ b/models/cv/classification/resnet18/ixrt/ci/prepare.sh @@ -1,3 +1,30 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + pip install -r requirements.txt mkdir checkpoints python3 export_onnx.py --origin_model /root/data/checkpoints/resnet18.pth --output_model checkpoints/resnet18.onnx \ No newline at end of file diff --git a/models/cv/classification/resnet34/ixrt/README.md b/models/cv/classification/resnet34/ixrt/README.md index 243c536c..8855611e 100644 --- a/models/cv/classification/resnet34/ixrt/README.md +++ b/models/cv/classification/resnet34/ixrt/README.md @@ -15,10 +15,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install tqdm -pip3 install onnx -pip3 install onnxsim -pip3 install tabulate +pip3 install -r requirements.txt ``` ### Download diff --git a/models/cv/classification/resnet34/ixrt/ci/prepare.sh b/models/cv/classification/resnet34/ixrt/ci/prepare.sh new file mode 100644 index 00000000..7916795d --- /dev/null +++ b/models/cv/classification/resnet34/ixrt/ci/prepare.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r requirements.txt +mkdir checkpoints +mkdir -p /root/.cache/torch/hub/checkpoints/ +ln -s /root/data/checkpoints/resnet34-b627a593.pth /root/.cache/torch/hub/checkpoints/resnet34-b627a593.pth +python3 export_onnx.py --output_model checkpoints/resnet34.onnx \ No newline at end of file diff --git a/models/cv/classification/resnet34/ixrt/inference.py b/models/cv/classification/resnet34/ixrt/inference.py index 2c9dcb3f..77a1888d 100644 --- a/models/cv/classification/resnet34/ixrt/inference.py +++ b/models/cv/classification/resnet34/ixrt/inference.py @@ -83,6 +83,7 @@ def main(config): total_sample = 0 acc_top1, acc_top5 = 0, 0 + start_time = time.time() with tqdm(total= len(dataloader)) as _tqdm: for idx, (batch_data, batch_label) in enumerate(dataloader): batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) @@ -105,6 +106,9 @@ def main(config): acc_5='{:.4f}'.format(acc_top5/total_sample)) _tqdm.update(1) + end_time = time.time() + e2e_time = end_time - start_time + print(F"E2E time : {e2e_time:.3f} seconds") print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") acc1 = acc_top1/total_sample diff --git a/models/cv/classification/resnet34/ixrt/requirements.txt b/models/cv/classification/resnet34/ixrt/requirements.txt new file mode 100644 index 00000000..e1eda59c --- /dev/null +++ b/models/cv/classification/resnet34/ixrt/requirements.txt @@ -0,0 +1,4 @@ +tqdm +onnx +onnxsim +tabulate \ No newline at end of file diff --git a/models/cv/classification/resnet50/ixrt/ci/prepare.sh b/models/cv/classification/resnet50/ixrt/ci/prepare.sh index 5dddbc64..7bdae79a 100644 --- a/models/cv/classification/resnet50/ixrt/ci/prepare.sh +++ b/models/cv/classification/resnet50/ixrt/ci/prepare.sh @@ -1,3 +1,30 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + pip install -r requirements.txt mkdir checkpoints python3 export_onnx.py --origin_model /root/data/checkpoints/resnet50.pth --output_model checkpoints/resnet50.onnx \ No newline at end of file diff --git a/models/cv/classification/resnetv1d50/ixrt/README.md b/models/cv/classification/resnetv1d50/ixrt/README.md index 0214bbd6..d4485ebe 100644 --- a/models/cv/classification/resnetv1d50/ixrt/README.md +++ b/models/cv/classification/resnetv1d50/ixrt/README.md @@ -15,13 +15,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install tqdm -pip3 install onnx -pip3 install onnxsim -pip3 install tabulate -pip3 install ppq -pip3 install mmpretrain -pip3 install mmcv-lite +pip3 install -r requirements.txt ``` ### Download @@ -41,7 +35,7 @@ python3 export_onnx.py --output_model checkpoints/resnet_v1_d50.onnx export DATASETS_DIR=/path/to/imagenet_val/ export CHECKPOINTS_DIR=./checkpoints export RUN_DIR=./ -export CONFIG_DIR=config/RESNET_V1_D50_CONFIG +export CONFIG_DIR=config/RESNETV1D50_CONFIG ``` ### FP16 diff --git a/models/cv/classification/resnetv1d50/ixrt/ci/prepare.sh b/models/cv/classification/resnetv1d50/ixrt/ci/prepare.sh new file mode 100644 index 00000000..e4f74d0f --- /dev/null +++ b/models/cv/classification/resnetv1d50/ixrt/ci/prepare.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
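+# CI preparation for ResNetV1D50 (ixrt): the mmpretrain checkpoint cached on the CI host is linked into the torch hub cache, +# exported to checkpoints/resnet_v1_d50.onnx, and consumed by the infer scripts together with config/RESNETV1D50_CONFIG.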
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r requirements.txt +mkdir checkpoints +mkdir -p /root/.cache/torch/hub/checkpoints/ +ln -s /root/data/checkpoints/resnetv1d50_b32x8_imagenet_20210531-db14775a.pth /root/.cache/torch/hub/checkpoints/resnetv1d50_b32x8_imagenet_20210531-db14775a.pth +python3 export_onnx.py --output_model checkpoints/resnet_v1_d50.onnx \ No newline at end of file diff --git a/models/cv/classification/resnetv1d50/ixrt/config/RESNET_V1_D50_CONFIG b/models/cv/classification/resnetv1d50/ixrt/config/RESNETV1D50_CONFIG similarity index 100% rename from models/cv/classification/resnetv1d50/ixrt/config/RESNET_V1_D50_CONFIG rename to models/cv/classification/resnetv1d50/ixrt/config/RESNETV1D50_CONFIG diff --git a/models/cv/classification/resnetv1d50/ixrt/inference.py b/models/cv/classification/resnetv1d50/ixrt/inference.py index 2c9dcb3f..6d34c88c 100644 --- a/models/cv/classification/resnetv1d50/ixrt/inference.py +++ b/models/cv/classification/resnetv1d50/ixrt/inference.py @@ -83,6 +83,7 @@ def main(config): total_sample = 0 acc_top1, acc_top5 = 0, 0 + start_time = time.time() with tqdm(total= len(dataloader)) as _tqdm: for idx, (batch_data, batch_label) in enumerate(dataloader): batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) @@ -104,7 +105,9 @@ def main(config): _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), acc_5='{:.4f}'.format(acc_top5/total_sample)) _tqdm.update(1) - + end_time = time.time() + e2e_time = end_time - start_time + print(F"E2E time : {e2e_time:.3f} seconds") print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") acc1 = acc_top1/total_sample diff --git a/models/cv/classification/resnetv1d50/ixrt/requirements.txt b/models/cv/classification/resnetv1d50/ixrt/requirements.txt new file mode 100644 index 00000000..84b43c5d --- /dev/null +++ b/models/cv/classification/resnetv1d50/ixrt/requirements.txt @@ -0,0 +1,7 @@ +tqdm +onnx +onnxsim +tabulate +ppq +mmpretrain +mmcv-lite \ No newline at end of file diff --git a/models/cv/classification/resnext50_32x4d/ixrt/README.md b/models/cv/classification/resnext50_32x4d/ixrt/README.md index 79d140cd..0c7ed2fe 100644 --- a/models/cv/classification/resnext50_32x4d/ixrt/README.md +++ b/models/cv/classification/resnext50_32x4d/ixrt/README.md @@ -9,14 +9,7 @@ The ResNeXt50_32x4d model is a convolutional neural network architecture designe ### Install ```bash -pip3 install onnx -pip3 install tqdm -pip3 install tqdm -pip3 install onnx -pip3 install onnxsim -pip3 install tabulate -pip3 install ppq -pip3 install cuda-python +pip3 install -r requirements.txt ``` ### Download diff --git a/models/cv/classification/resnext50_32x4d/ixrt/ci/prepare.sh b/models/cv/classification/resnext50_32x4d/ixrt/ci/prepare.sh new file mode 100644 index 00000000..f264fc49 --- /dev/null +++ b/models/cv/classification/resnext50_32x4d/ixrt/ci/prepare.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r requirements.txt +python3 export.py --weight /root/data/checkpoints/resnext50_32x4d-7cdf4587.pth --output resnext50_32x4d.onnx \ No newline at end of file diff --git a/models/cv/classification/resnext50_32x4d/ixrt/inference.py b/models/cv/classification/resnext50_32x4d/ixrt/inference.py index 4afba6bc..e5a03525 100644 --- a/models/cv/classification/resnext50_32x4d/ixrt/inference.py +++ b/models/cv/classification/resnext50_32x4d/ixrt/inference.py @@ -83,6 +83,7 @@ def main(config): total_sample = 0 acc_top1, acc_top5 = 0, 0 + start_time = time.time() with tqdm(total= len(dataloader)) as _tqdm: for idx, (batch_data, batch_label) in enumerate(dataloader): batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) @@ -117,6 +118,9 @@ def main(config): _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), acc_5='{:.4f}'.format(acc_top5/total_sample)) _tqdm.update(1) + end_time = time.time() + e2e_time = end_time - start_time + print(F"E2E time : {e2e_time:.3f} seconds") err, = cudart.cudaFree(inputs[0]["allocation"]) assert err == cudart.cudaError_t.cudaSuccess err, = cudart.cudaFree(outputs[0]["allocation"]) diff --git a/models/cv/classification/resnext50_32x4d/ixrt/requirements.txt b/models/cv/classification/resnext50_32x4d/ixrt/requirements.txt new file mode 100644 index 00000000..54599ec2 --- /dev/null +++ b/models/cv/classification/resnext50_32x4d/ixrt/requirements.txt @@ -0,0 +1,8 @@ +onnx +tqdm +tqdm +onnx +onnxsim +tabulate +ppq +cuda-python \ No newline at end of file diff --git a/models/cv/classification/shufflenet_v1/ixrt/README.md b/models/cv/classification/shufflenet_v1/ixrt/README.md index 4dee5cae..94c88432 100644 --- a/models/cv/classification/shufflenet_v1/ixrt/README.md +++ b/models/cv/classification/shufflenet_v1/ixrt/README.md @@ -29,12 +29,10 @@ Dataset: to download the validation dat ```bash mkdir checkpoints -cd checkpoints git clone -b v0.24.0 https://github.com/open-mmlab/mmpretrain.git -cd .. python3 export_onnx.py \ - --config_file ./checkpoints/mmpretrain/configs/shufflenet_v1/shufflenet-v1-1x_16xb64_in1k.py \ + --config_file ./mmpretrain/configs/shufflenet_v1/shufflenet-v1-1x_16xb64_in1k.py \ --checkpoint_file ./shufflenet_v1_batch1024_imagenet_20200804-5d6cec73.pth \ --output_model ./checkpoints/shufflenet_v1.onnx ``` diff --git a/models/cv/classification/shufflenet_v1/ixrt/ci/prepare.sh b/models/cv/classification/shufflenet_v1/ixrt/ci/prepare.sh index a426ed33..bea7f22b 100644 --- a/models/cv/classification/shufflenet_v1/ixrt/ci/prepare.sh +++ b/models/cv/classification/shufflenet_v1/ixrt/ci/prepare.sh @@ -14,10 +14,21 @@ # See the License for the specific language governing permissions and # limitations under the License. 
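+# Detect the distro to install the OpenGL runtime, then unpack the locally cached mmpretrain-0.24.0 sources into the +# working directory so the shufflenet config path passed to export_onnx.py below resolves.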
+set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + pip install -r requirements.txt mkdir -p checkpoints -unzip -q /root/data/repos/mmpretrain-0.24.0.zip -d ./checkpoints/ +unzip -q /root/data/repos/mmpretrain-0.24.0.zip -d ./ python3 export_onnx.py \ ---config_file ./checkpoints/mmpretrain/configs/shufflenet_v1/shufflenet-v1-1x_16xb64_in1k.py \ +--config_file ./mmpretrain/configs/shufflenet_v1/shufflenet-v1-1x_16xb64_in1k.py \ --checkpoint_file /root/data/checkpoints/shufflenet_v1.pth \ --output_model ./checkpoints/shufflenet_v1.onnx \ No newline at end of file diff --git a/models/cv/classification/squeezenet_1.0/ixrt/README.md b/models/cv/classification/squeezenet_v1_0/ixrt/README.md similarity index 79% rename from models/cv/classification/squeezenet_1.0/ixrt/README.md rename to models/cv/classification/squeezenet_v1_0/ixrt/README.md index b1d51b64..6af82041 100644 --- a/models/cv/classification/squeezenet_1.0/ixrt/README.md +++ b/models/cv/classification/squeezenet_v1_0/ixrt/README.md @@ -17,9 +17,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install tqdm -pip3 install onnxsim -pip3 install opencv-python==4.6.0.66 +pip3 install -r requirements.txt ``` ### Download @@ -42,25 +40,25 @@ export PROJ_DIR=./ export DATASETS_DIR=/path/to/imagenet_val/ export CHECKPOINTS_DIR=./checkpoints export RUN_DIR=./ -export CONFIG_DIR=config/SQUEEZENET_V10_CONFIG +export CONFIG_DIR=config/SQUEEZENET_V1_0_CONFIG ``` ### FP16 ```bash # Accuracy -bash scripts/infer_squeezenet_v10_fp16_accuracy.sh +bash scripts/infer_squeezenet_v1_0_fp16_accuracy.sh # Performance -bash scripts/infer_squeezenet_v10_fp16_performance.sh +bash scripts/infer_squeezenet_v1_0_fp16_performance.sh ``` ### INT8 ```bash # Accuracy -bash scripts/infer_squeezenet_v10_int8_accuracy.sh +bash scripts/infer_squeezenet_v1_0_int8_accuracy.sh # Performance -bash scripts/infer_squeezenet_v10_int8_performance.sh +bash scripts/infer_squeezenet_v1_0_int8_performance.sh ``` ## Results diff --git a/models/cv/classification/squeezenet_1.0/ixrt/build_engine.py b/models/cv/classification/squeezenet_v1_0/ixrt/build_engine.py similarity index 100% rename from models/cv/classification/squeezenet_1.0/ixrt/build_engine.py rename to models/cv/classification/squeezenet_v1_0/ixrt/build_engine.py diff --git a/models/cv/classification/squeezenet_1.0/ixrt/calibration_dataset.py b/models/cv/classification/squeezenet_v1_0/ixrt/calibration_dataset.py similarity index 100% rename from models/cv/classification/squeezenet_1.0/ixrt/calibration_dataset.py rename to models/cv/classification/squeezenet_v1_0/ixrt/calibration_dataset.py diff --git a/models/cv/classification/squeezenet_v1_0/ixrt/ci/prepare.sh b/models/cv/classification/squeezenet_v1_0/ixrt/ci/prepare.sh new file mode 100644 index 00000000..063ff0ec --- /dev/null +++ b/models/cv/classification/squeezenet_v1_0/ixrt/ci/prepare.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r requirements.txt +mkdir checkpoints +python3 export_onnx.py --origin_model /root/data/checkpoints/squeezenet1_0-b66bff10.pth --output_model checkpoints/squeezenetv10.onnx \ No newline at end of file diff --git a/models/cv/classification/squeezenet_1.0/ixrt/common.py b/models/cv/classification/squeezenet_v1_0/ixrt/common.py similarity index 100% rename from models/cv/classification/squeezenet_1.0/ixrt/common.py rename to models/cv/classification/squeezenet_v1_0/ixrt/common.py diff --git a/models/cv/classification/squeezenet_1.0/ixrt/config/SQUEEZENET_V10_CONFIG b/models/cv/classification/squeezenet_v1_0/ixrt/config/SQUEEZENET_V1_0_CONFIG similarity index 100% rename from models/cv/classification/squeezenet_1.0/ixrt/config/SQUEEZENET_V10_CONFIG rename to models/cv/classification/squeezenet_v1_0/ixrt/config/SQUEEZENET_V1_0_CONFIG diff --git a/models/cv/classification/squeezenet_1.0/ixrt/export_onnx.py b/models/cv/classification/squeezenet_v1_0/ixrt/export_onnx.py similarity index 100% rename from models/cv/classification/squeezenet_1.0/ixrt/export_onnx.py rename to models/cv/classification/squeezenet_v1_0/ixrt/export_onnx.py diff --git a/models/cv/classification/squeezenet_1.0/ixrt/inference.py b/models/cv/classification/squeezenet_v1_0/ixrt/inference.py similarity index 97% rename from models/cv/classification/squeezenet_1.0/ixrt/inference.py rename to models/cv/classification/squeezenet_v1_0/ixrt/inference.py index 1ec56b4a..11a90c79 100644 --- a/models/cv/classification/squeezenet_1.0/ixrt/inference.py +++ b/models/cv/classification/squeezenet_v1_0/ixrt/inference.py @@ -84,6 +84,7 @@ def main(config): total_sample = 0 acc_top1, acc_top5 = 0, 0 + start_time = time.time() with tqdm(total= len(dataloader)) as _tqdm: for idx, (batch_data, batch_label) in enumerate(dataloader): batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) @@ -105,7 +106,9 @@ def main(config): _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), acc_5='{:.4f}'.format(acc_top5/total_sample)) _tqdm.update(1) - + end_time = time.time() + e2e_time = end_time - start_time + print(F"E2E time : {e2e_time:.3f} seconds") print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") acc1 = acc_top1/total_sample diff --git a/models/cv/classification/squeezenet_1.0/ixrt/modify_batchsize.py b/models/cv/classification/squeezenet_v1_0/ixrt/modify_batchsize.py similarity index 100% rename from models/cv/classification/squeezenet_1.0/ixrt/modify_batchsize.py rename to models/cv/classification/squeezenet_v1_0/ixrt/modify_batchsize.py diff --git a/models/cv/classification/squeezenet_1.0/ixrt/quant.py b/models/cv/classification/squeezenet_v1_0/ixrt/quant.py similarity index 100% rename from models/cv/classification/squeezenet_1.0/ixrt/quant.py rename to 
models/cv/classification/squeezenet_v1_0/ixrt/quant.py diff --git a/models/cv/classification/squeezenet_1.0/ixrt/refine_model.py b/models/cv/classification/squeezenet_v1_0/ixrt/refine_model.py similarity index 100% rename from models/cv/classification/squeezenet_1.0/ixrt/refine_model.py rename to models/cv/classification/squeezenet_v1_0/ixrt/refine_model.py diff --git a/models/cv/classification/squeezenet_v1_0/ixrt/requirements.txt b/models/cv/classification/squeezenet_v1_0/ixrt/requirements.txt new file mode 100644 index 00000000..24dc14b7 --- /dev/null +++ b/models/cv/classification/squeezenet_v1_0/ixrt/requirements.txt @@ -0,0 +1,3 @@ +tqdm +onnxsim +opencv-python==4.6.0.66 \ No newline at end of file diff --git a/models/cv/classification/squeezenet_1.0/ixrt/scripts/infer_squeezenet_v10_fp16_accuracy.sh b/models/cv/classification/squeezenet_v1_0/ixrt/scripts/infer_squeezenet_v1_0_fp16_accuracy.sh similarity index 100% rename from models/cv/classification/squeezenet_1.0/ixrt/scripts/infer_squeezenet_v10_fp16_accuracy.sh rename to models/cv/classification/squeezenet_v1_0/ixrt/scripts/infer_squeezenet_v1_0_fp16_accuracy.sh diff --git a/models/cv/classification/squeezenet_1.0/ixrt/scripts/infer_squeezenet_v10_fp16_performance.sh b/models/cv/classification/squeezenet_v1_0/ixrt/scripts/infer_squeezenet_v1_0_fp16_performance.sh similarity index 100% rename from models/cv/classification/squeezenet_1.0/ixrt/scripts/infer_squeezenet_v10_fp16_performance.sh rename to models/cv/classification/squeezenet_v1_0/ixrt/scripts/infer_squeezenet_v1_0_fp16_performance.sh diff --git a/models/cv/classification/squeezenet_1.0/ixrt/scripts/infer_squeezenet_v10_int8_accuracy.sh b/models/cv/classification/squeezenet_v1_0/ixrt/scripts/infer_squeezenet_v1_0_int8_accuracy.sh similarity index 100% rename from models/cv/classification/squeezenet_1.0/ixrt/scripts/infer_squeezenet_v10_int8_accuracy.sh rename to models/cv/classification/squeezenet_v1_0/ixrt/scripts/infer_squeezenet_v1_0_int8_accuracy.sh diff --git a/models/cv/classification/squeezenet_1.0/ixrt/scripts/infer_squeezenet_v10_int8_performance.sh b/models/cv/classification/squeezenet_v1_0/ixrt/scripts/infer_squeezenet_v1_0_int8_performance.sh similarity index 100% rename from models/cv/classification/squeezenet_1.0/ixrt/scripts/infer_squeezenet_v10_int8_performance.sh rename to models/cv/classification/squeezenet_v1_0/ixrt/scripts/infer_squeezenet_v1_0_int8_performance.sh diff --git a/models/cv/classification/squeezenet_1.0/ixrt/simplify_model.py b/models/cv/classification/squeezenet_v1_0/ixrt/simplify_model.py similarity index 100% rename from models/cv/classification/squeezenet_1.0/ixrt/simplify_model.py rename to models/cv/classification/squeezenet_v1_0/ixrt/simplify_model.py diff --git a/models/cv/classification/squeezenet_v1_1/ixrt/ci/prepare.sh b/models/cv/classification/squeezenet_v1_1/ixrt/ci/prepare.sh index b80c01e5..b88bcb1f 100644 --- a/models/cv/classification/squeezenet_v1_1/ixrt/ci/prepare.sh +++ b/models/cv/classification/squeezenet_v1_1/ixrt/ci/prepare.sh @@ -1,3 +1,30 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + pip install -r requirements.txt mkdir checkpoints python3 export_onnx.py --origin_model /root/data/checkpoints/squeezenet_v1_1.pth --output_model checkpoints/squeezenet_v1_1.onnx \ No newline at end of file diff --git a/models/cv/classification/swin_transformer_large/ixrt/README.md b/models/cv/classification/swin_transformer_large/ixrt/README.md index 7f2282f9..1010b800 100644 --- a/models/cv/classification/swin_transformer_large/ixrt/README.md +++ b/models/cv/classification/swin_transformer_large/ixrt/README.md @@ -15,16 +15,7 @@ cd ${MODEL_PATH} apt install -y libnuma-dev libgl1-mesa-glx -pip3 install onnxsim -pip3 install onnx_graphsurgeon -pip3 install scikit-learn -pip3 install tqdm -pip3 install pycuda -pip3 install onnx -pip3 install tabulate -pip3 install cv2 -pip3 install pycocotools -pip3 install opencv-python==4.6.0.66 +pip3 install -r requirements.txt ``` ### Download diff --git a/models/cv/classification/swin_transformer_large/ixrt/ci/prepare.sh b/models/cv/classification/swin_transformer_large/ixrt/ci/prepare.sh new file mode 100644 index 00000000..b88bcb1f --- /dev/null +++ b/models/cv/classification/swin_transformer_large/ixrt/ci/prepare.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r requirements.txt +mkdir checkpoints +python3 export_onnx.py --origin_model /root/data/checkpoints/squeezenet_v1_1.pth --output_model checkpoints/squeezenet_v1_1.onnx \ No newline at end of file diff --git a/models/cv/classification/swin_transformer_large/ixrt/requirements.txt b/models/cv/classification/swin_transformer_large/ixrt/requirements.txt new file mode 100644 index 00000000..512930b8 --- /dev/null +++ b/models/cv/classification/swin_transformer_large/ixrt/requirements.txt @@ -0,0 +1,10 @@ +onnxsim +onnx_graphsurgeon +scikit-learn +tqdm +pycuda +onnx +tabulate +cv2 +pycocotools +opencv-python==4.6.0.66 \ No newline at end of file diff --git a/models/cv/classification/vgg16/ixrt/ci/prepare.sh b/models/cv/classification/vgg16/ixrt/ci/prepare.sh index 7d8e52b0..7492df8c 100644 --- a/models/cv/classification/vgg16/ixrt/ci/prepare.sh +++ b/models/cv/classification/vgg16/ixrt/ci/prepare.sh @@ -1,3 +1,30 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + pip install -r requirements.txt mkdir checkpoints python3 export_onnx.py --origin_model /root/data/checkpoints/vgg16.pth --output_model checkpoints/vgg16.onnx \ No newline at end of file diff --git a/models/cv/classification/wide_resnet50/ixrt/README.md b/models/cv/classification/wide_resnet50/ixrt/README.md index 72dd1308..72fd5b49 100644 --- a/models/cv/classification/wide_resnet50/ixrt/README.md +++ b/models/cv/classification/wide_resnet50/ixrt/README.md @@ -9,8 +9,7 @@ The distinguishing feature of Wide ResNet50 lies in its widened architecture com ### Install ```bash -pip3 install onnx -pip3 install tqdm +pip3 install -r requirements.txt ``` ### Download diff --git a/models/cv/classification/wide_resnet50/ixrt/ci/prepare.sh b/models/cv/classification/wide_resnet50/ixrt/ci/prepare.sh new file mode 100644 index 00000000..8f0dd69b --- /dev/null +++ b/models/cv/classification/wide_resnet50/ixrt/ci/prepare.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r requirements.txt +mkdir -p checkpoints/ +python3 export.py --weight /root/data/checkpoints/wide_resnet50_2-95faca4d.pth --output checkpoints/wide_resnet50.onnx \ No newline at end of file diff --git a/models/cv/classification/wide_resnet50/ixrt/inference.py b/models/cv/classification/wide_resnet50/ixrt/inference.py index 2c9dcb3f..77a1888d 100644 --- a/models/cv/classification/wide_resnet50/ixrt/inference.py +++ b/models/cv/classification/wide_resnet50/ixrt/inference.py @@ -83,6 +83,7 @@ def main(config): total_sample = 0 acc_top1, acc_top5 = 0, 0 + start_time = time.time() with tqdm(total= len(dataloader)) as _tqdm: for idx, (batch_data, batch_label) in enumerate(dataloader): batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) @@ -105,6 +106,9 @@ def main(config): acc_5='{:.4f}'.format(acc_top5/total_sample)) _tqdm.update(1) + end_time = time.time() + e2e_time = end_time - start_time + print(F"E2E time : {e2e_time:.3f} seconds") print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") acc1 = acc_top1/total_sample diff --git a/models/cv/classification/wide_resnet50/ixrt/requirements.txt b/models/cv/classification/wide_resnet50/ixrt/requirements.txt new file mode 100644 index 00000000..ecd31631 --- /dev/null +++ b/models/cv/classification/wide_resnet50/ixrt/requirements.txt @@ -0,0 +1,2 @@ +onnx +tqdm \ No newline at end of file diff --git a/models/cv/pose_estimation/lightweightopenpose/ixrt/datasets/transformations.py b/models/cv/pose_estimation/lightweight_openpose/ixrt/datasets/transformations.py similarity index 100% rename from models/cv/pose_estimation/lightweightopenpose/ixrt/datasets/transformations.py rename to models/cv/pose_estimation/lightweight_openpose/ixrt/datasets/transformations.py diff --git a/tests/models_ixrt.yaml b/tests/models_ixrt.yaml new file mode 100644 index 00000000..d7bc0f2b --- /dev/null +++ b/tests/models_ixrt.yaml @@ -0,0 +1,507 @@ +--- +- datasets: https://www.image-net.org/download.php + download_url: https://download.pytorch.org/models/alexnet-owt-7be5be79.pth + name: alexnet + need_third_part: false + precisions: + - fp16 + - int8 + relative_path: models/cv/classification/alexnet/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://download.pytorch.org/models/convnext_small-0c510722.pth + name: convnext_small + need_third_part: false + precisions: + - fp16 + relative_path: models/cv/classification/convnext_small/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://local/cspdarknet53_3rdparty_8xb32_in1k_20220329-bd275287.pth + name: cspdarknet53 + need_third_part: true + precisions: + - fp16 + - int8 + relative_path: models/cv/classification/cspdarknet53/ixrt + task_type: cv/classification +- 
datasets: https://www.image-net.org/download.php + download_url: https://download.openmmlab.com/mmclassification/v0/cspnet/cspresnet50_3rdparty_8xb32_in1k_20220329-dd6dddfb.pth + name: cspresnet50 + need_third_part: true + precisions: + - fp16 + - int8 + relative_path: models/cv/classification/cspresnet50/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://local/densenet121.pth + name: densenet121 + need_third_part: false + precisions: + - fp16 + relative_path: models/cv/classification/densenet121/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://download.pytorch.org/models/densenet161-8d451a50.pth + name: densenet161 + need_third_part: false + precisions: + - fp16 + relative_path: models/cv/classification/densenet161/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://download.pytorch.org/models/densenet169-b2777c0a.pth + name: densenet169 + need_third_part: false + precisions: + - fp16 + relative_path: models/cv/classification/densenet169/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://download.pytorch.org/models/efficientnet_b0_rwightman-3dd342df.pth + name: efficientnet_b0 + need_third_part: false + precisions: + - fp16 + - int8 + relative_path: models/cv/classification/efficientnet_b0/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://local/efficientnet_b1.pth + name: efficientnet_b1 + need_third_part: false + precisions: + - fp16 + - int8 + relative_path: models/cv/classification/efficientnet_b1/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://download.pytorch.org/models/efficientnet_b2_rwightman-c35c1473.pth + name: efficientnet_b2 + need_third_part: false + precisions: + - fp16 + relative_path: models/cv/classification/efficientnet_b2/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://local/efficientnet_v2.pth + name: efficientnet_v2 + need_third_part: false + precisions: + - fp16 + - int8 + relative_path: models/cv/classification/efficientnet_v2/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://download.pytorch.org/models/googlenet-1378be20.pth + name: googlenet + need_third_part: false + precisions: + - fp16 + - int8 + relative_path: models/cv/classification/googlenet/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://download.openmmlab.com/mmclassification/v0/hrnet/hrnet-w18_3rdparty_8xb32_in1k_20220120-0c10b180.pth + name: hrnet_w18 + need_third_part: false + precisions: + - fp16 + - int8 + relative_path: models/cv/classification/hrnet_w18/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://download.pytorch.org/models/inception_v3_google-0cc3c7bd.pth + name: inception_v3 + need_third_part: false + precisions: + - fp16 + - int8 + relative_path: models/cv/classification/inception_v3/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: http://data.lip6.fr/cadene/pretrainedmodels/inceptionresnetv2-520b38e4.pth + name: inceptionresnetv2 + need_third_part: false + precisions: + - fp16 + - int8 + relative_path: 
models/cv/classification/inceptionresnetv2/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://download.pytorch.org/models/mobilenet_v2-b0353104.pth + name: mobilenet_v2 + need_third_part: false + precisions: + - fp16 + - int8 + relative_path: models/cv/classification/mobilenet_v2/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://download.pytorch.org/models/mobilenet_v3_small-047dcff4.pth + name: mobilenet_v3 + need_third_part: false + precisions: + - fp16 + relative_path: models/cv/classification/mobilenet_v3/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-A0_3rdparty_4xb64-coslr-120e_in1k_20210909-883ab98c.pth + name: repvgg + need_third_part: true + precisions: + - fp16 + relative_path: models/cv/classification/repvgg/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://shanghuagao.oss-cn-beijing.aliyuncs.com/res2net/res2net50_14w_8s-6527dddc.pth + name: res2net50 + need_third_part: true + precisions: + - fp16 + - int8 + relative_path: models/cv/classification/res2net50/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://download.pytorch.org/models/resnet101-63fe2227.pth + name: resnet101 + need_third_part: false + precisions: + - fp16 + - int8 + relative_path: models/cv/classification/resnet101/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://download.pytorch.org/models/resnet18-f37072fd.pth + name: resnet18 + need_third_part: false + precisions: + - fp16 + - int8 + relative_path: models/cv/classification/resnet18/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://download.pytorch.org/models/resnet34-b627a593.pth + name: resnet34 + need_third_part: false + precisions: + - fp16 + - int8 + relative_path: models/cv/classification/resnet34/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://download.pytorch.org/models/resnet50-0676ba61.pth + name: resnet50 + need_third_part: false + precisions: + - fp16 + - int8 + relative_path: models/cv/classification/resnet50/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d50_b32x8_imagenet_20210531-db14775a.pth + name: resnetv1d50 + need_third_part: false + precisions: + - fp16 + - int8 + relative_path: models/cv/classification/resnetv1d50/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth + name: resnext50_32x4d + need_third_part: false + precisions: + - fp16 + relative_path: models/cv/classification/resnext50_32x4d/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://download.openmmlab.com/mmclassification/v0/shufflenet_v1/shufflenet_v1.pth + name: shufflenet_v1 + need_third_part: true + precisions: + - fp16 + relative_path: models/cv/classification/shufflenet_v1/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://download.pytorch.org/models/squeezenet1_0-b66bff10.pth + 
name: squeezenet_v1_0 + need_third_part: false + precisions: + - fp16 + - int8 + relative_path: models/cv/classification/squeezenet_v1_0/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://download.pytorch.org/models/squeezenet1_1-b8a52dc0.pth + name: squeezenet_v1_1 + need_third_part: false + precisions: + - fp16 + - int8 + relative_path: models/cv/classification/squeezenet_v1_1/ixrt + task_type: cv/classification +- datasets: https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open_imagenet.tar + download_url: https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open-swin-large.tar + name: swin_transformer_large + need_comfirm: true + need_third_part: false + precisions: + - fp16 + relative_path: models/cv/classification/swin_transformer_large/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://download.pytorch.org/models/vgg16-397923af.pth + name: vgg16 + need_third_part: false + precisions: + - fp16 + - int8 + relative_path: models/cv/classification/vgg16/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://download.pytorch.org/models/wide_resnet50_2-95faca4d.pth + name: wide_resnet50 + need_third_part: false + precisions: + - fp16 + - int8 + relative_path: models/cv/classification/wide_resnet50/ixrt + task_type: cv/classification +- datasets: local/coco + download_url: https://download.openmmlab.com/mmdetection/v2.0/centernet/centernet_resnet18_140e_coco/centernet_resnet18_140e_coco_20210705_093630-bb5b3bf7.pth + name: centernet + need_third_part: false + precisions: + - fp16 + relative_path: models/cv/detection/centernet/ixrt + task_type: cv/detection +- datasets: local/coco + download_url: https://download.openmmlab.com/mmdetection/v3.0/detr/detr_r50_8xb2-150e_coco/detr_r50_8xb2-150e_coco_20221023_153551-436d03e8.pth + name: detr + need_third_part: false + precisions: + - fp16 + relative_path: models/cv/detection/detr/ixrt + task_type: cv/detection +- datasets: local/coco + download_url: https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco-0a0d75a8.pth + name: fcos + need_comfirm: true + need_third_part: true + precisions: + - fp16 + relative_path: models/cv/detection/fcos/ixrt + task_type: cv/detection +- datasets: local/coco + download_url: https://pjreddie.com/media/files/yolov3.weights + name: yolov3 + need_third_part: true + precisions: + - fp16 + - int8 + relative_path: models/cv/detection/yolov3/ixrt + task_type: cv/detection +- datasets: local/coco + download_url: https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights + name: yolov4 + need_third_part: true + precisions: + - fp16 + - int8 + relative_path: models/cv/detection/yolov4/ixrt + task_type: cv/detection +- datasets: local/coco + download_url: https://github.com/ultralytics/yolov5/releases/download/v6.1/yolov5m.pt + name: yolov5 + need_third_part: true + precisions: + - fp16 + - int8 + relative_path: models/cv/detection/yolov5/ixrt + task_type: cv/detection +- datasets: local/coco + download_url: https://github.com/ultralytics/yolov5/releases/download/v6.1/yolov5s.pt + name: yolov5s + need_third_part: true + precisions: + - fp16 + - int8 + relative_path: models/cv/detection/yolov5s/ixrt + task_type: cv/detection +- 3rd_party_repo: YOLOv6 + datasets: local/coco + download_url: 
https://github.com/meituan/YOLOv6/releases/download/0.4.0/yolov6s.pt + name: yolov6 + need_third_part: true + precisions: + - fp16 + - int8 + relative_path: models/cv/detection/yolov6/ixrt + task_type: cv/detection +- 3rd_party_repo: yolov7 + datasets: local/coco + download_url: https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7.pt + name: yolov7 + need_third_part: true + precisions: + - fp16 + - int8 + relative_path: models/cv/detection/yolov7/ixrt + task_type: cv/detection +- datasets: local/coco + download_url: https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n.pt + name: yolov8 + need_third_part: false + precisions: + - fp16 + - int8 + relative_path: models/cv/detection/yolov8/ixrt + task_type: cv/detection +- datasets: local/coco + download_url: https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_m.pth + name: yolox + need_third_part: true + precisions: + - fp16 + - int8 + relative_path: models/cv/detection/yolox/ixrt + task_type: cv/detection +- datasets: https://vis-www.cs.umass.edu/lfw/lfw.tgz + download_url: https://drive.google.com/open?id=1R77HmFADxe87GmoLwzfgMu_HY0IhcyBz + name: facenet + need_third_part: true + precisions: + - fp16 + - int8 + relative_path: models/cv/face/facenet/ixrt + task_type: cv/face +- datasets: local/coco + download_url: https://download.01.org/opencv/openvino_training_extensions/models/human_pose_estimation/checkpoint_iter_370000.pth + name: lightweight_openpose + need_third_part: false + precisions: + - fp16 + relative_path: models/cv/pose_estimation/lightweight_openpose/ixrt + task_type: cv/pose_estimation +- datasets: local/coco + download_url: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-aic-coco_pt-aic-coco_420e-256x192-63eb25f7_20230126.pth + name: rtmpose + need_third_part: false + precisions: + - fp16 + relative_path: models/cv/pose_estimation/rtmpose/ixrt + task_type: cv/pose_estimation +- datasets: local/coco + download_url: https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x/137259246/model_final_9243eb.pkl + name: mask_rcnn + need_third_part: false + precisions: + - fp16 + relative_path: models/cv/segmentation/mask_rcnn/ixrt + task_type: cv/segmentation +- datasets: local/coco + download_url: https://download.openmmlab.com/mmdetection/v2.0/solo/solo_r50_fpn_3x_coco/solo_r50_fpn_3x_coco_20210901_012353-11d224d7.pth + name: solov1 + need_third_part: false + precisions: + - fp16 + relative_path: models/cv/segmentation/solov1/ixrt + task_type: cv/segmentation +- datasets: local/coco + need_comfirm: true + download_url: clip-vit-base-patch32.zip + name: clip + need_third_part: false + precisions: + - fp16 + relative_path: models/multimodal/text_and_image/clip/ixformer + task_type: cv/segmentation +- datasets: https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open_squad.tar + download_url: https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open_albert.tar + name: albert + need_third_part: false + precisions: + - int8 + relative_path: models/nlp/language_model/albert/ixrt + task_type: nlp/language_model +- datasets: local/SQuAD + download_url: https://huggingface.co/csarron/bert-base-uncased-squad-v1 + name: bert_base_squad + need_third_part: false + precisions: + - fp16 + relative_path: models/nlp/language_model/bert_base_squad/ixrt + task_type: nlp/language_model +- datasets: local/SQuAD + download_url: https://huggingface.co/neuralmagic/bert-large-uncased-finetuned-squadv1 + name: bert_large_squad + need_third_part: 
false + precisions: + - fp16 + - int8 + relative_path: models/nlp/language_model/bert_large_squad/ixrt + task_type: nlp/language_model +- datasets: local/SQuAD + download_url: https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open_deberta.tar + name: deberta + need_third_part: false + precisions: + - fp16 + - int8 + relative_path: models/nlp/language_model/deberta/ixrt + task_type: nlp/language_model +- datasets: local/SQuAD + download_url: https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open_roberta.tar + name: roberta + need_third_part: false + precisions: + - fp16 + - int8 + relative_path: models/nlp/language_model/roberta/ixrt + task_type: nlp/language_model +- datasets: local/SQuAD + download_url: https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open_roformer.tar + name: roformer + need_third_part: false + precisions: + - fp16 + - int8 + relative_path: models/nlp/language_model/roformer/ixrt + task_type: nlp/language_model +- datasets: https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/cifar-100-python.tar + download_url: https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open_videobert.tar + name: videobert + need_third_part: false + precisions: + - fp16 + - int8 + relative_path: models/nlp/language_model/videobert/ixrt + task_type: nlp/language_model +- datasets: https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/eval.csv + need_comfirm: true + download_url: https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open_wide_deep_saved_model.tar + name: widedeep + need_third_part: false + precisions: + - fp16 + - int8 + relative_path: models/recommendation/widedeep/ixrt + task_type: recommendation/widedeep +- datasets: https://www.openslr.org/33/aishell.tar.gz + download_url: http://mobvoi-speech-public.ufile.ucloud.cn/public/wenet/aishell/20211025_conformer_exp.tar.gz + name: conformer + need_third_part: true + precisions: + - fp16 + relative_path: models/speech/speech_recognition/conformer/ixrt + task_type: speech/speech_recognition +- datasets: https://www.openslr.org/33/aishell.tar.gz + download_url: https://drive.google.com/drive/folders/1_2zN6lbu4zUc0-iq8XbABEm6fl9mohkv + name: transformer_asr + need_third_part: true + precisions: + - fp16 + relative_path: models/speech/speech_recognition/transformer_asr/ixrt + task_type: speech/speech_recognition \ No newline at end of file diff --git a/tests/run_ixrt.py b/tests/run_ixrt.py new file mode 100644 index 00000000..04e077f6 --- /dev/null +++ b/tests/run_ixrt.py @@ -0,0 +1,541 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
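+# CI driver for the ixrt model zoo: look up the requested model in models_ixrt.yaml, run its ci/prepare.sh, +# then run scripts/infer_<model>_<precision>_accuracy.sh and _performance.sh for each listed precision and +# parse FPS, Acc@1/Acc@5 and E2E time from stdout to decide PASS/FAIL. +# Typical invocation from the tests/ directory (model name comes from --model or the TEST_CASE env var): +#     python3 run_ixrt.py --model resnet50 +#     TEST_CASE=resnet50 python3 run_ixrt.py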
+ +import yaml +import subprocess +import json +import re +import time +import logging +import os +import sys +import argparse + +import utils + +# Configure logging +debug_level = logging.DEBUG if utils.is_debug() else logging.INFO +logging.basicConfig( + handlers=[logging.FileHandler("output.log"), logging.StreamHandler()], + level=debug_level, + format="%(asctime)s - %(levelname)s - %(message)s", +) + +METRIC_PATTERN = r"{'metricResult':.*}" + +def main(): + parser = argparse.ArgumentParser(description="") + parser.add_argument("--model", type=str, help="model name, e.g. alexnet") + args = parser.parse_args() + + if args.model: + test_model = args.model + else: + test_model = os.environ.get("TEST_CASE") + logging.info(f"Test case to run: {test_model}") + if not test_model: + logging.error("test model case is empty") + sys.exit(-1) + + model = get_model_config(test_model) + if not model: + logging.error("model config is empty") + sys.exit(-1) + + result = {} + if model["task_type"] == "cv/classification": + logging.info(f"Start running {model['name']} test case:\n{json.dumps(model, indent=4)}") + d_url = model["download_url"] + if d_url is not None: + result = run_clf_testcase(model) + check_model_result(result) + logging.debug(f"The result of {model['name']} is\n{json.dumps(result, indent=4)}") + logging.info(f"End running {model['name']} test case.") + + # # Detection models + # if model["task_type"] in ["cv/detection", "cv/pose_estimation"]: + # logging.info(f"Start running {model['name']} test case:\n{json.dumps(model, indent=4)}") + # d_url = model["download_url"] + # if d_url is not None: + # result = run_detec_testcase(model) + # check_model_result(result) + # logging.debug(f"The result of {model['name']} is\n{json.dumps(result, indent=4)}") + # logging.info(f"End running {model['name']} test case.") + + # # OCR models + # if model["task_type"] in ["cv/ocr"]: + # logging.info(f"Start running {model['name']} test case:\n{json.dumps(model, indent=4)}") + # d_url = model["download_url"] + # if d_url is not None: + # result = run_ocr_testcase(model) + # check_model_result(result) + # logging.debug(f"The result of {model['name']} is\n{json.dumps(result, indent=4)}") + # logging.info(f"End running {model['name']} test case.") + + # # Trace models + # if model["task_type"] in ["cv/trace"]: + # logging.info(f"Start running {model['name']} test case:\n{json.dumps(model, indent=4)}") + # d_url = model["download_url"] + # if d_url is not None: + # result = run_trace_testcase(model) + # check_model_result(result) + # logging.debug(f"The result of {model['name']} is\n{json.dumps(result, indent=4)}") + # logging.info(f"End running {model['name']} test case.") + + # # Speech models + # if model["task_type"] in ["speech/speech_recognition"]: + # logging.info(f"Start running {model['name']} test case:\n{json.dumps(model, indent=4)}") + # d_url = model["download_url"] + # if d_url is not None: + # result = run_speech_testcase(model) + # check_model_result(result) + # logging.debug(f"The result of {model['name']} is\n{json.dumps(result, indent=4)}") + # logging.info(f"End running {model['name']} test case.") + + # # NLP models + # if model["task_type"] in ["nlp/language_model"]: + # logging.info(f"Start running {model['name']} test case:\n{json.dumps(model, indent=4)}") + # d_url = model["download_url"] + # if d_url is not None: + # result = run_nlp_testcase(model) + # check_model_result(result) + # logging.debug(f"The result of {model['name']} is\n{json.dumps(result, indent=4)}") + # logging.info(f"End running {model['name']} test case.") + + 
logging.info(f"Full text result: {result}") + +def get_model_config(mode_name): + with open("models_ixrt.yaml", "r") as file: + models = yaml.safe_load(file) + + for model in models: + if model["name"] == mode_name.lower(): + return model + return + +def check_model_result(result): + status = "PASS" + for prec in ["fp16", "int8"]: + if prec in result["result"]: + if result["result"][prec]["status"] == "FAIL": + status = "FAIL" + break + result["status"] = status + +def run_clf_testcase(model): + model_name = model["name"] + result = { + "name": model_name, + "result": {}, + } + d_url = model["download_url"] + checkpoint_n = d_url.split("/")[-1] + prepare_script = f""" + cd ../{model['relative_path']} + bash ci/prepare.sh + """ + + # add pip list info when in debug mode + if utils.is_debug(): + pip_list_script = "pip list | grep -E 'numpy|transformer|igie|mmcv|onnx'\n" + prepare_script = pip_list_script + prepare_script + pip_list_script + + run_script(prepare_script) + + for prec in model["precisions"]: + logging.info(f"Start running {model_name} {prec} test case") + script = f""" + cd ../{model['relative_path']} + export DATASETS_DIR=/mnt/deepspark/data/datasets/imagenet-val + export PROJ_DIR=./ + export CHECKPOINTS_DIR=./checkpoints + export RUN_DIR=./ + export CONFIG_DIR=config/{model_name}_CONFIG + bash scripts/infer_{model_name}_{prec}_accuracy.sh + bash scripts/infer_{model_name}_{prec}_performance.sh + """ + + r, t = run_script(script) + sout = r.stdout + fps_pattern = r"(?PFPS\s*:\s*(\d+\.?\d*))" + acc1_pattern = r"(?PAcc@1\s*:\s*(\d+\.?\d*))" + acc5_pattern = r"(?PAcc@5\s*:\s*(\d+\.?\d*))" + e2e_pattern = r"(?P\s*E2E time\s*:\s*(\d+\.\d+)\s)" + combined_pattern = re.compile(f"{fps_pattern}|{acc1_pattern}|{acc5_pattern}|{e2e_pattern}") + matchs = combined_pattern.finditer(sout) + match_count = 0 + for match in matchs: + result["result"].setdefault(prec, {"status": "FAIL"}) + for name, value in match.groupdict().items(): + if value: + match_count += 1 + try: + result["result"][prec][name] = float(value) + except ValueError: + print("The string cannot be converted to a float.") + result["result"][prec][name] = value + + if match_count == 3: + result["result"][prec]["status"] = "PASS" + result["result"][prec]["Cost time (s)"] = t + logging.debug(f"matchs:\n{matchs}") + return result + +def run_detec_testcase(model): + model_name = model["name"] + result = { + "name": model_name, + "result": {}, + } + d_url = model["download_url"] + checkpoint_n = d_url.split("/")[-1] + dataset_n = model["datasets"].split("/")[-1] + prepare_script = f""" + cd ../{model['relative_path']} + ln -s /mnt/deepspark/data/datasets/{dataset_n} ./ + bash ci/prepare.sh + """ + + # if model["need_third_part"] and model["3rd_party_repo"]: + # third_party_repo = model["3rd_party_repo"] + # prepare_script += f"unzip /mnt/deepspark/data/3rd_party/{third_party_repo}.zip -d ./\n" + # prepare_script += "bash ci/prepare.sh\n" + + # add pip list info when in debug mode + if utils.is_debug(): + pip_list_script = "pip list | grep -E 'numpy|transformer|igie|mmcv|onnx'\n" + prepare_script = pip_list_script + prepare_script + pip_list_script + + run_script(prepare_script) + + for prec in model["precisions"]: + logging.info(f"Start running {model_name} {prec} test case") + script = f""" + cd ../{model['relative_path']} + export DATASETS_DIR=./{dataset_n}/ + + export MODEL_PATH=./{model_name}.onnx + + export PROJ_DIR=./ + export CHECKPOINTS_DIR=./checkpoints + export COCO_GT=./{dataset_n}/annotations/instances_val2017.json + 
+        export EVAL_DIR=./{dataset_n}/val2017
+        export RUN_DIR=./
+        export CONFIG_DIR=config/{model_name}_CONFIG
+
+        bash scripts/infer_{model_name}_{prec}_accuracy.sh
+        bash scripts/infer_{model_name}_{prec}_performance.sh
+        """
+
+        r, t = run_script(script)
+        sout = r.stdout
+        fps_pattern = r"(?P<FPS>FPS\s*:\s*(\d+\.?\d*))"
+        e2e_pattern = r"(?P<E2E>\s*E2E time\s*:\s*(\d+\.\d+)\s)"
+        combined_pattern = re.compile(f"{fps_pattern}|{e2e_pattern}")
+        matchs = combined_pattern.finditer(sout)
+        for match in matchs:
+            result["result"].setdefault(prec, {"status": "FAIL"})
+            for name, value in match.groupdict().items():
+                if value:
+                    try:
+                        result["result"][prec][name] = float(value)
+                    except ValueError:
+                        print("The string cannot be converted to a float.")
+                        result["result"][prec][name] = value
+        pattern = r"Average Precision \(AP\) @\[ (IoU=0.50[:\d.]*)\s*\| area= all \| maxDets=\s?\d+\s?\] =\s*([\d.]+)"
+        matchs = re.findall(pattern, sout)
+        for m in matchs:
+            result["result"].setdefault(prec, {})
+            try:
+                result["result"][prec] = result["result"][prec] | {m[0]: float(m[1])}
+            except ValueError:
+                print("The string cannot be converted to a float.")
+                result["result"][prec] = result["result"][prec] | {m[0]: m[1]}
+        if matchs and len(matchs) == 2:
+            result["result"][prec]["status"] = "PASS"
+        else:
+            pattern = METRIC_PATTERN
+            matchs = re.findall(pattern, sout)
+            if matchs and len(matchs) == 1:
+                result["result"].setdefault(prec, {})
+                result["result"][prec].update(get_metric_result(matchs[0]))
+                result["result"][prec]["status"] = "PASS"
+        result["result"][prec]["Cost time (s)"] = t
+        logging.debug(f"matchs:\n{matchs}")
+
+    return result
+
+def run_ocr_testcase(model):
+    model_name = model["name"]
+    result = {
+        "name": model_name,
+        "result": {},
+    }
+    d_url = model["download_url"]
+    checkpoint_n = d_url.split("/")[-1]
+    dataset_n = model["datasets"].split("/")[-1]
+    prepare_script = f"""
+    cd ../{model['relative_path']}
+    ln -s /mnt/deepspark/data/checkpoints/{checkpoint_n} ./
+    ln -s /mnt/deepspark/data/datasets/{dataset_n} ./
+    unzip /mnt/deepspark/data/3rd_party/PaddleOCR-release-2.6.zip -d ./PaddleOCR
+    bash ci/prepare.sh
+    """
+
+    # add pip list info when in debug mode
+    if utils.is_debug():
+        pip_list_script = "pip list | grep -E 'numpy|transformer|igie|mmcv|onnx'\n"
+        prepare_script = pip_list_script + prepare_script + pip_list_script
+
+    run_script(prepare_script)
+
+    for prec in model["precisions"]:
+        logging.info(f"Start running {model_name} {prec} test case")
+        script = f"""
+        cd ../{model['relative_path']}
+        export DATASETS_DIR=./{dataset_n}/
+        bash scripts/infer_{model_name}_{prec}_accuracy.sh
+        bash scripts/infer_{model_name}_{prec}_performance.sh
+        """
+
+        r, t = run_script(script)
+        sout = r.stdout
+        pattern = r"\* ([\w\d ]+):\s*([\d.]+)[ ms%]*, ([\w\d ]+):\s*([\d.]+)[ ms%]*"
+        matchs = re.findall(pattern, sout)
+        for m in matchs:
+            result["result"].setdefault(prec, {"status": "FAIL"})
+            try:
+                result["result"][prec] = result["result"][prec] | {m[0]: float(m[1]), m[2]: float(m[3])}
+            except ValueError:
+                print("The string cannot be converted to a float.")
+                result["result"][prec] = result["result"][prec] | {m[0]: m[1], m[2]: m[3]}
+
+        pattern = METRIC_PATTERN
+        matchs = re.findall(pattern, sout)
+        if matchs and len(matchs) == 1:
+            result["result"].setdefault(prec, {})
+            result["result"][prec].update(get_metric_result(matchs[0]))
+            result["result"][prec]["status"] = "PASS"
+        result["result"][prec]["Cost time (s)"] = t
+        logging.debug(f"matchs:\n{matchs}")
+
+    return result
+
+def run_trace_testcase(model):
+    model_name = model["name"]
+    result = {
+        "name": model_name,
+        "result": {},
+    }
+    d_url = model["download_url"]
+    checkpoint_n = d_url.split("/")[-1]
+    dataset_n = model["datasets"].split("/")[-1]
+    prepare_script = f"""
+    cd ../{model['relative_path']}
+    ln -s /mnt/deepspark/data/checkpoints/{checkpoint_n} ./
+    ln -s /mnt/deepspark/data/datasets/{dataset_n} ./
+    """
+
+    if model["need_third_part"]:
+        prepare_script += "unzip /mnt/deepspark/data/3rd_party/fast-reid.zip -d ./fast-reid\n"
+
+    prepare_script += """
+    bash ci/prepare.sh
+    ls -l | grep onnx
+    """
+
+    # add pip list info when in debug mode
+    if utils.is_debug():
+        pip_list_script = "pip list | grep -E 'numpy|transformer|igie|mmcv|onnx'\n"
+        prepare_script = pip_list_script + prepare_script + pip_list_script
+
+    run_script(prepare_script)
+
+    for prec in model["precisions"]:
+        logging.info(f"Start running {model_name} {prec} test case")
+        script = f"""
+        cd ../{model['relative_path']}
+        export DATASETS_DIR=./{dataset_n}/
+        bash scripts/infer_{model_name}_{prec}_accuracy.sh
+        bash scripts/infer_{model_name}_{prec}_performance.sh
+        """
+
+        r, t = run_script(script)
+        sout = r.stdout
+        pattern = r"\* ([\w\d ]+):\s*([\d.]+)[ ms%]*, ([\w\d ]+):\s*([\d.]+)[ ms%]*"
+        matchs = re.findall(pattern, sout)
+        for m in matchs:
+            result["result"].setdefault(prec, {"status": "FAIL"})
+            try:
+                result["result"][prec] = result["result"][prec] | {m[0]: float(m[1]), m[2]: float(m[3])}
+            except ValueError:
+                print("The string cannot be converted to a float.")
+                result["result"][prec] = result["result"][prec] | {m[0]: m[1], m[2]: m[3]}
+        pattern = METRIC_PATTERN
+        matchs = re.findall(pattern, sout)
+        if matchs and len(matchs) == 1:
+            result["result"].setdefault(prec, {})
+            result["result"][prec].update(get_metric_result(matchs[0]))
+            result["result"][prec]["status"] = "PASS"
+        result["result"][prec]["Cost time (s)"] = t
+        logging.debug(f"matchs:\n{matchs}")
+    return result
+
+# BERT series models
+def run_nlp_testcase(model):
+    model_name = model["name"]
+    result = {
+        "name": model_name,
+        "result": {},
+    }
+    d_url = model["download_url"]
+    checkpoint_n = d_url.split("/")[-1]
+    dataset_n = model["datasets"].split("/")[-1]
+    target_dirs = {"bert_base_squad": "csarron/bert-base-uncased-squad-v1", "bert_base_ner":"test", "bert_large_squad": "neuralmagic/bert-large-uncased-finetuned-squadv1"}
+    target_dir = target_dirs[model_name]
+    dirname = os.path.dirname(target_dir)
+    mkdir_script = f"mkdir -p {dirname}" if dirname else ""
+
+    prepare_script = f"""
+    set -x
+    cd ../{model['relative_path']}
+    {mkdir_script}
+    ln -s /mnt/deepspark/data/checkpoints/{checkpoint_n} ./{target_dir}
+    export DATASETS_DIR=/mnt/deepspark/data/datasets/{dataset_n}
+    bash ci/prepare.sh
+    """
+
+    # prepare int8 model for bert_large_squad
+    if model_name == "bert_large_squad":
+        prepare_script += "ln -s /mnt/deepspark/data/checkpoints/bert_large_int8.hdf5 ./\n"
+
+    # add pip list info when in debug mode
+    if utils.is_debug():
+        pip_list_script = "pip list | grep -E 'numpy|transformer|igie|mmcv|onnx'\n"
+        prepare_script = pip_list_script + prepare_script + pip_list_script
+
+    run_script(prepare_script)
+
+    for prec in model["precisions"]:
+        logging.info(f"Start running {model_name} {prec} test case")
+        script = f"""
+        set -x
+        export DATASETS_DIR=/mnt/deepspark/data/datasets/{dataset_n}
+        cd ../{model['relative_path']}
+        bash scripts/infer_{model_name}_{prec}_accuracy.sh
+        bash scripts/infer_{model_name}_{prec}_performance.sh
+        """
+
+        r, t = run_script(script)
+        sout = r.stdout
+
+        pattern = METRIC_PATTERN
+        matchs = re.findall(pattern, sout)
+        result["result"].setdefault(prec, {"status": "FAIL"})
+        logging.debug(f"matchs:\n{matchs}")
+        for m in matchs:
+            result["result"][prec].update(get_metric_result(m))
+        if len(matchs) == 2:
+            result["result"][prec]["status"] = "PASS"
+
+        result["result"][prec]["Cost time (s)"] = t
+    return result
+
+def run_speech_testcase(model):
+    model_name = model["name"]
+    result = {
+        "name": model_name,
+        "result": {},
+    }
+    d_url = model["download_url"]
+    checkpoint_n = d_url.split("/")[-1]
+    dataset_n = model["datasets"].split("/")[-1]
+    prepare_script = f"""
+    cd ../{model['relative_path']}
+    ln -s /mnt/deepspark/data/checkpoints/{checkpoint_n} ./
+    ln -s /mnt/deepspark/data/datasets/{dataset_n} ./
+    """
+
+    if model["need_third_part"] and model_name == "conformer":
+        prepare_script += "unzip /mnt/deepspark/data/3rd_party/kenlm.zip -d ./ctc_decoder/swig/kenlm\n"
+        prepare_script += "unzip /mnt/deepspark/data/3rd_party/ThreadPool.zip -d ./ctc_decoder/swig/ThreadPool\n"
+        prepare_script += "tar -xzvf /mnt/deepspark/data/3rd_party/openfst-1.6.3.tar.gz -C ./ctc_decoder/swig/\n"
+
+    prepare_script += """
+    export PYTHONPATH=`pwd`/wenet:$PYTHONPATH
+    echo $PYTHONPATH
+    bash ci/prepare.sh
+    ls -l | grep onnx
+    """
+
+    # add pip list info when in debug mode
+    if utils.is_debug():
+        pip_list_script = "pip list | grep -E 'numpy|transformer|igie|mmcv|onnx'\n"
+        prepare_script = pip_list_script + prepare_script + pip_list_script
+
+    run_script(prepare_script)
+
+    for prec in model["precisions"]:
+        logging.info(f"Start running {model_name} {prec} test case")
+        script = f"""
+        cd ../{model['relative_path']}
+        export PYTHONPATH=./wenet:$PYTHONPATH
+        echo $PYTHONPATH
+        bash scripts/infer_{model_name}_{prec}_accuracy.sh
+        bash scripts/infer_{model_name}_{prec}_performance.sh
+        """
+
+        r, t = run_script(script)
+        sout = r.stdout
+        pattern = r"\* ([\w\d ]+):\s*([\d.]+)[ ms%]*, ([\w\d ]+):\s*([\d.]+)[ ms%]*"
+        matchs = re.findall(pattern, sout)
+        for m in matchs:
+            result["result"].setdefault(prec, {"status": "FAIL"})
+            try:
+                result["result"][prec] = result["result"][prec] | {m[0]: float(m[1]), m[2]: float(m[3])}
+            except ValueError:
+                print("The string cannot be converted to a float.")
+                result["result"][prec] = result["result"][prec] | {m[0]: m[1], m[2]: m[3]}
+        pattern = METRIC_PATTERN
+        matchs = re.findall(pattern, sout)
+        if matchs and len(matchs) == 1:
+            result["result"].setdefault(prec, {})
+            result["result"][prec].update(get_metric_result(matchs[0]))
+            result["result"][prec]["status"] = "PASS"
+        result["result"][prec]["Cost time (s)"] = t
+        logging.debug(f"matchs:\n{matchs}")
+    return result
+
+def get_metric_result(str):
+    if str:
+        return json.loads(str.replace("'", "\""))["metricResult"]
+    return None
+
+def run_script(script):
+    start_time = time.perf_counter()
+    result = subprocess.run(
+        script, shell=True, capture_output=True, text=True, executable="/bin/bash"
+    )
+    end_time = time.perf_counter()
+    execution_time = end_time - start_time
+    logging.debug(f"Executed command:\n{script}")
+    logging.debug("Execution time: {:.4f} seconds".format(execution_time))
+    logging.debug(f"stdout: {result.stdout}")
+    logging.debug(f"stderr: {result.stderr}")
+    logging.debug(f"Return code: {result.returncode}")
+    return result, execution_time
+
+if __name__ == "__main__":
+    main()
--
Gitee

From 6da13e2ab0e05dfe5a452fad2251ff5e140f4e85 Mon Sep 17 00:00:00 2001
From: "hongliang.yuan"
Date: Tue, 7 Jan 2025 16:31:35 +0800
Subject: [PATCH 02/35] fix yaml error

--- tests/models_ixrt.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/models_ixrt.yaml b/tests/models_ixrt.yaml index d7bc0f2b..20746ced 100644 --- a/tests/models_ixrt.yaml +++ b/tests/models_ixrt.yaml @@ -155,7 +155,7 @@ relative_path: models/cv/classification/repvgg/ixrt task_type: cv/classification - datasets: https://www.image-net.org/download.php - download_url:https://shanghuagao.oss-cn-beijing.aliyuncs.com/res2net/res2net50_14w_8s-6527dddc.pth + download_url: https://shanghuagao.oss-cn-beijing.aliyuncs.com/res2net/res2net50_14w_8s-6527dddc.pth name: res2net50 need_third_part: true precisions: @@ -504,4 +504,4 @@ precisions: - fp16 relative_path: models/speech/speech_recognition/transformer_asr/ixrt - task_type: speech/speech_recognition \ No newline at end of file + task_type: speech/speech_recognition -- Gitee From 1d503637d226431b51056714d203500ec472428c Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Tue, 7 Jan 2025 18:01:04 +0800 Subject: [PATCH 03/35] fix config name and dataset path --- tests/run_ixrt.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/run_ixrt.py b/tests/run_ixrt.py index 04e077f6..36fbc343 100644 --- a/tests/run_ixrt.py +++ b/tests/run_ixrt.py @@ -154,15 +154,17 @@ def run_clf_testcase(model): run_script(prepare_script) + config_name = model_name.upper() + for prec in model["precisions"]: logging.info(f"Start running {model_name} {prec} test case") script = f""" cd ../{model['relative_path']} - export DATASETS_DIR=/mnt/deepspark/data/datasets/imagenet-val + export DATASETS_DIR=/root/data/datasets/imagenet-val export PROJ_DIR=./ export CHECKPOINTS_DIR=./checkpoints export RUN_DIR=./ - export CONFIG_DIR=config/{model_name}_CONFIG + export CONFIG_DIR=config/{config_name}_CONFIG bash scripts/infer_{model_name}_{prec}_accuracy.sh bash scripts/infer_{model_name}_{prec}_performance.sh """ -- Gitee From fc87553991665dcdf8630d33ecd56edbf34b9a90 Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Wed, 8 Jan 2025 10:25:39 +0800 Subject: [PATCH 04/35] update match result --- tests/run_ixrt.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/run_ixrt.py b/tests/run_ixrt.py index 36fbc343..dde50721 100644 --- a/tests/run_ixrt.py +++ b/tests/run_ixrt.py @@ -146,7 +146,6 @@ def run_clf_testcase(model): cd ../{model['relative_path']} bash ci/prepare.sh """ - # add pip list info when in debug mode if utils.is_debug(): pip_list_script = "pip list | grep -E 'numpy|transformer|igie|mmcv|onnx'\n" @@ -156,6 +155,15 @@ def run_clf_testcase(model): config_name = model_name.upper() + patterns = { + "FPS": r"FPS\s*:\s*(\d+\.?\d*)", + "Acc1": r"Acc@1\s*:\s*(\d+\.?\d*)", + "Acc5": r"Acc@5\s*:\s*(\d+\.?\d*)", + "E2E": r"E2E time\s*:\s*(\d+\.\d+)" + } + + combined_pattern = re.compile("|".join(f"(?P<{name}>{pattern})" for name, pattern in patterns.items())) + for prec in model["precisions"]: logging.info(f"Start running {model_name} {prec} test case") script = f""" @@ -171,25 +179,17 @@ def run_clf_testcase(model): r, t = run_script(script) sout = r.stdout - fps_pattern = r"(?PFPS\s*:\s*(\d+\.?\d*))" - acc1_pattern = r"(?PAcc@1\s*:\s*(\d+\.?\d*))" - acc5_pattern = r"(?PAcc@5\s*:\s*(\d+\.?\d*))" - e2e_pattern = r"(?P\s*E2E time\s*:\s*(\d+\.\d+)\s)" - combined_pattern = re.compile(f"{fps_pattern}|{acc1_pattern}|{acc5_pattern}|{e2e_pattern}") matchs = combined_pattern.finditer(sout) + result["result"].setdefault(prec, {"status": "FAIL"}) match_count = 0 for match in 
matchs: - result["result"].setdefault(prec, {"status": "FAIL"}) for name, value in match.groupdict().items(): if value: match_count += 1 - try: - result["result"][prec][name] = float(value) - except ValueError: - print("The string cannot be converted to a float.") - result["result"][prec][name] = value + result["result"][prec][name] = float(value.split(":")[1].strip()) + break - if match_count == 3: + if match_count == len(patterns): result["result"][prec]["status"] = "PASS" result["result"][prec]["Cost time (s)"] = t logging.debug(f"matchs:\n{matchs}") -- Gitee From c019b54cb3c5c1f36c219282eeaf11a003211e29 Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Wed, 8 Jan 2025 11:31:10 +0800 Subject: [PATCH 05/35] support detect --- tests/run_ixrt.py | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/tests/run_ixrt.py b/tests/run_ixrt.py index dde50721..8f5343e2 100644 --- a/tests/run_ixrt.py +++ b/tests/run_ixrt.py @@ -64,15 +64,15 @@ def main(): logging.debug(f"The result of {model['name']} is\n{json.dumps(result, indent=4)}") logging.info(f"End running {model['name']} test case.") - # # 检测模型 - # if model["task_type"] in ["cv/detection", "cv/pose_estimation"]: - # logging.info(f"Start running {model['name']} test case:\n{json.dumps(model, indent=4)}") - # d_url = model["download_url"] - # if d_url is not None: - # result = run_detec_testcase(model) - # check_model_result(result) - # logging.debug(f"The result of {model['name']} is\n{json.dumps(result, indent=4)}") - # logging.info(f"End running {model['name']} test case.") + # 检测模型 + if model["task_type"] in ["cv/detection", "cv/pose_estimation"]: + logging.info(f"Start running {model['name']} test case:\n{json.dumps(model, indent=4)}") + d_url = model["download_url"] + if d_url is not None: + result = run_detec_testcase(model) + check_model_result(result) + logging.debug(f"The result of {model['name']} is\n{json.dumps(result, indent=4)}") + logging.info(f"End running {model['name']} test case.") # # OCR模型 # if model["task_type"] in ["cv/ocr"]: @@ -186,7 +186,7 @@ def run_clf_testcase(model): for name, value in match.groupdict().items(): if value: match_count += 1 - result["result"][prec][name] = float(value.split(":")[1].strip()) + result["result"][prec][name] = float(f"{float(value.split(":")[1].strip()):.3f}") break if match_count == len(patterns): @@ -222,6 +222,8 @@ def run_detec_testcase(model): run_script(prepare_script) + config_name = model_name.upper() + for prec in model["precisions"]: logging.info(f"Start running {model_name} {prec} test case") script = f""" @@ -235,7 +237,7 @@ def run_detec_testcase(model): export COCO_GT=./{dataset_n}/annotations/instances_val2017.json export EVAL_DIR=./{dataset_n}/val2017 export RUN_DIR=./ - export CONFIG_DIR=config/{model_name}_CONFIG + export CONFIG_DIR=config/{config_name}_CONFIG bash scripts/infer_{model_name}_{prec}_accuracy.sh bash scripts/infer_{model_name}_{prec}_performance.sh @@ -244,7 +246,7 @@ def run_detec_testcase(model): r, t = run_script(script) sout = r.stdout fps_pattern = r"(?PFPS\s*:\s*(\d+\.?\d*))" - e2e_pattern = r"(?P\s*E2E time\s*:\s*(\d+\.\d+)\s)" + e2e_pattern = r"(?P\s*E2E time\s*:\s*(\d+\.\d+)\s)" combined_pattern = re.compile(f"{fps_pattern}|{e2e_pattern}") matchs = combined_pattern.finditer(sout) for match in matchs: @@ -252,7 +254,8 @@ def run_detec_testcase(model): for name, value in match.groupdict().items(): if value: try: - result["result"][prec][name] = float(value) + result["result"][prec][name] = 
float(f"{float(value.split(":")[1].strip()):.3f}") + break except ValueError: print("The string cannot be converted to a float.") result["result"][prec][name] = value -- Gitee From 7a726f6ec9f4d241369f8bc7f5b5add787a1dbb5 Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Wed, 8 Jan 2025 11:31:51 +0800 Subject: [PATCH 06/35] update detect script --- models/cv/detection/centernet/ixrt/README.md | 6 +-- .../cv/detection/centernet/ixrt/ci/prepare.sh | 28 ++++++++++++ .../cv/detection/centernet/ixrt/inference.py | 6 ++- .../detection/centernet/ixrt/requirements.txt | 5 +++ models/cv/detection/detr/ixrt/README.md | 9 +--- models/cv/detection/detr/ixrt/ci/prepare.sh | 30 +++++++++++++ models/cv/detection/detr/ixrt/inference.py | 5 ++- .../cv/detection/detr/ixrt/requirements.txt | 8 ++++ models/cv/detection/fcos/ixrt/README.md | 11 +---- models/cv/detection/fcos/ixrt/ci/prepare.sh | 29 ++++++++++++ .../cv/detection/fcos/ixrt/requirements.txt | 10 +++++ models/cv/detection/yolov3/ixrt/README.md | 8 +--- models/cv/detection/yolov3/ixrt/ci/prepare.sh | 35 +++++++++++++++ .../cv/detection/yolov3/ixrt/requirements.txt | 7 +++ models/cv/detection/yolov4/ixrt/README.md | 14 +++--- models/cv/detection/yolov4/ixrt/ci/prepare.sh | 39 ++++++++++++++++ .../cv/detection/yolov4/ixrt/requirements.txt | 5 +++ ...uracy.sh => infer_yolov4_fp16_accuracy.sh} | 0 ...ce.sh => infer_yolov4_fp16_performance.sh} | 0 ...uracy.sh => infer_yolov4_int8_accuracy.sh} | 0 ...ce.sh => infer_yolov4_int8_performance.sh} | 0 models/cv/detection/yolov5/ixrt/README.md | 26 +++++------ models/cv/detection/yolov5/ixrt/ci/prepare.sh | 43 ++++++++++++++++++ .../config/{YOLOV5M_CONFIG => YOLOV5_CONFIG} | 0 .../cv/detection/yolov5/ixrt/requirements.txt | 7 +++ ...uracy.sh => infer_yolov5_fp16_accuracy.sh} | 0 ...ce.sh => infer_yolov5_fp16_performance.sh} | 0 ...uracy.sh => infer_yolov5_int8_accuracy.sh} | 0 ...ce.sh => infer_yolov5_int8_performance.sh} | 0 models/cv/detection/yolov5s/ixrt/README.md | 6 +-- .../cv/detection/yolov5s/ixrt/ci/prepare.sh | 44 +++++++++++++++++++ .../detection/yolov5s/ixrt/requirements.txt | 5 +++ models/cv/detection/yolov6/ixrt/README.md | 22 +++++----- models/cv/detection/yolov6/ixrt/ci/prepare.sh | 38 ++++++++++++++++ .../cv/detection/yolov6/ixrt/requirements.txt | 5 +++ ...uracy.sh => infer_yolov6_fp16_accuracy.sh} | 2 +- ...ce.sh => infer_yolov6_fp16_performance.sh} | 2 +- ...uracy.sh => infer_yolov6_int8_accuracy.sh} | 2 +- ...ce.sh => infer_yolov6_int8_performance.sh} | 2 +- models/cv/detection/yolov7/ixrt/README.md | 18 +++----- models/cv/detection/yolov7/ixrt/ci/prepare.sh | 34 ++++++++++++++ .../config/{YOLOV7M_CONFIG => YOLOV7_CONFIG} | 0 .../cv/detection/yolov7/ixrt/requirements.txt | 7 +++ ...uracy.sh => infer_yolov7_fp16_accuracy.sh} | 0 ...ce.sh => infer_yolov7_fp16_performance.sh} | 0 ...uracy.sh => infer_yolov7_int8_accuracy.sh} | 0 ...ce.sh => infer_yolov7_int8_performance.sh} | 0 models/cv/detection/yolov8/ixrt/ci/prepare.sh | 27 ++++++++++++ models/cv/detection/yolox/ixrt/ci/prepare.sh | 27 ++++++++++++ 49 files changed, 482 insertions(+), 90 deletions(-) create mode 100644 models/cv/detection/centernet/ixrt/ci/prepare.sh create mode 100644 models/cv/detection/centernet/ixrt/requirements.txt create mode 100644 models/cv/detection/detr/ixrt/ci/prepare.sh create mode 100644 models/cv/detection/detr/ixrt/requirements.txt create mode 100644 models/cv/detection/fcos/ixrt/ci/prepare.sh create mode 100644 models/cv/detection/fcos/ixrt/requirements.txt create mode 100644 
models/cv/detection/yolov3/ixrt/ci/prepare.sh create mode 100644 models/cv/detection/yolov3/ixrt/requirements.txt create mode 100644 models/cv/detection/yolov4/ixrt/ci/prepare.sh create mode 100644 models/cv/detection/yolov4/ixrt/requirements.txt rename models/cv/detection/yolov4/ixrt/scripts/{infer_yolov4darknet_fp16_accuracy.sh => infer_yolov4_fp16_accuracy.sh} (100%) rename models/cv/detection/yolov4/ixrt/scripts/{infer_yolov4darknet_fp16_performance.sh => infer_yolov4_fp16_performance.sh} (100%) rename models/cv/detection/yolov4/ixrt/scripts/{infer_yolov4darknet_int8_accuracy.sh => infer_yolov4_int8_accuracy.sh} (100%) rename models/cv/detection/yolov4/ixrt/scripts/{infer_yolov4darknet_int8_performance.sh => infer_yolov4_int8_performance.sh} (100%) create mode 100644 models/cv/detection/yolov5/ixrt/ci/prepare.sh rename models/cv/detection/yolov5/ixrt/config/{YOLOV5M_CONFIG => YOLOV5_CONFIG} (100%) create mode 100644 models/cv/detection/yolov5/ixrt/requirements.txt rename models/cv/detection/yolov5/ixrt/scripts/{infer_yolov5m_fp16_accuracy.sh => infer_yolov5_fp16_accuracy.sh} (100%) rename models/cv/detection/yolov5/ixrt/scripts/{infer_yolov5m_fp16_performance.sh => infer_yolov5_fp16_performance.sh} (100%) rename models/cv/detection/yolov5/ixrt/scripts/{infer_yolov5m_int8_accuracy.sh => infer_yolov5_int8_accuracy.sh} (100%) rename models/cv/detection/yolov5/ixrt/scripts/{infer_yolov5m_int8_performance.sh => infer_yolov5_int8_performance.sh} (100%) create mode 100644 models/cv/detection/yolov5s/ixrt/ci/prepare.sh create mode 100644 models/cv/detection/yolov5s/ixrt/requirements.txt create mode 100644 models/cv/detection/yolov6/ixrt/ci/prepare.sh create mode 100644 models/cv/detection/yolov6/ixrt/requirements.txt rename models/cv/detection/yolov6/ixrt/scripts/{infer_yolov6s_fp16_accuracy.sh => infer_yolov6_fp16_accuracy.sh} (98%) rename models/cv/detection/yolov6/ixrt/scripts/{infer_yolov6s_fp16_performance.sh => infer_yolov6_fp16_performance.sh} (98%) rename models/cv/detection/yolov6/ixrt/scripts/{infer_yolov6s_int8_accuracy.sh => infer_yolov6_int8_accuracy.sh} (98%) rename models/cv/detection/yolov6/ixrt/scripts/{infer_yolov6s_int8_performance.sh => infer_yolov6_int8_performance.sh} (98%) create mode 100644 models/cv/detection/yolov7/ixrt/ci/prepare.sh rename models/cv/detection/yolov7/ixrt/config/{YOLOV7M_CONFIG => YOLOV7_CONFIG} (100%) create mode 100644 models/cv/detection/yolov7/ixrt/requirements.txt rename models/cv/detection/yolov7/ixrt/scripts/{infer_yolov7m_fp16_accuracy.sh => infer_yolov7_fp16_accuracy.sh} (100%) rename models/cv/detection/yolov7/ixrt/scripts/{infer_yolov7m_fp16_performance.sh => infer_yolov7_fp16_performance.sh} (100%) rename models/cv/detection/yolov7/ixrt/scripts/{infer_yolov7m_int8_accuracy.sh => infer_yolov7_int8_accuracy.sh} (100%) rename models/cv/detection/yolov7/ixrt/scripts/{infer_yolov7m_int8_performance.sh => infer_yolov7_int8_performance.sh} (100%) diff --git a/models/cv/detection/centernet/ixrt/README.md b/models/cv/detection/centernet/ixrt/README.md index e7b51ca9..8978b2b4 100644 --- a/models/cv/detection/centernet/ixrt/README.md +++ b/models/cv/detection/centernet/ixrt/README.md @@ -15,11 +15,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install onnx -pip3 install tqdm -pip3 install mmdet -pip3 install mmdeploy -pip3 install mmengine +pip3 install -r requirements.txt # Contact the Iluvatar administrator to get the mmcv install package. 
``` diff --git a/models/cv/detection/centernet/ixrt/ci/prepare.sh b/models/cv/detection/centernet/ixrt/ci/prepare.sh new file mode 100644 index 00000000..fb6ab855 --- /dev/null +++ b/models/cv/detection/centernet/ixrt/ci/prepare.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi +pip3 install -r requirements.txt +python3 export.py --weight /root/data/checkpoints/centernet_resnet18_140e_coco_20210705_093630-bb5b3bf7.pth --cfg centernet_r18_8xb16-crop512-140e_coco.py --output centernet.onnx \ No newline at end of file diff --git a/models/cv/detection/centernet/ixrt/inference.py b/models/cv/detection/centernet/ixrt/inference.py index 3e7f954f..e65c681f 100644 --- a/models/cv/detection/centernet/ixrt/inference.py +++ b/models/cv/detection/centernet/ixrt/inference.py @@ -126,7 +126,8 @@ def main(): cfg['test_evaluator']['ann_file'] = os.path.join(args.datasets, 'annotations/instances_val2017.json') runner = RUNNERS.build(cfg) - + + start_time = time.time() for input_data in tqdm(runner.test_dataloader): input_data = runner.model.data_preprocessor(input_data, False) @@ -178,6 +179,9 @@ def main(): runner.test_evaluator.process(data_samples=batch_data_samples, data_batch=input_data) + end_time = time.time() + e2e_time = end_time - start_time + print(F"E2E time : {e2e_time:.3f} seconds") metrics = runner.test_evaluator.evaluate(len(runner.test_dataloader.dataset)) if __name__ == "__main__": diff --git a/models/cv/detection/centernet/ixrt/requirements.txt b/models/cv/detection/centernet/ixrt/requirements.txt new file mode 100644 index 00000000..91b58790 --- /dev/null +++ b/models/cv/detection/centernet/ixrt/requirements.txt @@ -0,0 +1,5 @@ +onnx +tqdm +mmdet +mmdeploy +mmengine \ No newline at end of file diff --git a/models/cv/detection/detr/ixrt/README.md b/models/cv/detection/detr/ixrt/README.md index 28df3f60..1704b35c 100755 --- a/models/cv/detection/detr/ixrt/README.md +++ b/models/cv/detection/detr/ixrt/README.md @@ -15,14 +15,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install tqdm -pip3 install pycuda -pip3 install onnx -pip3 install onnxsim -pip3 install tabulate -pip3 install cv2 -pip3 install pycocotools -pip3 install opencv-python==4.6.0.66 +pip3 install -r requirements.txt ``` ### Download diff --git a/models/cv/detection/detr/ixrt/ci/prepare.sh b/models/cv/detection/detr/ixrt/ci/prepare.sh new file mode 100644 index 00000000..b70efe4d --- /dev/null +++ b/models/cv/detection/detr/ixrt/ci/prepare.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi +pip3 install -r requirements.txt + +mkdir checkpoints +python3 export_model.py --torch_file /root/data/checkpoints/detr_r50_8xb2-150e_coco_20221023_153551-436d03e8.pth --onnx_file checkpoints/detr_res50.onnx --bsz 1 \ No newline at end of file diff --git a/models/cv/detection/detr/ixrt/inference.py b/models/cv/detection/detr/ixrt/inference.py index eb33b614..7da3b286 100755 --- a/models/cv/detection/detr/ixrt/inference.py +++ b/models/cv/detection/detr/ixrt/inference.py @@ -94,6 +94,7 @@ def main(config): print(f"pred_logits shape : {batch_pred_logits.shape} pred_logits type : {batch_pred_logits.dtype}") print(f"pred_boxes shape : {batch_pred_boxes.shape} pred_boxes type : {batch_pred_boxes.dtype}") + start_time = time.time() for batch_data, batch_img_shape, batch_img_id in tqdm(dataloader): batch_data = batch_data.numpy() batch_img_shape = [batch_img_shape[0].numpy(), batch_img_shape[1].numpy()] @@ -128,7 +129,9 @@ def main(config): # ipdb.set_trace() save2json(img_id, pred_boxes, json_result) - + end_time = time.time() + e2e_time = end_time - start_time + print(F"E2E time : {e2e_time:.3f} seconds") fps = num_samples / forward_time if config.test_mode == "FPS": diff --git a/models/cv/detection/detr/ixrt/requirements.txt b/models/cv/detection/detr/ixrt/requirements.txt new file mode 100644 index 00000000..94d8e7c4 --- /dev/null +++ b/models/cv/detection/detr/ixrt/requirements.txt @@ -0,0 +1,8 @@ +tqdm +pycuda +onnx +onnxsim +tabulate +cv2 +pycocotools +opencv-python==4.6.0.66 \ No newline at end of file diff --git a/models/cv/detection/fcos/ixrt/README.md b/models/cv/detection/fcos/ixrt/README.md index 49db1e04..51e30390 100755 --- a/models/cv/detection/fcos/ixrt/README.md +++ b/models/cv/detection/fcos/ixrt/README.md @@ -16,16 +16,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install tqdm -pip3 install onnx -pip3 install onnxsim -pip3 install ultralytics -pip3 install pycocotools -pip3 install addict -pip3 install yapf -pip3 install pycuda -pip3 install mmdet==2.28.2 -pip3 install opencv-python==4.6.0.66 +pip3 install -r requirements.txt ``` ### Dependency diff --git a/models/cv/detection/fcos/ixrt/ci/prepare.sh b/models/cv/detection/fcos/ixrt/ci/prepare.sh new file mode 100644 index 00000000..1f0ab34a --- /dev/null +++ b/models/cv/detection/fcos/ixrt/ci/prepare.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi +pip3 install -r requirements.txt +python3 export.py --weight fcos_r50_caffe_fpn_gn-head_1x_coco-821213aa.pth --cfg fcos_r50_caffe_fpn_gn-head_1x_coco.py --output fcos.onnx +onnxsim fcos.onnx fcos_opt.onnx \ No newline at end of file diff --git a/models/cv/detection/fcos/ixrt/requirements.txt b/models/cv/detection/fcos/ixrt/requirements.txt new file mode 100644 index 00000000..3a911f40 --- /dev/null +++ b/models/cv/detection/fcos/ixrt/requirements.txt @@ -0,0 +1,10 @@ +tqdm +onnx +onnxsim +ultralytics +pycocotools +addict +yapf +pycuda +mmdet==2.28.2 +opencv-python==4.6.0.66 \ No newline at end of file diff --git a/models/cv/detection/yolov3/ixrt/README.md b/models/cv/detection/yolov3/ixrt/README.md index f79543fd..285c8a3a 100644 --- a/models/cv/detection/yolov3/ixrt/README.md +++ b/models/cv/detection/yolov3/ixrt/README.md @@ -15,13 +15,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install tqdm -pip3 install onnx -pip3 install onnxsim -pip3 install ultralytics -pip3 install pycocotools -pip3 install cv2 -pip3 install opencv-python==4.6.0.66 +pip3 install -r requirements.txt ``` ### Download diff --git a/models/cv/detection/yolov3/ixrt/ci/prepare.sh b/models/cv/detection/yolov3/ixrt/ci/prepare.sh new file mode 100644 index 00000000..2fea690f --- /dev/null +++ b/models/cv/detection/yolov3/ixrt/ci/prepare.sh @@ -0,0 +1,35 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip3 install -r requirements.txt +mkdir checkpoints +unzip -q /root/data/3rd_party/onnx_tflite_yolov3.zip -d ./ +mv yolov3.weights onnx_tflite_yolov3/weights +cd onnx_tflite_yolov3 +python3 detect.py --cfg cfg/yolov3.cfg --weights weights/yolov3.weights +mv export.onnx ../checkpoints/yolov3.onnx +cd .. 
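The ci/prepare.sh scripts above all end by producing an ONNX file for the IxRT scripts to consume. As a quick sanity check, the exported graph can be inspected with the onnx package that these requirements files already pull in. This is only an illustrative sketch, not part of the patch; the path below assumes the yolov3 layout used in the prepare step above.

```python
# Illustrative only: confirm the exported ONNX graph is well formed before
# handing it to the IxRT build/inference scripts. The path is an assumption
# based on the prepare.sh above.
import onnx

model = onnx.load("checkpoints/yolov3.onnx")
onnx.checker.check_model(model)
inp = model.graph.input[0]
print(inp.name, [d.dim_value for d in inp.type.tensor_type.shape.dim])
```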
diff --git a/models/cv/detection/yolov3/ixrt/requirements.txt b/models/cv/detection/yolov3/ixrt/requirements.txt new file mode 100644 index 00000000..108d4fab --- /dev/null +++ b/models/cv/detection/yolov3/ixrt/requirements.txt @@ -0,0 +1,7 @@ +tqdm +onnx +onnxsim +ultralytics +pycocotools +cv2 +opencv-python==4.6.0.66 \ No newline at end of file diff --git a/models/cv/detection/yolov4/ixrt/README.md b/models/cv/detection/yolov4/ixrt/README.md index 03c632d7..ae94ae43 100644 --- a/models/cv/detection/yolov4/ixrt/README.md +++ b/models/cv/detection/yolov4/ixrt/README.md @@ -15,11 +15,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install tqdm -pip3 install onnx -pip3 install onnxsim -pip3 install pycocotools -pip3 install pycuda +pip3 install -r requirements.txt ``` ### Download @@ -55,18 +51,18 @@ onnxsim data/yolov4.onnx data/yolov4_sim.onnx ```bash # Accuracy -bash scripts/infer_yolov4darknet_fp16_accuracy.sh +bash scripts/infer_yolov4_fp16_accuracy.sh # Performance -bash scripts/infer_yolov4darknet_fp16_performance.sh +bash scripts/infer_yolov4_fp16_performance.sh ``` ### INT8 ```bash # Accuracy -bash scripts/infer_yolov4darknet_int8_accuracy.sh +bash scripts/infer_yolov4_int8_accuracy.sh # Performance -bash scripts/infer_yolov4darknet_int8_performance.sh +bash scripts/infer_yolov4_int8_performance.sh ``` ## Results diff --git a/models/cv/detection/yolov4/ixrt/ci/prepare.sh b/models/cv/detection/yolov4/ixrt/ci/prepare.sh new file mode 100644 index 00000000..e801c280 --- /dev/null +++ b/models/cv/detection/yolov4/ixrt/ci/prepare.sh @@ -0,0 +1,39 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip3 install -r requirements.txt + +# clone yolov4 +git clone --depth 1 https://github.com/Tianxiaomo/pytorch-YOLOv4.git yolov4 + +mkdir data +# export onnx model +python3 export.py --cfg yolov4/cfg/yolov4.cfg --weight /root/data/checkpoints/yolov4.weights --batchsize 16 --output data/yolov4.onnx +mv yolov4_16_3_608_608_static.onnx data/yolov4.onnx + +# Use onnxsim optimize onnx model +onnxsim data/yolov4.onnx data/yolov4_sim.onnx diff --git a/models/cv/detection/yolov4/ixrt/requirements.txt b/models/cv/detection/yolov4/ixrt/requirements.txt new file mode 100644 index 00000000..c5ff461d --- /dev/null +++ b/models/cv/detection/yolov4/ixrt/requirements.txt @@ -0,0 +1,5 @@ +tqdm +onnx +onnxsim +pycocotools +pycuda \ No newline at end of file diff --git a/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4darknet_fp16_accuracy.sh b/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4_fp16_accuracy.sh similarity index 100% rename from models/cv/detection/yolov4/ixrt/scripts/infer_yolov4darknet_fp16_accuracy.sh rename to models/cv/detection/yolov4/ixrt/scripts/infer_yolov4_fp16_accuracy.sh diff --git a/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4darknet_fp16_performance.sh b/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4_fp16_performance.sh similarity index 100% rename from models/cv/detection/yolov4/ixrt/scripts/infer_yolov4darknet_fp16_performance.sh rename to models/cv/detection/yolov4/ixrt/scripts/infer_yolov4_fp16_performance.sh diff --git a/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4darknet_int8_accuracy.sh b/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4_int8_accuracy.sh similarity index 100% rename from models/cv/detection/yolov4/ixrt/scripts/infer_yolov4darknet_int8_accuracy.sh rename to models/cv/detection/yolov4/ixrt/scripts/infer_yolov4_int8_accuracy.sh diff --git a/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4darknet_int8_performance.sh b/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4_int8_performance.sh similarity index 100% rename from models/cv/detection/yolov4/ixrt/scripts/infer_yolov4darknet_int8_performance.sh rename to models/cv/detection/yolov4/ixrt/scripts/infer_yolov4_int8_performance.sh diff --git a/models/cv/detection/yolov5/ixrt/README.md b/models/cv/detection/yolov5/ixrt/README.md index a00b7c16..ea1fc180 100644 --- a/models/cv/detection/yolov5/ixrt/README.md +++ b/models/cv/detection/yolov5/ixrt/README.md @@ -15,13 +15,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install tqdm -pip3 install onnx -pip3 install onnxsim -pip3 install ultralytics -pip3 install pycocotools -pip3 install cv2 -pip3 install opencv-python==4.6.0.66 +pip3 install -r requirements.txt ``` ### Download @@ -54,36 +48,36 @@ mv yolov5m.onnx /Path/to/checkpoints ## Inference ```bash -export PROJ_DIR=/Path/to/yolov5m/ixrt +export PROJ_DIR=/Path/to/yolov5/ixrt export DATASETS_DIR=/Path/to/coco2017/ export CHECKPOINTS_DIR=./checkpoints export COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json export EVAL_DIR=${DATASETS_DIR}/val2017 -export RUN_DIR=/Path/to/yolov5m/ixrt -export CONFIG_DIR=config/YOLOV5M_CONFIG +export RUN_DIR=/Path/to/yolov5/ixrt +export CONFIG_DIR=config/YOLOV5_CONFIG ``` ### FP16 ```bash # Accuracy -bash scripts/infer_yolov5m_fp16_accuracy.sh +bash 
scripts/infer_yolov5_fp16_accuracy.sh # Performance -bash scripts/infer_yolov5m_fp16_performance.sh +bash scripts/infer_yolov5_fp16_performance.sh ``` ### INT8 ```bash # Accuracy -bash scripts/infer_yolov5m_int8_accuracy.sh +bash scripts/infer_yolov5_int8_accuracy.sh # Performance -bash scripts/infer_yolov5m_int8_performance.sh +bash scripts/infer_yolov5_int8_performance.sh ``` ## Results Model |BatchSize |Precision |FPS |MAP@0.5 |MAP@0.5:0.95 | --------|-----------|----------|---------|----------|-------------| -YOLOv5m | 32 | FP16 | 680.93 | 0.637 | 0.447 | -YOLOv5m | 32 | INT8 | 1328.50 | 0.627 | 0.425 | +YOLOv5 | 32 | FP16 | 680.93 | 0.637 | 0.447 | +YOLOv5 | 32 | INT8 | 1328.50 | 0.627 | 0.425 | diff --git a/models/cv/detection/yolov5/ixrt/ci/prepare.sh b/models/cv/detection/yolov5/ixrt/ci/prepare.sh new file mode 100644 index 00000000..c1729249 --- /dev/null +++ b/models/cv/detection/yolov5/ixrt/ci/prepare.sh @@ -0,0 +1,43 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip3 install -r requirements.txt + +mkdir checkpoints +git clone https://github.com/ultralytics/yolov5 +# 切换到需要的版本分支 +cd yolov5 +git checkout v6.1 + +# 有一些环境需要安装 +wget https://ultralytics.com/assets/Arial.ttf +cp Arial.ttf /root/.config/Ultralytics/Arial.ttf + +# 转换为onnx (具体实现可以参考 export.py 中的 export_onnx 函数) +python3 export.py --weights /root/data/checkpoints/yolov5m.pt --include onnx --opset 11 --batch-size 32 +mv yolov5m.onnx ./checkpoints +cd .. 
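For the detection cases above, run_detec_testcase in tests/run_ixrt.py (patch 01) keys on the two overall COCO AP lines printed by the accuracy scripts. A minimal sketch of that extraction is below; the sample lines are illustrative and shaped to match the pattern as written in the patch.

```python
import re

# Sketch of the AP extraction in run_detec_testcase; sample lines are illustrative.
pattern = (r"Average Precision \(AP\) @\[ (IoU=0.50[:\d.]*)\s*\| area= all "
           r"\| maxDets=\s?\d+\s?\] =\s*([\d.]+)")
sample = """
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.447
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.637
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.282
"""
ap = {name: float(value) for name, value in re.findall(pattern, sample)}
print(ap)  # {'IoU=0.50:0.95': 0.447, 'IoU=0.50': 0.637}; exactly two matches marks the precision PASS
```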
diff --git a/models/cv/detection/yolov5/ixrt/config/YOLOV5M_CONFIG b/models/cv/detection/yolov5/ixrt/config/YOLOV5_CONFIG similarity index 100% rename from models/cv/detection/yolov5/ixrt/config/YOLOV5M_CONFIG rename to models/cv/detection/yolov5/ixrt/config/YOLOV5_CONFIG diff --git a/models/cv/detection/yolov5/ixrt/requirements.txt b/models/cv/detection/yolov5/ixrt/requirements.txt new file mode 100644 index 00000000..108d4fab --- /dev/null +++ b/models/cv/detection/yolov5/ixrt/requirements.txt @@ -0,0 +1,7 @@ +tqdm +onnx +onnxsim +ultralytics +pycocotools +cv2 +opencv-python==4.6.0.66 \ No newline at end of file diff --git a/models/cv/detection/yolov5/ixrt/scripts/infer_yolov5m_fp16_accuracy.sh b/models/cv/detection/yolov5/ixrt/scripts/infer_yolov5_fp16_accuracy.sh similarity index 100% rename from models/cv/detection/yolov5/ixrt/scripts/infer_yolov5m_fp16_accuracy.sh rename to models/cv/detection/yolov5/ixrt/scripts/infer_yolov5_fp16_accuracy.sh diff --git a/models/cv/detection/yolov5/ixrt/scripts/infer_yolov5m_fp16_performance.sh b/models/cv/detection/yolov5/ixrt/scripts/infer_yolov5_fp16_performance.sh similarity index 100% rename from models/cv/detection/yolov5/ixrt/scripts/infer_yolov5m_fp16_performance.sh rename to models/cv/detection/yolov5/ixrt/scripts/infer_yolov5_fp16_performance.sh diff --git a/models/cv/detection/yolov5/ixrt/scripts/infer_yolov5m_int8_accuracy.sh b/models/cv/detection/yolov5/ixrt/scripts/infer_yolov5_int8_accuracy.sh similarity index 100% rename from models/cv/detection/yolov5/ixrt/scripts/infer_yolov5m_int8_accuracy.sh rename to models/cv/detection/yolov5/ixrt/scripts/infer_yolov5_int8_accuracy.sh diff --git a/models/cv/detection/yolov5/ixrt/scripts/infer_yolov5m_int8_performance.sh b/models/cv/detection/yolov5/ixrt/scripts/infer_yolov5_int8_performance.sh similarity index 100% rename from models/cv/detection/yolov5/ixrt/scripts/infer_yolov5m_int8_performance.sh rename to models/cv/detection/yolov5/ixrt/scripts/infer_yolov5_int8_performance.sh diff --git a/models/cv/detection/yolov5s/ixrt/README.md b/models/cv/detection/yolov5s/ixrt/README.md index 62232386..689ed69b 100755 --- a/models/cv/detection/yolov5s/ixrt/README.md +++ b/models/cv/detection/yolov5s/ixrt/README.md @@ -15,11 +15,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install tqdm -pip3 install onnx -pip3 install onnxsim -pip3 install ultralytics -pip3 install pycocotools +pip3 install -r requirements.txt ``` ### Download diff --git a/models/cv/detection/yolov5s/ixrt/ci/prepare.sh b/models/cv/detection/yolov5s/ixrt/ci/prepare.sh new file mode 100644 index 00000000..1621bed5 --- /dev/null +++ b/models/cv/detection/yolov5s/ixrt/ci/prepare.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip3 install -r requirements.txt + +mkdir -p checkpoints +git clone https://github.com/ultralytics/yolov5 +# 切换到需要的版本分支 +cd yolov5/ +git checkout v6.1 + +# 有一些环境需要安装 +wget https://ultralytics.com/assets/Arial.ttf +mkdir -p /root/.config/Ultralytics +cp Arial.ttf /root/.config/Ultralytics/Arial.ttf + +# 转换为onnx (具体实现可以参考 export.py 中的 export_onnx 函数) +python3 export.py --weights /root/data/checkpoints/yolov5s.pt --include onnx --opset 11 --batch-size 32 +mv yolov5s.onnx ../checkpoints +cd .. diff --git a/models/cv/detection/yolov5s/ixrt/requirements.txt b/models/cv/detection/yolov5s/ixrt/requirements.txt new file mode 100644 index 00000000..4f51a92e --- /dev/null +++ b/models/cv/detection/yolov5s/ixrt/requirements.txt @@ -0,0 +1,5 @@ +tqdm +onnx +onnxsim +ultralytics +pycocotools \ No newline at end of file diff --git a/models/cv/detection/yolov6/ixrt/README.md b/models/cv/detection/yolov6/ixrt/README.md index 2248bb5a..5d0acbcd 100644 --- a/models/cv/detection/yolov6/ixrt/README.md +++ b/models/cv/detection/yolov6/ixrt/README.md @@ -15,11 +15,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-glx -pip3 install tqdm -pip3 install onnx -pip3 install onnxsim -pip3 install pycocotools -pip3 install pycuda +pip3 install -r requirements.txt ``` ### Download @@ -31,14 +27,12 @@ Dataset: to download the valida ```bash # get yolov6s.pt wget https://github.com/meituan/YOLOv6/releases/download/0.4.0/yolov6s.pt -# set coco path -mkdir -p data/ -ln -s /Path/to/coco/ data/coco ``` ### Model Conversion ```bash +mkdir -p data/ # install yolov6 git clone https://github.com/meituan/YOLOv6.git @@ -54,22 +48,26 @@ popd ## Inference +```bash +export DATASETS_DIR=/Path/to/coco/ +``` + ### FP16 ```bash # Accuracy -bash scripts/infer_yolov6s_fp16_accuracy.sh +bash scripts/infer_yolov6_fp16_accuracy.sh # Performance -bash scripts/infer_yolov6s_fp16_performance.sh +bash scripts/infer_yolov6_fp16_performance.sh ``` ### INT8 ```bash # Accuracy -bash scripts/infer_yolov6s_int8_accuracy.sh +bash scripts/infer_yolov6_int8_accuracy.sh # Performance -bash scripts/infer_yolov6s_int8_performance.sh +bash scripts/infer_yolov6_int8_performance.sh ``` ## Results diff --git a/models/cv/detection/yolov6/ixrt/ci/prepare.sh b/models/cv/detection/yolov6/ixrt/ci/prepare.sh new file mode 100644 index 00000000..061db478 --- /dev/null +++ b/models/cv/detection/yolov6/ixrt/ci/prepare.sh @@ -0,0 +1,38 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip3 install -r requirements.txt +mkdir -p data/ +git clone https://github.com/meituan/YOLOv6.git + +cd YOLOv6 +pip3 install -r requirements.txt + +# export onnx model +python3 deploy/ONNX/export_onnx.py --weights /root/data/checkpoints/yolov6s.pt --img 640 --batch-size 32 --simplify +mv ../yolov6s.onnx ../data/ +cd .. diff --git a/models/cv/detection/yolov6/ixrt/requirements.txt b/models/cv/detection/yolov6/ixrt/requirements.txt new file mode 100644 index 00000000..c5ff461d --- /dev/null +++ b/models/cv/detection/yolov6/ixrt/requirements.txt @@ -0,0 +1,5 @@ +tqdm +onnx +onnxsim +pycocotools +pycuda \ No newline at end of file diff --git a/models/cv/detection/yolov6/ixrt/scripts/infer_yolov6s_fp16_accuracy.sh b/models/cv/detection/yolov6/ixrt/scripts/infer_yolov6_fp16_accuracy.sh similarity index 98% rename from models/cv/detection/yolov6/ixrt/scripts/infer_yolov6s_fp16_accuracy.sh rename to models/cv/detection/yolov6/ixrt/scripts/infer_yolov6_fp16_accuracy.sh index 09cc0ac0..852aca0f 100644 --- a/models/cv/detection/yolov6/ixrt/scripts/infer_yolov6s_fp16_accuracy.sh +++ b/models/cv/detection/yolov6/ixrt/scripts/infer_yolov6_fp16_accuracy.sh @@ -23,7 +23,7 @@ check_status() } PROJ_DIR=$(cd $(dirname $0);cd ../; pwd) -DATASETS_DIR="${PROJ_DIR}/data/coco" +DATASETS_DIR=${DATASETS_DIR} COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json EVAL_DIR=${DATASETS_DIR}/images/val2017 CHECKPOINTS_DIR="${PROJ_DIR}/data" diff --git a/models/cv/detection/yolov6/ixrt/scripts/infer_yolov6s_fp16_performance.sh b/models/cv/detection/yolov6/ixrt/scripts/infer_yolov6_fp16_performance.sh similarity index 98% rename from models/cv/detection/yolov6/ixrt/scripts/infer_yolov6s_fp16_performance.sh rename to models/cv/detection/yolov6/ixrt/scripts/infer_yolov6_fp16_performance.sh index 409fd354..5de30b1c 100644 --- a/models/cv/detection/yolov6/ixrt/scripts/infer_yolov6s_fp16_performance.sh +++ b/models/cv/detection/yolov6/ixrt/scripts/infer_yolov6_fp16_performance.sh @@ -23,7 +23,7 @@ check_status() } PROJ_DIR=$(cd $(dirname $0);cd ../; pwd) -DATASETS_DIR="${PROJ_DIR}/data/coco" +DATASETS_DIR=${DATASETS_DIR} COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json EVAL_DIR=${DATASETS_DIR}/images/val2017 CHECKPOINTS_DIR="${PROJ_DIR}/data" diff --git a/models/cv/detection/yolov6/ixrt/scripts/infer_yolov6s_int8_accuracy.sh b/models/cv/detection/yolov6/ixrt/scripts/infer_yolov6_int8_accuracy.sh similarity index 98% rename from models/cv/detection/yolov6/ixrt/scripts/infer_yolov6s_int8_accuracy.sh rename to models/cv/detection/yolov6/ixrt/scripts/infer_yolov6_int8_accuracy.sh index 701f80f0..e7099ba0 100644 --- a/models/cv/detection/yolov6/ixrt/scripts/infer_yolov6s_int8_accuracy.sh +++ b/models/cv/detection/yolov6/ixrt/scripts/infer_yolov6_int8_accuracy.sh @@ -23,7 +23,7 @@ check_status() } PROJ_DIR=$(cd $(dirname $0);cd ../; pwd) -DATASETS_DIR="${PROJ_DIR}/data/coco" +DATASETS_DIR=${DATASETS_DIR} COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json EVAL_DIR=${DATASETS_DIR}/images/val2017 CHECKPOINTS_DIR="${PROJ_DIR}/data" diff --git a/models/cv/detection/yolov6/ixrt/scripts/infer_yolov6s_int8_performance.sh b/models/cv/detection/yolov6/ixrt/scripts/infer_yolov6_int8_performance.sh similarity index 98% rename from 
models/cv/detection/yolov6/ixrt/scripts/infer_yolov6s_int8_performance.sh rename to models/cv/detection/yolov6/ixrt/scripts/infer_yolov6_int8_performance.sh index 58f77417..85a36fad 100644 --- a/models/cv/detection/yolov6/ixrt/scripts/infer_yolov6s_int8_performance.sh +++ b/models/cv/detection/yolov6/ixrt/scripts/infer_yolov6_int8_performance.sh @@ -23,7 +23,7 @@ check_status() } PROJ_DIR=$(cd $(dirname $0);cd ../; pwd) -DATASETS_DIR="${PROJ_DIR}/data/coco" +DATASETS_DIR=${DATASETS_DIR} COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json EVAL_DIR=${DATASETS_DIR}/images/val2017 CHECKPOINTS_DIR="${PROJ_DIR}/data" diff --git a/models/cv/detection/yolov7/ixrt/README.md b/models/cv/detection/yolov7/ixrt/README.md index 4e7375de..a2716c7a 100644 --- a/models/cv/detection/yolov7/ixrt/README.md +++ b/models/cv/detection/yolov7/ixrt/README.md @@ -15,13 +15,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install tqdm -pip3 install onnx -pip3 install onnxsim -pip3 install ultralytics -pip3 install pycocotools -pip3 install cv2 -pip3 install opencv-python==4.6.0.66 +pip3 install -r requirements.txt ``` ### Download @@ -53,25 +47,25 @@ export CHECKPOINTS_DIR=./checkpoints export COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json export EVAL_DIR=${DATASETS_DIR}/val2017 export RUN_DIR=/Path/to/yolov7/ixrt -export CONFIG_DIR=config/YOLOV7M_CONFIG +export CONFIG_DIR=config/YOLOV7_CONFIG ``` ### FP16 ```bash # Accuracy -bash scripts/infer_yolov7m_fp16_accuracy.sh +bash scripts/infer_yolov7_fp16_accuracy.sh # Performance -bash scripts/infer_yolov7m_fp16_performance.sh +bash scripts/infer_yolov7_fp16_performance.sh ``` ### INT8 ```bash # Accuracy -bash scripts/infer_yolov7m_int8_accuracy.sh +bash scripts/infer_yolov7_int8_accuracy.sh # Performance -bash scripts/infer_yolov7m_int8_performance.sh +bash scripts/infer_yolov7_int8_performance.sh ``` ## Results diff --git a/models/cv/detection/yolov7/ixrt/ci/prepare.sh b/models/cv/detection/yolov7/ixrt/ci/prepare.sh new file mode 100644 index 00000000..8b6d9fbf --- /dev/null +++ b/models/cv/detection/yolov7/ixrt/ci/prepare.sh @@ -0,0 +1,34 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip3 install -r requirements.txt +mkdir -p checkpoints +git clone https://github.com/WongKinYiu/yolov7.git +cd yolov7 +python3 export.py --weights /root/data/checkpoints/yolov7.pt --grid --end2end --simplify --topk-all 100 --iou-thres 0.65 --conf-thres 0.35 --img-size 640 640 --max-wh 640 --batch-size 32 +mv yolov7.onnx ../checkpoints/yolov7m.onnx +cd .. 
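Where an accuracy script prints a metricResult dict instead of pycocotools AP lines, run_detec_testcase falls back to METRIC_PATTERN and get_metric_result, which normalize the single-quoted Python dict into JSON. A hedged sketch of that fallback path; the sample output line is illustrative.

```python
import json
import re

# Sketch of the METRIC_PATTERN / get_metric_result fallback in tests/run_ixrt.py.
METRIC_PATTERN = r"{'metricResult':.*}"
sample = "E2E time : 58.3 seconds {'metricResult': {'MAP@0.5': 0.506, 'MAP@0.5:0.95': 0.362}}"
found = re.findall(METRIC_PATTERN, sample)
if len(found) == 1:
    metrics = json.loads(found[0].replace("'", '"'))["metricResult"]
    print(metrics)  # {'MAP@0.5': 0.506, 'MAP@0.5:0.95': 0.362}
```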
diff --git a/models/cv/detection/yolov7/ixrt/config/YOLOV7M_CONFIG b/models/cv/detection/yolov7/ixrt/config/YOLOV7_CONFIG similarity index 100% rename from models/cv/detection/yolov7/ixrt/config/YOLOV7M_CONFIG rename to models/cv/detection/yolov7/ixrt/config/YOLOV7_CONFIG diff --git a/models/cv/detection/yolov7/ixrt/requirements.txt b/models/cv/detection/yolov7/ixrt/requirements.txt new file mode 100644 index 00000000..108d4fab --- /dev/null +++ b/models/cv/detection/yolov7/ixrt/requirements.txt @@ -0,0 +1,7 @@ +tqdm +onnx +onnxsim +ultralytics +pycocotools +cv2 +opencv-python==4.6.0.66 \ No newline at end of file diff --git a/models/cv/detection/yolov7/ixrt/scripts/infer_yolov7m_fp16_accuracy.sh b/models/cv/detection/yolov7/ixrt/scripts/infer_yolov7_fp16_accuracy.sh similarity index 100% rename from models/cv/detection/yolov7/ixrt/scripts/infer_yolov7m_fp16_accuracy.sh rename to models/cv/detection/yolov7/ixrt/scripts/infer_yolov7_fp16_accuracy.sh diff --git a/models/cv/detection/yolov7/ixrt/scripts/infer_yolov7m_fp16_performance.sh b/models/cv/detection/yolov7/ixrt/scripts/infer_yolov7_fp16_performance.sh similarity index 100% rename from models/cv/detection/yolov7/ixrt/scripts/infer_yolov7m_fp16_performance.sh rename to models/cv/detection/yolov7/ixrt/scripts/infer_yolov7_fp16_performance.sh diff --git a/models/cv/detection/yolov7/ixrt/scripts/infer_yolov7m_int8_accuracy.sh b/models/cv/detection/yolov7/ixrt/scripts/infer_yolov7_int8_accuracy.sh similarity index 100% rename from models/cv/detection/yolov7/ixrt/scripts/infer_yolov7m_int8_accuracy.sh rename to models/cv/detection/yolov7/ixrt/scripts/infer_yolov7_int8_accuracy.sh diff --git a/models/cv/detection/yolov7/ixrt/scripts/infer_yolov7m_int8_performance.sh b/models/cv/detection/yolov7/ixrt/scripts/infer_yolov7_int8_performance.sh similarity index 100% rename from models/cv/detection/yolov7/ixrt/scripts/infer_yolov7m_int8_performance.sh rename to models/cv/detection/yolov7/ixrt/scripts/infer_yolov7_int8_performance.sh diff --git a/models/cv/detection/yolov8/ixrt/ci/prepare.sh b/models/cv/detection/yolov8/ixrt/ci/prepare.sh index 6eca919e..58d524a6 100644 --- a/models/cv/detection/yolov8/ixrt/ci/prepare.sh +++ b/models/cv/detection/yolov8/ixrt/ci/prepare.sh @@ -1,3 +1,30 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + pip install -r requirements.txt mkdir -p checkpoints ln -s /root/data/checkpoints/yolov8.pt yolov8.pt diff --git a/models/cv/detection/yolox/ixrt/ci/prepare.sh b/models/cv/detection/yolox/ixrt/ci/prepare.sh index 7c58f593..26371b34 100644 --- a/models/cv/detection/yolox/ixrt/ci/prepare.sh +++ b/models/cv/detection/yolox/ixrt/ci/prepare.sh @@ -1,3 +1,30 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + pip install -r requirements.txt unzip /root/data/repos/yolox-f00a798c8bf59f43ab557a2f3d566afa831c8887.zip -d ./ ln -s /root/data/checkpoints/yolox_m.pth ./YOLOX/ -- Gitee From ee04f171c848cb3a76273ea80a97c9ce9b19a747 Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Wed, 8 Jan 2025 11:36:36 +0800 Subject: [PATCH 07/35] fix comma error --- tests/run_ixrt.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/run_ixrt.py b/tests/run_ixrt.py index 8f5343e2..4215221e 100644 --- a/tests/run_ixrt.py +++ b/tests/run_ixrt.py @@ -186,7 +186,7 @@ def run_clf_testcase(model): for name, value in match.groupdict().items(): if value: match_count += 1 - result["result"][prec][name] = float(f"{float(value.split(":")[1].strip()):.3f}") + result["result"][prec][name] = float(f"{float(value.split(':')[1].strip()):.3f}") break if match_count == len(patterns): @@ -254,7 +254,7 @@ def run_detec_testcase(model): for name, value in match.groupdict().items(): if value: try: - result["result"][prec][name] = float(f"{float(value.split(":")[1].strip()):.3f}") + result["result"][prec][name] = float(f"{float(value.split(':')[1].strip()):.3f}") break except ValueError: print("The string cannot be converted to a float.") -- Gitee From c61958bf39ecf2248f2bb21f512502216104dc1f Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Wed, 8 Jan 2025 13:23:25 +0800 Subject: [PATCH 08/35] fix yolov3 requirement error --- models/cv/detection/detr/ixrt/requirements.txt | 1 - models/cv/detection/yolov3/ixrt/README.md | 6 +++--- models/cv/detection/yolov3/ixrt/ci/prepare.sh | 4 ++-- models/cv/detection/yolov3/ixrt/requirements.txt | 4 ++-- models/cv/detection/yolov5/ixrt/requirements.txt | 4 ++-- models/cv/detection/yolov7/ixrt/requirements.txt | 4 ++-- 6 files changed, 11 insertions(+), 12 deletions(-) diff --git a/models/cv/detection/detr/ixrt/requirements.txt b/models/cv/detection/detr/ixrt/requirements.txt index 94d8e7c4..967c8c68 100644 --- a/models/cv/detection/detr/ixrt/requirements.txt +++ 
b/models/cv/detection/detr/ixrt/requirements.txt @@ -3,6 +3,5 @@ pycuda onnx onnxsim tabulate -cv2 pycocotools opencv-python==4.6.0.66 \ No newline at end of file diff --git a/models/cv/detection/yolov3/ixrt/README.md b/models/cv/detection/yolov3/ixrt/README.md index 285c8a3a..fc531ab3 100644 --- a/models/cv/detection/yolov3/ixrt/README.md +++ b/models/cv/detection/yolov3/ixrt/README.md @@ -38,7 +38,7 @@ mv yolov3.weights onnx_tflite_yolov3/weights # 修改 detect.py 中 torch.onnx.export() 函数的opset_version=11,会在/weights下生成export.onnx python3 detect.py --cfg cfg/yolov3.cfg --weights weights/yolov3.weights -mv export.onnx /Path/to/checkpoints/yolov3.onnx +mv weights/export.onnx /Path/to/checkpoints/yolov3.onnx ``` ## Inference @@ -47,8 +47,8 @@ mv export.onnx /Path/to/checkpoints/yolov3.onnx export PROJ_DIR=/Path/to/yolov3/ixrt export DATASETS_DIR=/Path/to/coco2017/ export CHECKPOINTS_DIR=./checkpoints -export COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json -export EVAL_DIR=${DATASETS_DIR}/val2017 +export COCO_GT=./coco/annotations/instances_val2017.json +export EVAL_DIR=./coco/val2017 export RUN_DIR=/Path/to/yolov3/ixrt export CONFIG_DIR=config/YOLOV3_CONFIG ``` diff --git a/models/cv/detection/yolov3/ixrt/ci/prepare.sh b/models/cv/detection/yolov3/ixrt/ci/prepare.sh index 2fea690f..9fb652f1 100644 --- a/models/cv/detection/yolov3/ixrt/ci/prepare.sh +++ b/models/cv/detection/yolov3/ixrt/ci/prepare.sh @@ -28,8 +28,8 @@ fi pip3 install -r requirements.txt mkdir checkpoints unzip -q /root/data/3rd_party/onnx_tflite_yolov3.zip -d ./ -mv yolov3.weights onnx_tflite_yolov3/weights +cp /root/data/checkpoints/yolov3.weights onnx_tflite_yolov3/weights cd onnx_tflite_yolov3 python3 detect.py --cfg cfg/yolov3.cfg --weights weights/yolov3.weights -mv export.onnx ../checkpoints/yolov3.onnx +mv weights/export.onnx ../checkpoints/yolov3.onnx cd .. 
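The `COCO_GT` annotation file set above is what the detection inference scripts hand to pycocotools for the accuracy check; a minimal sketch of that evaluation step (the file names here are placeholders — the real paths come from the environment variables and the predictions JSON written by `inference.py`) looks like this:

```python
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

# Ground-truth annotations (COCO_GT) and the detections saved by inference.py.
anno = COCO("coco/annotations/instances_val2017.json")
pred = anno.loadRes("yolov3_bbox_predictions.json")

ev = COCOeval(anno, pred, "bbox")
ev.evaluate()
ev.accumulate()
ev.summarize()

# stats[0] is mAP@[.5:.95], stats[1] is mAP@0.5 -- the value checked against map_target.
map_avg, map50 = ev.stats[:2]
print("MAP@0.5 :", map50)
```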
diff --git a/models/cv/detection/yolov3/ixrt/requirements.txt b/models/cv/detection/yolov3/ixrt/requirements.txt index 108d4fab..f2ec37c1 100644 --- a/models/cv/detection/yolov3/ixrt/requirements.txt +++ b/models/cv/detection/yolov3/ixrt/requirements.txt @@ -3,5 +3,5 @@ onnx onnxsim ultralytics pycocotools -cv2 -opencv-python==4.6.0.66 \ No newline at end of file +opencv-python==4.6.0.66 +pycuda \ No newline at end of file diff --git a/models/cv/detection/yolov5/ixrt/requirements.txt b/models/cv/detection/yolov5/ixrt/requirements.txt index 108d4fab..f2ec37c1 100644 --- a/models/cv/detection/yolov5/ixrt/requirements.txt +++ b/models/cv/detection/yolov5/ixrt/requirements.txt @@ -3,5 +3,5 @@ onnx onnxsim ultralytics pycocotools -cv2 -opencv-python==4.6.0.66 \ No newline at end of file +opencv-python==4.6.0.66 +pycuda \ No newline at end of file diff --git a/models/cv/detection/yolov7/ixrt/requirements.txt b/models/cv/detection/yolov7/ixrt/requirements.txt index 108d4fab..f2ec37c1 100644 --- a/models/cv/detection/yolov7/ixrt/requirements.txt +++ b/models/cv/detection/yolov7/ixrt/requirements.txt @@ -3,5 +3,5 @@ onnx onnxsim ultralytics pycocotools -cv2 -opencv-python==4.6.0.66 \ No newline at end of file +opencv-python==4.6.0.66 +pycuda \ No newline at end of file -- Gitee From 8efba93d3770284dffbb9b087dd6606e449b15e4 Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Wed, 8 Jan 2025 13:42:06 +0800 Subject: [PATCH 09/35] add e2e time --- models/cv/detection/fcos/ixrt/fcos_ixrt_inference.py | 4 ++++ models/cv/detection/yolov3/ixrt/inference.py | 5 ++++- models/cv/detection/yolov4/ixrt/inference.py | 5 ++++- models/cv/detection/yolov5/ixrt/inference.py | 4 ++++ models/cv/detection/yolov5s/ixrt/inference.py | 4 ++++ models/cv/detection/yolov6/ixrt/inference.py | 4 ++++ models/cv/detection/yolov7/ixrt/inference.py | 4 ++++ 7 files changed, 28 insertions(+), 2 deletions(-) diff --git a/models/cv/detection/fcos/ixrt/fcos_ixrt_inference.py b/models/cv/detection/fcos/ixrt/fcos_ixrt_inference.py index a5bdf3e7..9218ea3a 100644 --- a/models/cv/detection/fcos/ixrt/fcos_ixrt_inference.py +++ b/models/cv/detection/fcos/ixrt/fcos_ixrt_inference.py @@ -168,7 +168,11 @@ def main(): inputs, outputs, allocations = get_io_bindings(engine) if args.task=="precision": + start_time = time.time() map50= eval_coco(args,inputs, outputs, allocations, context) + end_time = time.time() + e2e_time = end_time - start_time + print(F"E2E time : {e2e_time:.3f} seconds") print("="*40) print("MAP50:{0}".format(round(map50,3))) diff --git a/models/cv/detection/yolov3/ixrt/inference.py b/models/cv/detection/yolov3/ixrt/inference.py index a7a60c87..dab869b7 100644 --- a/models/cv/detection/yolov3/ixrt/inference.py +++ b/models/cv/detection/yolov3/ixrt/inference.py @@ -101,6 +101,7 @@ def main(config): output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) print(f"output shape : {output.shape} output type : {output.dtype}") + start_time = time.time() for batch_data, batch_img_shape, batch_img_id in tqdm(dataloader): batch_data = batch_data.numpy() batch_img_shape = [batch_img_shape[0].numpy(), batch_img_shape[1].numpy()] @@ -153,7 +154,8 @@ def main(config): max_det=config.max_det ) save2json(batch_img_id, pred_boxes, json_result, class_map) - + end_time = time.time() + e2e_time = end_time - start_time fps = num_samples / forward_time if config.test_mode == "FPS": @@ -195,6 +197,7 @@ def main(config): map, map50 = eval.stats[:2] print("MAP@0.5 : ", map50) print(f"Accuracy Check : Test {map50} >= target {config.map_target}") 
+ print(F"E2E time : {e2e_time:.3f} seconds") if map50 >= config.map_target: print("pass!") exit() diff --git a/models/cv/detection/yolov4/ixrt/inference.py b/models/cv/detection/yolov4/ixrt/inference.py index 5d740507..cbc2debb 100644 --- a/models/cv/detection/yolov4/ixrt/inference.py +++ b/models/cv/detection/yolov4/ixrt/inference.py @@ -68,6 +68,7 @@ def main(config): forward_time = 0.0 class_map = coco80_to_coco91_class() num_samples = 0 + start_time = time.time() # Step3: Run on coco dataset for batch_names, batch_images, batch_shapes in tqdm(zip(*dataloader)): batch_data = np.ascontiguousarray(batch_images) @@ -110,7 +111,8 @@ def main(config): pred_results.append(pred_box.tolist()) save2json(batch_img_id, pred_results, json_result, class_map) - + end_time = time.time() + e2e_time = end_time - start_time fps = num_samples / forward_time if config.test_mode == "FPS": @@ -152,6 +154,7 @@ def main(config): map, map50 = eval.stats[:2] print("MAP@0.5 : ", map50) print(f"Accuracy Check : Test {map50} >= target {config.map_target}") + print(F"E2E time : {e2e_time:.3f} seconds") if map50 >= config.map_target: print("pass!") exit() diff --git a/models/cv/detection/yolov5/ixrt/inference.py b/models/cv/detection/yolov5/ixrt/inference.py index a7a60c87..554665fe 100644 --- a/models/cv/detection/yolov5/ixrt/inference.py +++ b/models/cv/detection/yolov5/ixrt/inference.py @@ -101,6 +101,7 @@ def main(config): output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) print(f"output shape : {output.shape} output type : {output.dtype}") + start_time = time.time() for batch_data, batch_img_shape, batch_img_id in tqdm(dataloader): batch_data = batch_data.numpy() batch_img_shape = [batch_img_shape[0].numpy(), batch_img_shape[1].numpy()] @@ -154,6 +155,8 @@ def main(config): ) save2json(batch_img_id, pred_boxes, json_result, class_map) + end_time = time.time() + e2e_time = end_time - start_time fps = num_samples / forward_time if config.test_mode == "FPS": @@ -195,6 +198,7 @@ def main(config): map, map50 = eval.stats[:2] print("MAP@0.5 : ", map50) print(f"Accuracy Check : Test {map50} >= target {config.map_target}") + print(F"E2E time : {e2e_time:.3f} seconds") if map50 >= config.map_target: print("pass!") exit() diff --git a/models/cv/detection/yolov5s/ixrt/inference.py b/models/cv/detection/yolov5s/ixrt/inference.py index addf5278..1ee67a01 100644 --- a/models/cv/detection/yolov5s/ixrt/inference.py +++ b/models/cv/detection/yolov5s/ixrt/inference.py @@ -101,6 +101,7 @@ def main(config): output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) print(f"output shape : {output.shape} output type : {output.dtype}") + start_time = time.time() for batch_data, batch_img_shape, batch_img_id in tqdm(dataloader): batch_data = batch_data.numpy() batch_img_shape = [batch_img_shape[0].numpy(), batch_img_shape[1].numpy()] @@ -154,6 +155,8 @@ def main(config): ) save2json(batch_img_id, pred_boxes, json_result, class_map) + end_time = time.time() + e2e_time = end_time - start_time fps = num_samples / forward_time if config.test_mode == "FPS": @@ -195,6 +198,7 @@ def main(config): map, map50 = eval.stats[:2] print("MAP@0.5 : ", map50) print(f"Accuracy Check : Test {map50} >= target {config.map_target}") + print(F"E2E time : {e2e_time:.3f} seconds") if map50 >= config.map_target: print("pass!") exit() diff --git a/models/cv/detection/yolov6/ixrt/inference.py b/models/cv/detection/yolov6/ixrt/inference.py index 836f13b2..1a4f151f 100644 --- a/models/cv/detection/yolov6/ixrt/inference.py +++ 
b/models/cv/detection/yolov6/ixrt/inference.py @@ -237,11 +237,15 @@ def main(): print("FPS : ", fps) print(f"Performance Check : Test {fps} >= target {args.fps_target}") else: + start_time = time.time() dataloader, pred_results = evaluator.eval_ixrt(args) eval_result = evaluator.eval_ixrt_map(pred_results, dataloader, task) + end_time = time.time() + e2e_time = end_time - start_time map, map50 = eval_result[:2] print("MAP@0.5 : ", map50) print(f"Accuracy Check : Test {map50} >= target {args.acc_target}") + print(F"E2E time : {e2e_time:.3f} seconds") if map50 >= args.acc_target: print("pass!") exit() diff --git a/models/cv/detection/yolov7/ixrt/inference.py b/models/cv/detection/yolov7/ixrt/inference.py index a7a60c87..554665fe 100644 --- a/models/cv/detection/yolov7/ixrt/inference.py +++ b/models/cv/detection/yolov7/ixrt/inference.py @@ -101,6 +101,7 @@ def main(config): output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) print(f"output shape : {output.shape} output type : {output.dtype}") + start_time = time.time() for batch_data, batch_img_shape, batch_img_id in tqdm(dataloader): batch_data = batch_data.numpy() batch_img_shape = [batch_img_shape[0].numpy(), batch_img_shape[1].numpy()] @@ -154,6 +155,8 @@ def main(config): ) save2json(batch_img_id, pred_boxes, json_result, class_map) + end_time = time.time() + e2e_time = end_time - start_time fps = num_samples / forward_time if config.test_mode == "FPS": @@ -195,6 +198,7 @@ def main(config): map, map50 = eval.stats[:2] print("MAP@0.5 : ", map50) print(f"Accuracy Check : Test {map50} >= target {config.map_target}") + print(F"E2E time : {e2e_time:.3f} seconds") if map50 >= config.map_target: print("pass!") exit() -- Gitee From fe1d0f7232b98aabc802238ac0fcbd63d0c6d21c Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Wed, 8 Jan 2025 13:49:12 +0800 Subject: [PATCH 10/35] update dataset path --- tests/run_ixrt.py | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/tests/run_ixrt.py b/tests/run_ixrt.py index 4215221e..c57cfd48 100644 --- a/tests/run_ixrt.py +++ b/tests/run_ixrt.py @@ -206,13 +206,13 @@ def run_detec_testcase(model): dataset_n = model["datasets"].split("/")[-1] prepare_script = f""" cd ../{model['relative_path']} - ln -s /mnt/deepspark/data/datasets/{dataset_n} ./ + ln -s /root/data/datasets/{dataset_n} ./ bash ci/prepare.sh """ # if model["need_third_part"] and model["3rd_party_repo"]: # third_party_repo = model["3rd_party_repo"] - # prepare_script += f"unzip /mnt/deepspark/data/3rd_party/{third_party_repo}.zip -d ./\n" + # prepare_script += f"unzip /root/data/3rd_party/{third_party_repo}.zip -d ./\n" # prepare_script += "bash ci/prepare.sh\n" # add pip list info when in debug mode @@ -293,9 +293,9 @@ def run_ocr_testcase(model): dataset_n = model["datasets"].split("/")[-1] prepare_script = f""" cd ../{model['relative_path']} - ln -s /mnt/deepspark/data/checkpoints/{checkpoint_n} ./ - ln -s /mnt/deepspark/data/datasets/{dataset_n} ./ - unzip /mnt/deepspark/data/3rd_party/PaddleOCR-release-2.6.zip -d ./PaddleOCR + ln -s /root/data/checkpoints/{checkpoint_n} ./ + ln -s /root/data/datasets/{dataset_n} ./ + unzip /root/data/3rd_party/PaddleOCR-release-2.6.zip -d ./PaddleOCR bash ci/prepare.sh """ @@ -349,12 +349,12 @@ def run_trace_testcase(model): dataset_n = model["datasets"].split("/")[-1] prepare_script = f""" cd ../{model['relative_path']} - ln -s /mnt/deepspark/data/checkpoints/{checkpoint_n} ./ - ln -s /mnt/deepspark/data/datasets/{dataset_n} ./ + ln -s 
/root/data/checkpoints/{checkpoint_n} ./ + ln -s /root/data/datasets/{dataset_n} ./ """ if model["need_third_part"]: - prepare_script += "unzip /mnt/deepspark/data/3rd_party/fast-reid.zip -d ./fast-reid\n" + prepare_script += "unzip /root/data/3rd_party/fast-reid.zip -d ./fast-reid\n" prepare_script += """ bash ci/prepare.sh @@ -417,14 +417,14 @@ def run_nlp_testcase(model): set -x cd ../{model['relative_path']} {mkdir_script} - ln -s /mnt/deepspark/data/checkpoints/{checkpoint_n} ./{target_dir} - export DATASETS_DIR=/mnt/deepspark/data/datasets/{dataset_n} + ln -s /root/data/checkpoints/{checkpoint_n} ./{target_dir} + export DATASETS_DIR=/root/data/datasets/{dataset_n} bash ci/prepare.sh """ # prepare int8 model for bert_large_squad if model_name == "bert_large_squad": - prepare_script += "ln -s /mnt/deepspark/data/checkpoints/bert_large_int8.hdf5 ./\n" + prepare_script += "ln -s /root/data/checkpoints/bert_large_int8.hdf5 ./\n" # add pip list info when in debug mode if utils.is_debug(): @@ -437,7 +437,7 @@ def run_nlp_testcase(model): logging.info(f"Start running {model_name} {prec} test case") script = f""" set -x - export DATASETS_DIR=/mnt/deepspark/data/datasets/{dataset_n} + export DATASETS_DIR=/root/data/datasets/{dataset_n} cd ../{model['relative_path']} bash scripts/infer_{model_name}_{prec}_accuracy.sh bash scripts/infer_{model_name}_{prec}_performance.sh @@ -469,14 +469,14 @@ def run_speech_testcase(model): dataset_n = model["datasets"].split("/")[-1] prepare_script = f""" cd ../{model['relative_path']} - ln -s /mnt/deepspark/data/checkpoints/{checkpoint_n} ./ - ln -s /mnt/deepspark/data/datasets/{dataset_n} ./ + ln -s /root/data/checkpoints/{checkpoint_n} ./ + ln -s /root/data/datasets/{dataset_n} ./ """ if model["need_third_part"] and model_name == "conformer": - prepare_script += "unzip /mnt/deepspark/data/3rd_party/kenlm.zip -d ./ctc_decoder/swig/kenlm\n" - prepare_script += "unzip /mnt/deepspark/data/3rd_party/ThreadPool.zip -d ./ctc_decoder/swig/ThreadPool\n" - prepare_script += "tar -xzvf /mnt/deepspark/data/3rd_party/openfst-1.6.3.tar.gz -C ./ctc_decoder/swig/\n" + prepare_script += "unzip /root/data/3rd_party/kenlm.zip -d ./ctc_decoder/swig/kenlm\n" + prepare_script += "unzip /root/data/3rd_party/ThreadPool.zip -d ./ctc_decoder/swig/ThreadPool\n" + prepare_script += "tar -xzvf /root/data/3rd_party/openfst-1.6.3.tar.gz -C ./ctc_decoder/swig/\n" prepare_script += """ export PYTHONPATH=`pwd`/wenet:$PYTHONPATH -- Gitee From 0d49eb4715dbba2574cb8ffc6fce85b350ab0db4 Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Wed, 8 Jan 2025 14:35:24 +0800 Subject: [PATCH 11/35] update e2e time print --- models/cv/detection/detr/ixrt/inference.py | 8 +++----- models/cv/detection/yolov3/ixrt/inference.py | 6 ++---- models/cv/detection/yolov4/ixrt/inference.py | 8 ++++---- models/cv/detection/yolov5/ixrt/inference.py | 7 +++---- models/cv/detection/yolov5s/ixrt/inference.py | 7 +++---- models/cv/detection/yolov7/ixrt/inference.py | 7 +++---- 6 files changed, 18 insertions(+), 25 deletions(-) diff --git a/models/cv/detection/detr/ixrt/inference.py b/models/cv/detection/detr/ixrt/inference.py index 7da3b286..e7c112c8 100755 --- a/models/cv/detection/detr/ixrt/inference.py +++ b/models/cv/detection/detr/ixrt/inference.py @@ -94,7 +94,6 @@ def main(config): print(f"pred_logits shape : {batch_pred_logits.shape} pred_logits type : {batch_pred_logits.dtype}") print(f"pred_boxes shape : {batch_pred_boxes.shape} pred_boxes type : {batch_pred_boxes.dtype}") - start_time = time.time() for 
batch_data, batch_img_shape, batch_img_id in tqdm(dataloader): batch_data = batch_data.numpy() batch_img_shape = [batch_img_shape[0].numpy(), batch_img_shape[1].numpy()] @@ -129,9 +128,6 @@ def main(config): # ipdb.set_trace() save2json(img_id, pred_boxes, json_result) - end_time = time.time() - e2e_time = end_time - start_time - print(F"E2E time : {e2e_time:.3f} seconds") fps = num_samples / forward_time if config.test_mode == "FPS": @@ -158,6 +154,7 @@ def main(config): with open(pred_json, "w") as f: json.dump(json_result, f) + start_time = time.time() anno_json = config.coco_gt anno = COCO(anno_json) # init annotations api pred = anno.loadRes(pred_json) # init predictions api @@ -169,8 +166,9 @@ def main(config): f"==============================eval {config.model_name} {config.precision} coco map ==============================" ) eval.summarize() - + e2e_time = time.time() - start_time map, map50 = eval.stats[:2] + print(F"E2E time : {e2e_time:.3f} seconds") print("MAP@0.5 : ", map50) print(f"Accuracy Check : Test {map50} >= target {config.map_target}") if map50 >= config.map_target: diff --git a/models/cv/detection/yolov3/ixrt/inference.py b/models/cv/detection/yolov3/ixrt/inference.py index dab869b7..42413282 100644 --- a/models/cv/detection/yolov3/ixrt/inference.py +++ b/models/cv/detection/yolov3/ixrt/inference.py @@ -101,7 +101,6 @@ def main(config): output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) print(f"output shape : {output.shape} output type : {output.dtype}") - start_time = time.time() for batch_data, batch_img_shape, batch_img_id in tqdm(dataloader): batch_data = batch_data.numpy() batch_img_shape = [batch_img_shape[0].numpy(), batch_img_shape[1].numpy()] @@ -154,8 +153,6 @@ def main(config): max_det=config.max_det ) save2json(batch_img_id, pred_boxes, json_result, class_map) - end_time = time.time() - e2e_time = end_time - start_time fps = num_samples / forward_time if config.test_mode == "FPS": @@ -182,6 +179,7 @@ def main(config): with open(pred_json, "w") as f: json.dump(json_result, f) + start_time = time.time() anno_json = config.coco_gt anno = COCO(anno_json) # init annotations api pred = anno.loadRes(pred_json) # init predictions api @@ -193,7 +191,7 @@ def main(config): f"==============================eval {config.model_name} {config.precision} coco map ==============================" ) eval.summarize() - + e2e_time = time.time() - start_time map, map50 = eval.stats[:2] print("MAP@0.5 : ", map50) print(f"Accuracy Check : Test {map50} >= target {config.map_target}") diff --git a/models/cv/detection/yolov4/ixrt/inference.py b/models/cv/detection/yolov4/ixrt/inference.py index cbc2debb..4cb31b1b 100644 --- a/models/cv/detection/yolov4/ixrt/inference.py +++ b/models/cv/detection/yolov4/ixrt/inference.py @@ -68,7 +68,7 @@ def main(config): forward_time = 0.0 class_map = coco80_to_coco91_class() num_samples = 0 - start_time = time.time() + # Step3: Run on coco dataset for batch_names, batch_images, batch_shapes in tqdm(zip(*dataloader)): batch_data = np.ascontiguousarray(batch_images) @@ -111,8 +111,6 @@ def main(config): pred_results.append(pred_box.tolist()) save2json(batch_img_id, pred_results, json_result, class_map) - end_time = time.time() - e2e_time = end_time - start_time fps = num_samples / forward_time if config.test_mode == "FPS": @@ -139,6 +137,7 @@ def main(config): with open(pred_json, "w") as f: json.dump(json_result, f) + start_time = time.time() anno_json = config.coco_gt anno = COCO(anno_json) # init annotations api pred = 
anno.loadRes(pred_json) # init predictions api @@ -150,8 +149,9 @@ def main(config): f"==============================eval {config.model_name} {config.precision} coco map ==============================" ) eval.summarize() - + e2e_time = time.time() - start_time map, map50 = eval.stats[:2] + print(F"E2E time : {e2e_time:.3f} seconds") print("MAP@0.5 : ", map50) print(f"Accuracy Check : Test {map50} >= target {config.map_target}") print(F"E2E time : {e2e_time:.3f} seconds") diff --git a/models/cv/detection/yolov5/ixrt/inference.py b/models/cv/detection/yolov5/ixrt/inference.py index 554665fe..c0476b89 100644 --- a/models/cv/detection/yolov5/ixrt/inference.py +++ b/models/cv/detection/yolov5/ixrt/inference.py @@ -101,7 +101,6 @@ def main(config): output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) print(f"output shape : {output.shape} output type : {output.dtype}") - start_time = time.time() for batch_data, batch_img_shape, batch_img_id in tqdm(dataloader): batch_data = batch_data.numpy() batch_img_shape = [batch_img_shape[0].numpy(), batch_img_shape[1].numpy()] @@ -155,8 +154,6 @@ def main(config): ) save2json(batch_img_id, pred_boxes, json_result, class_map) - end_time = time.time() - e2e_time = end_time - start_time fps = num_samples / forward_time if config.test_mode == "FPS": @@ -183,6 +180,7 @@ def main(config): with open(pred_json, "w") as f: json.dump(json_result, f) + start_time = time.time() anno_json = config.coco_gt anno = COCO(anno_json) # init annotations api pred = anno.loadRes(pred_json) # init predictions api @@ -194,8 +192,9 @@ def main(config): f"==============================eval {config.model_name} {config.precision} coco map ==============================" ) eval.summarize() - + e2e_time = time.time() - start_time map, map50 = eval.stats[:2] + print(F"E2E time : {e2e_time:.3f} seconds") print("MAP@0.5 : ", map50) print(f"Accuracy Check : Test {map50} >= target {config.map_target}") print(F"E2E time : {e2e_time:.3f} seconds") diff --git a/models/cv/detection/yolov5s/ixrt/inference.py b/models/cv/detection/yolov5s/ixrt/inference.py index 1ee67a01..ad87fe1e 100644 --- a/models/cv/detection/yolov5s/ixrt/inference.py +++ b/models/cv/detection/yolov5s/ixrt/inference.py @@ -101,7 +101,6 @@ def main(config): output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) print(f"output shape : {output.shape} output type : {output.dtype}") - start_time = time.time() for batch_data, batch_img_shape, batch_img_id in tqdm(dataloader): batch_data = batch_data.numpy() batch_img_shape = [batch_img_shape[0].numpy(), batch_img_shape[1].numpy()] @@ -155,8 +154,6 @@ def main(config): ) save2json(batch_img_id, pred_boxes, json_result, class_map) - end_time = time.time() - e2e_time = end_time - start_time fps = num_samples / forward_time if config.test_mode == "FPS": @@ -183,6 +180,7 @@ def main(config): with open(pred_json, "w") as f: json.dump(json_result, f) + start_time = time.time() anno_json = config.coco_gt anno = COCO(anno_json) # init annotations api pred = anno.loadRes(pred_json) # init predictions api @@ -194,8 +192,9 @@ def main(config): f"==============================eval {config.model_name} {config.precision} coco map ==============================" ) eval.summarize() - + e2e_time = time.time() - start_time map, map50 = eval.stats[:2] + print(F"E2E time : {e2e_time:.3f} seconds") print("MAP@0.5 : ", map50) print(f"Accuracy Check : Test {map50} >= target {config.map_target}") print(F"E2E time : {e2e_time:.3f} seconds") diff --git 
a/models/cv/detection/yolov7/ixrt/inference.py b/models/cv/detection/yolov7/ixrt/inference.py index 554665fe..c0476b89 100644 --- a/models/cv/detection/yolov7/ixrt/inference.py +++ b/models/cv/detection/yolov7/ixrt/inference.py @@ -101,7 +101,6 @@ def main(config): output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) print(f"output shape : {output.shape} output type : {output.dtype}") - start_time = time.time() for batch_data, batch_img_shape, batch_img_id in tqdm(dataloader): batch_data = batch_data.numpy() batch_img_shape = [batch_img_shape[0].numpy(), batch_img_shape[1].numpy()] @@ -155,8 +154,6 @@ def main(config): ) save2json(batch_img_id, pred_boxes, json_result, class_map) - end_time = time.time() - e2e_time = end_time - start_time fps = num_samples / forward_time if config.test_mode == "FPS": @@ -183,6 +180,7 @@ def main(config): with open(pred_json, "w") as f: json.dump(json_result, f) + start_time = time.time() anno_json = config.coco_gt anno = COCO(anno_json) # init annotations api pred = anno.loadRes(pred_json) # init predictions api @@ -194,8 +192,9 @@ def main(config): f"==============================eval {config.model_name} {config.precision} coco map ==============================" ) eval.summarize() - + e2e_time = time.time() - start_time map, map50 = eval.stats[:2] + print(F"E2E time : {e2e_time:.3f} seconds") print("MAP@0.5 : ", map50) print(f"Accuracy Check : Test {map50} >= target {config.map_target}") print(F"E2E time : {e2e_time:.3f} seconds") -- Gitee From 94a60eeaf791643c66b2ad663b1f8ace88ebf46a Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Thu, 9 Jan 2025 10:34:21 +0800 Subject: [PATCH 12/35] fix classification and detection model error --- .../cspdarknet53/ixrt/ci/prepare.sh | 2 +- .../cspresnet50/ixrt/requirements.txt | 3 ++- .../classification/densenet161/ixrt/README.md | 4 ++-- ....sh => infer_densenet161_fp16_accuracy.sh} | 0 ... => infer_densenet161_fp16_performance.sh} | 0 .../classification/densenet169/ixrt/README.md | 6 +++--- ....sh => infer_densenet169_fp16_accuracy.sh} | 0 ... => infer_densenet169_fp16_performance.sh} | 0 .../efficientnet_b1/ixrt/ci/prepare.sh | 2 +- .../efficientnet_b1/ixrt/requirements.txt | 3 ++- .../hrnet_w18/ixrt/requirements.txt | 3 ++- .../inceptionresnetv2/ixrt/inference.py | 5 +++-- .../classification/repvgg/ixrt/ci/prepare.sh | 2 +- .../repvgg/ixrt/requirements.txt | 3 ++- .../resnet101/ixrt/requirements.txt | 3 ++- .../resnet34/ixrt/requirements.txt | 3 ++- .../resnetv1d50/ixrt/requirements.txt | 3 ++- ....sh => infer_resnetv1d50_fp16_accuracy.sh} | 0 ... => infer_resnetv1d50_fp16_performance.sh} | 0 ....sh => infer_resnetv1d50_int8_accuracy.sh} | 0 ... 
=> infer_resnetv1d50_int8_performance.sh} | 0 .../squeezenet_v1_0/ixrt/requirements.txt | 3 ++- .../ixrt/requirements.txt | 1 - .../wide_resnet50/ixrt/requirements.txt | 3 ++- models/cv/detection/fcos/ixrt/README.md | 2 +- models/cv/detection/fcos/ixrt/ci/prepare.sh | 19 +++++++++++++++++-- models/cv/detection/yolov5/ixrt/ci/prepare.sh | 10 +++++----- .../cv/detection/yolov5s/ixrt/ci/prepare.sh | 9 ++++----- models/cv/detection/yolov6/ixrt/ci/prepare.sh | 2 +- models/cv/detection/yolov7/ixrt/ci/prepare.sh | 2 +- 30 files changed, 58 insertions(+), 35 deletions(-) rename models/cv/classification/densenet161/ixrt/scripts/{infer_densenet_fp16_accuracy.sh => infer_densenet161_fp16_accuracy.sh} (100%) rename models/cv/classification/densenet161/ixrt/scripts/{infer_densenet_fp16_performance.sh => infer_densenet161_fp16_performance.sh} (100%) rename models/cv/classification/densenet169/ixrt/scripts/{infer_densenet_fp16_accuracy.sh => infer_densenet169_fp16_accuracy.sh} (100%) rename models/cv/classification/densenet169/ixrt/scripts/{infer_densenet_fp16_performance.sh => infer_densenet169_fp16_performance.sh} (100%) rename models/cv/classification/resnetv1d50/ixrt/scripts/{infer_resnet_v1_d50_fp16_accuracy.sh => infer_resnetv1d50_fp16_accuracy.sh} (100%) rename models/cv/classification/resnetv1d50/ixrt/scripts/{infer_resnet_v1_d50_fp16_performance.sh => infer_resnetv1d50_fp16_performance.sh} (100%) rename models/cv/classification/resnetv1d50/ixrt/scripts/{infer_resnet_v1_d50_int8_accuracy.sh => infer_resnetv1d50_int8_accuracy.sh} (100%) rename models/cv/classification/resnetv1d50/ixrt/scripts/{infer_resnet_v1_d50_int8_performance.sh => infer_resnetv1d50_int8_performance.sh} (100%) diff --git a/models/cv/classification/cspdarknet53/ixrt/ci/prepare.sh b/models/cv/classification/cspdarknet53/ixrt/ci/prepare.sh index 891f9f55..29a65f01 100644 --- a/models/cv/classification/cspdarknet53/ixrt/ci/prepare.sh +++ b/models/cv/classification/cspdarknet53/ixrt/ci/prepare.sh @@ -31,4 +31,4 @@ unzip -q /root/data/repos/mmpretrain-0.24.0.zip -d ./ python3 export.py --cfg mmpretrain/configs/cspnet/cspdarknet50_8xb32_in1k.py --weight /root/data/checkpoints/cspdarknet53_3rdparty_8xb32_in1k_20220329-bd275287.pth --output cspdarknet53.onnx mkdir -p checkpoints -onnxsim cspdarknet5.onnx checkpoints/cspdarknet53_sim.onnx \ No newline at end of file +onnxsim cspdarknet53.onnx checkpoints/cspdarknet53_sim.onnx \ No newline at end of file diff --git a/models/cv/classification/cspresnet50/ixrt/requirements.txt b/models/cv/classification/cspresnet50/ixrt/requirements.txt index fdd84a5d..566974bb 100644 --- a/models/cv/classification/cspresnet50/ixrt/requirements.txt +++ b/models/cv/classification/cspresnet50/ixrt/requirements.txt @@ -4,4 +4,5 @@ onnx onnxsim opencv-python==4.6.0.66 mmcls==0.24.0 -mmcv==1.5.3 \ No newline at end of file +mmcv==1.5.3 +pycuda \ No newline at end of file diff --git a/models/cv/classification/densenet161/ixrt/README.md b/models/cv/classification/densenet161/ixrt/README.md index c1d5a157..5e225c5e 100644 --- a/models/cv/classification/densenet161/ixrt/README.md +++ b/models/cv/classification/densenet161/ixrt/README.md @@ -39,9 +39,9 @@ export DATASETS_DIR=/Path/to/imagenet_val/ ```bash # Accuracy -bash scripts/infer_densenet_fp16_accuracy.sh +bash scripts/infer_densenet161_fp16_accuracy.sh # Performance -bash scripts/infer_densenet_fp16_performance.sh +bash scripts/infer_densenet161_fp16_performance.sh ``` ## Results diff --git 
a/models/cv/classification/densenet161/ixrt/scripts/infer_densenet_fp16_accuracy.sh b/models/cv/classification/densenet161/ixrt/scripts/infer_densenet161_fp16_accuracy.sh similarity index 100% rename from models/cv/classification/densenet161/ixrt/scripts/infer_densenet_fp16_accuracy.sh rename to models/cv/classification/densenet161/ixrt/scripts/infer_densenet161_fp16_accuracy.sh diff --git a/models/cv/classification/densenet161/ixrt/scripts/infer_densenet_fp16_performance.sh b/models/cv/classification/densenet161/ixrt/scripts/infer_densenet161_fp16_performance.sh similarity index 100% rename from models/cv/classification/densenet161/ixrt/scripts/infer_densenet_fp16_performance.sh rename to models/cv/classification/densenet161/ixrt/scripts/infer_densenet161_fp16_performance.sh diff --git a/models/cv/classification/densenet169/ixrt/README.md b/models/cv/classification/densenet169/ixrt/README.md index 79c0b0a4..480e9df6 100644 --- a/models/cv/classification/densenet169/ixrt/README.md +++ b/models/cv/classification/densenet169/ixrt/README.md @@ -40,13 +40,13 @@ export DATASETS_DIR=/Path/to/imagenet_val/ ```bash # Accuracy -bash scripts/infer_densenet_fp16_accuracy.sh +bash scripts/infer_densenet169_fp16_accuracy.sh # Performance -bash scripts/infer_densenet_fp16_performance.sh +bash scripts/infer_densenet169_fp16_performance.sh ``` ## Results | Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) | | -------- | --------- | --------- | ------- | -------- | -------- | -| DenseNet | 32 | FP16 | 1119.69 | 0.7558 | 0.9284 | +| DenseNet169 | 32 | FP16 | 1119.69 | 0.7558 | 0.9284 | diff --git a/models/cv/classification/densenet169/ixrt/scripts/infer_densenet_fp16_accuracy.sh b/models/cv/classification/densenet169/ixrt/scripts/infer_densenet169_fp16_accuracy.sh similarity index 100% rename from models/cv/classification/densenet169/ixrt/scripts/infer_densenet_fp16_accuracy.sh rename to models/cv/classification/densenet169/ixrt/scripts/infer_densenet169_fp16_accuracy.sh diff --git a/models/cv/classification/densenet169/ixrt/scripts/infer_densenet_fp16_performance.sh b/models/cv/classification/densenet169/ixrt/scripts/infer_densenet169_fp16_performance.sh similarity index 100% rename from models/cv/classification/densenet169/ixrt/scripts/infer_densenet_fp16_performance.sh rename to models/cv/classification/densenet169/ixrt/scripts/infer_densenet169_fp16_performance.sh diff --git a/models/cv/classification/efficientnet_b1/ixrt/ci/prepare.sh b/models/cv/classification/efficientnet_b1/ixrt/ci/prepare.sh index 7131b5f0..d9e780f2 100644 --- a/models/cv/classification/efficientnet_b1/ixrt/ci/prepare.sh +++ b/models/cv/classification/efficientnet_b1/ixrt/ci/prepare.sh @@ -29,5 +29,5 @@ pip install -r requirements.txt mkdir checkpoints mkdir -p /root/.cache/torch/hub/checkpoints/ -ln -s /root/data/checkpoints/efficientnet_b1-c27df63c.pth /root/.cache/torch/hub/checkpoints/efficientnet_b1-c27df63c.pth +ln -s /root/data/checkpoints/efficientnet_b1_rwightman-bac287d4.pth /root/.cache/torch/hub/checkpoints/efficientnet_b1_rwightman-bac287d4.pth python3 export_onnx.py --output_model checkpoints/efficientnet-b1.onnx \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b1/ixrt/requirements.txt b/models/cv/classification/efficientnet_b1/ixrt/requirements.txt index e1eda59c..bc645b22 100644 --- a/models/cv/classification/efficientnet_b1/ixrt/requirements.txt +++ b/models/cv/classification/efficientnet_b1/ixrt/requirements.txt @@ -1,4 +1,5 @@ tqdm onnx onnxsim -tabulate \ No newline at end 
of file +tabulate +pycuda \ No newline at end of file diff --git a/models/cv/classification/hrnet_w18/ixrt/requirements.txt b/models/cv/classification/hrnet_w18/ixrt/requirements.txt index 84b43c5d..96208002 100644 --- a/models/cv/classification/hrnet_w18/ixrt/requirements.txt +++ b/models/cv/classification/hrnet_w18/ixrt/requirements.txt @@ -4,4 +4,5 @@ onnxsim tabulate ppq mmpretrain -mmcv-lite \ No newline at end of file +mmcv-lite +pycuda \ No newline at end of file diff --git a/models/cv/classification/inceptionresnetv2/ixrt/inference.py b/models/cv/classification/inceptionresnetv2/ixrt/inference.py index 569cad96..17f473bf 100644 --- a/models/cv/classification/inceptionresnetv2/ixrt/inference.py +++ b/models/cv/classification/inceptionresnetv2/ixrt/inference.py @@ -86,7 +86,7 @@ def main(config): total_sample = 0 acc_top1, acc_top5 = 0, 0 - + start_time = time.time() with tqdm(total= len(dataloader)) as _tqdm: for idx, (batch_data, batch_label) in enumerate(dataloader): output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) @@ -110,7 +110,8 @@ def main(config): _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), acc_5='{:.4f}'.format(acc_top5/total_sample)) _tqdm.update(1) - + e2e_time = time.time() - start_time + print(F"E2E time : {e2e_time:.3f} seconds") print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") acc1 = acc_top1/total_sample diff --git a/models/cv/classification/repvgg/ixrt/ci/prepare.sh b/models/cv/classification/repvgg/ixrt/ci/prepare.sh index 6c43cd6e..221eb5dd 100644 --- a/models/cv/classification/repvgg/ixrt/ci/prepare.sh +++ b/models/cv/classification/repvgg/ixrt/ci/prepare.sh @@ -26,7 +26,7 @@ else fi pip install -r requirements.txt -unzip -q /mnt/deepspark/data/repos/mmpretrain-0.24.0.zip -d ./ +unzip -q /root/data/repos/mmpretrain-0.24.0.zip -d ./ mkdir -p checkpoints python3 export_onnx.py \ --config_file ./mmpretrain/configs/repvgg/repvgg-A0_4xb64-coslr-120e_in1k.py \ diff --git a/models/cv/classification/repvgg/ixrt/requirements.txt b/models/cv/classification/repvgg/ixrt/requirements.txt index fdd84a5d..566974bb 100644 --- a/models/cv/classification/repvgg/ixrt/requirements.txt +++ b/models/cv/classification/repvgg/ixrt/requirements.txt @@ -4,4 +4,5 @@ onnx onnxsim opencv-python==4.6.0.66 mmcls==0.24.0 -mmcv==1.5.3 \ No newline at end of file +mmcv==1.5.3 +pycuda \ No newline at end of file diff --git a/models/cv/classification/resnet101/ixrt/requirements.txt b/models/cv/classification/resnet101/ixrt/requirements.txt index e1eda59c..bc645b22 100644 --- a/models/cv/classification/resnet101/ixrt/requirements.txt +++ b/models/cv/classification/resnet101/ixrt/requirements.txt @@ -1,4 +1,5 @@ tqdm onnx onnxsim -tabulate \ No newline at end of file +tabulate +pycuda \ No newline at end of file diff --git a/models/cv/classification/resnet34/ixrt/requirements.txt b/models/cv/classification/resnet34/ixrt/requirements.txt index e1eda59c..bc645b22 100644 --- a/models/cv/classification/resnet34/ixrt/requirements.txt +++ b/models/cv/classification/resnet34/ixrt/requirements.txt @@ -1,4 +1,5 @@ tqdm onnx onnxsim -tabulate \ No newline at end of file +tabulate +pycuda \ No newline at end of file diff --git a/models/cv/classification/resnetv1d50/ixrt/requirements.txt b/models/cv/classification/resnetv1d50/ixrt/requirements.txt index 84b43c5d..96208002 100644 --- a/models/cv/classification/resnetv1d50/ixrt/requirements.txt +++ 
b/models/cv/classification/resnetv1d50/ixrt/requirements.txt @@ -4,4 +4,5 @@ onnxsim tabulate ppq mmpretrain -mmcv-lite \ No newline at end of file +mmcv-lite +pycuda \ No newline at end of file diff --git a/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnet_v1_d50_fp16_accuracy.sh b/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_fp16_accuracy.sh similarity index 100% rename from models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnet_v1_d50_fp16_accuracy.sh rename to models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_fp16_accuracy.sh diff --git a/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnet_v1_d50_fp16_performance.sh b/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_fp16_performance.sh similarity index 100% rename from models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnet_v1_d50_fp16_performance.sh rename to models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_fp16_performance.sh diff --git a/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnet_v1_d50_int8_accuracy.sh b/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_int8_accuracy.sh similarity index 100% rename from models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnet_v1_d50_int8_accuracy.sh rename to models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_int8_accuracy.sh diff --git a/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnet_v1_d50_int8_performance.sh b/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_int8_performance.sh similarity index 100% rename from models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnet_v1_d50_int8_performance.sh rename to models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_int8_performance.sh diff --git a/models/cv/classification/squeezenet_v1_0/ixrt/requirements.txt b/models/cv/classification/squeezenet_v1_0/ixrt/requirements.txt index 24dc14b7..8ea6ea90 100644 --- a/models/cv/classification/squeezenet_v1_0/ixrt/requirements.txt +++ b/models/cv/classification/squeezenet_v1_0/ixrt/requirements.txt @@ -1,3 +1,4 @@ tqdm onnxsim -opencv-python==4.6.0.66 \ No newline at end of file +opencv-python==4.6.0.66 +pycuda \ No newline at end of file diff --git a/models/cv/classification/swin_transformer_large/ixrt/requirements.txt b/models/cv/classification/swin_transformer_large/ixrt/requirements.txt index 512930b8..6e905779 100644 --- a/models/cv/classification/swin_transformer_large/ixrt/requirements.txt +++ b/models/cv/classification/swin_transformer_large/ixrt/requirements.txt @@ -5,6 +5,5 @@ tqdm pycuda onnx tabulate -cv2 pycocotools opencv-python==4.6.0.66 \ No newline at end of file diff --git a/models/cv/classification/wide_resnet50/ixrt/requirements.txt b/models/cv/classification/wide_resnet50/ixrt/requirements.txt index ecd31631..4b82bed8 100644 --- a/models/cv/classification/wide_resnet50/ixrt/requirements.txt +++ b/models/cv/classification/wide_resnet50/ixrt/requirements.txt @@ -1,2 +1,3 @@ onnx -tqdm \ No newline at end of file +tqdm +pycuda \ No newline at end of file diff --git a/models/cv/detection/fcos/ixrt/README.md b/models/cv/detection/fcos/ixrt/README.md index 51e30390..edfdeefe 100755 --- a/models/cv/detection/fcos/ixrt/README.md +++ b/models/cv/detection/fcos/ixrt/README.md @@ -43,7 +43,7 @@ Pretrained model: Date: Thu, 9 Jan 2025 15:11:41 +0800 Subject: [PATCH 13/35] fix models --- ....sh => infer_cspdarknet53_fp16_accuracy.sh} | 0 ... 
=> infer_cspdarknet53_fp16_performance.sh} | 0 ....sh => infer_cspdarknet53_int8_accuracy.sh} | 0 ... => infer_cspdarknet53_int8_performance.sh} | 0 .../densenet169/ixrt/ci/prepare.sh | 2 +- .../efficientnet_b0/ixrt/ci/prepare.sh | 2 +- models/cv/detection/yolov4/ixrt/ci/prepare.sh | 2 +- .../ixrt/scripts/infer_yolov4_fp16_accuracy.sh | 2 +- .../scripts/infer_yolov4_fp16_performance.sh | 2 +- .../ixrt/scripts/infer_yolov4_int8_accuracy.sh | 2 +- .../scripts/infer_yolov4_int8_performance.sh | 2 +- models/cv/detection/yolov5/ixrt/ci/prepare.sh | 3 ++- models/cv/detection/yolov5s/ixrt/ci/prepare.sh | 3 ++- models/cv/detection/yolov6/ixrt/ci/prepare.sh | 5 +++-- models/cv/detection/yolov7/ixrt/ci/prepare.sh | 3 ++- tests/models_ixrt.yaml | 18 +++++++----------- 16 files changed, 23 insertions(+), 23 deletions(-) rename models/cv/classification/cspdarknet53/ixrt/scripts/{infer_cspdarknet50_fp16_accuracy.sh => infer_cspdarknet53_fp16_accuracy.sh} (100%) rename models/cv/classification/cspdarknet53/ixrt/scripts/{infer_cspdarknet50_fp16_performance.sh => infer_cspdarknet53_fp16_performance.sh} (100%) rename models/cv/classification/cspdarknet53/ixrt/scripts/{infer_cspdarknet50_int8_accuracy.sh => infer_cspdarknet53_int8_accuracy.sh} (100%) rename models/cv/classification/cspdarknet53/ixrt/scripts/{infer_cspdarknet50_int8_performance.sh => infer_cspdarknet53_int8_performance.sh} (100%) diff --git a/models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet50_fp16_accuracy.sh b/models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet53_fp16_accuracy.sh similarity index 100% rename from models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet50_fp16_accuracy.sh rename to models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet53_fp16_accuracy.sh diff --git a/models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet50_fp16_performance.sh b/models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet53_fp16_performance.sh similarity index 100% rename from models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet50_fp16_performance.sh rename to models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet53_fp16_performance.sh diff --git a/models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet50_int8_accuracy.sh b/models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet53_int8_accuracy.sh similarity index 100% rename from models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet50_int8_accuracy.sh rename to models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet53_int8_accuracy.sh diff --git a/models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet50_int8_performance.sh b/models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet53_int8_performance.sh similarity index 100% rename from models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet50_int8_performance.sh rename to models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet53_int8_performance.sh diff --git a/models/cv/classification/densenet169/ixrt/ci/prepare.sh b/models/cv/classification/densenet169/ixrt/ci/prepare.sh index 21091147..da20933f 100644 --- a/models/cv/classification/densenet169/ixrt/ci/prepare.sh +++ b/models/cv/classification/densenet169/ixrt/ci/prepare.sh @@ -27,4 +27,4 @@ fi pip install -r requirements.txt -python3 export.py --weight /root/data/checkpoints/densenet169-b2777c0a.pth --output densenet161.onnx \ No newline at end of file +python3 export.py --weight 
/root/data/checkpoints/densenet169-b2777c0a.pth --output densenet169.onnx \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b0/ixrt/ci/prepare.sh b/models/cv/classification/efficientnet_b0/ixrt/ci/prepare.sh index ca66169b..75a8391b 100644 --- a/models/cv/classification/efficientnet_b0/ixrt/ci/prepare.sh +++ b/models/cv/classification/efficientnet_b0/ixrt/ci/prepare.sh @@ -27,4 +27,4 @@ fi pip install -r requirements.txt mkdir checkpoints -python3 export_onnx.py --origin_model /root/data/checpoints/efficientnet_b0_rwightman-3dd342df.pth --output_model checkpoints/efficientnet_b0.onnx \ No newline at end of file +python3 export_onnx.py --origin_model /root/data/checkpoints/efficientnet_b0_rwightman-3dd342df.pth --output_model checkpoints/efficientnet_b0.onnx \ No newline at end of file diff --git a/models/cv/detection/yolov4/ixrt/ci/prepare.sh b/models/cv/detection/yolov4/ixrt/ci/prepare.sh index e801c280..f5381ef3 100644 --- a/models/cv/detection/yolov4/ixrt/ci/prepare.sh +++ b/models/cv/detection/yolov4/ixrt/ci/prepare.sh @@ -28,7 +28,7 @@ fi pip3 install -r requirements.txt # clone yolov4 -git clone --depth 1 https://github.com/Tianxiaomo/pytorch-YOLOv4.git yolov4 +cp -r /root/data/3rd_party/yolov4 ./ mkdir data # export onnx model diff --git a/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4_fp16_accuracy.sh b/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4_fp16_accuracy.sh index b732d4eb..c33dc591 100644 --- a/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4_fp16_accuracy.sh +++ b/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4_fp16_accuracy.sh @@ -23,7 +23,7 @@ check_status() } PROJ_DIR=$(cd $(dirname $0);cd ../; pwd) -DATASETS_DIR="${PROJ_DIR}/data/coco" +DATASETS_DIR=${DATASETS_DIR:-"${PROJ_DIR}/data/coco"} COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json EVAL_DIR=${DATASETS_DIR}/images/val2017 CHECKPOINTS_DIR="${PROJ_DIR}/data" diff --git a/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4_fp16_performance.sh b/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4_fp16_performance.sh index 796dad72..a4a83ce7 100644 --- a/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4_fp16_performance.sh +++ b/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4_fp16_performance.sh @@ -23,7 +23,7 @@ check_status() } PROJ_DIR=$(cd $(dirname $0);cd ../; pwd) -DATASETS_DIR="${PROJ_DIR}/data/coco" +DATASETS_DIR=${DATASETS_DIR:-"${PROJ_DIR}/data/coco"} COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json EVAL_DIR=${DATASETS_DIR}/images/val2017 CHECKPOINTS_DIR="${PROJ_DIR}/data" diff --git a/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4_int8_accuracy.sh b/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4_int8_accuracy.sh index c62d174c..20e59378 100644 --- a/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4_int8_accuracy.sh +++ b/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4_int8_accuracy.sh @@ -23,7 +23,7 @@ check_status() } PROJ_DIR=$(cd $(dirname $0);cd ../; pwd) -DATASETS_DIR="${PROJ_DIR}/data/coco" +DATASETS_DIR=${DATASETS_DIR:-"${PROJ_DIR}/data/coco"} COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json EVAL_DIR=${DATASETS_DIR}/images/val2017 CHECKPOINTS_DIR="${PROJ_DIR}/data" diff --git a/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4_int8_performance.sh b/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4_int8_performance.sh index 2e335fa1..7f110386 100644 --- a/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4_int8_performance.sh +++ 
b/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4_int8_performance.sh @@ -23,7 +23,7 @@ check_status() } PROJ_DIR=$(cd $(dirname $0);cd ../; pwd) -DATASETS_DIR="${PROJ_DIR}/data/coco" +DATASETS_DIR=${DATASETS_DIR:-"${PROJ_DIR}/data/coco"} COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json EVAL_DIR=${DATASETS_DIR}/images/val2017 CHECKPOINTS_DIR="${PROJ_DIR}/data" diff --git a/models/cv/detection/yolov5/ixrt/ci/prepare.sh b/models/cv/detection/yolov5/ixrt/ci/prepare.sh index 4f9fdd26..b66c06b5 100644 --- a/models/cv/detection/yolov5/ixrt/ci/prepare.sh +++ b/models/cv/detection/yolov5/ixrt/ci/prepare.sh @@ -37,7 +37,8 @@ cd yolov5 mkdir -p /root/.config/Ultralytics cp /root/data/3rd_party/Arial.ttf /root/.config/Ultralytics/Arial.ttf +ln -s /root/data/checkpoints/yolov5m.pt ./ # 转换为onnx (具体实现可以参考 export.py 中的 export_onnx 函数) -python3 export.py --weights /root/data/checkpoints/yolov5m.pt --include onnx --opset 11 --batch-size 32 +python3 export.py --weights yolov5m.pt --include onnx --opset 11 --batch-size 32 mv yolov5m.onnx ../checkpoints cd .. diff --git a/models/cv/detection/yolov5s/ixrt/ci/prepare.sh b/models/cv/detection/yolov5s/ixrt/ci/prepare.sh index 4395538f..b9f3a57f 100644 --- a/models/cv/detection/yolov5s/ixrt/ci/prepare.sh +++ b/models/cv/detection/yolov5s/ixrt/ci/prepare.sh @@ -37,7 +37,8 @@ cd yolov5/ mkdir -p /root/.config/Ultralytics cp /root/data/3rd_party/Arial.ttf /root/.config/Ultralytics/Arial.ttf +ln -s /root/data/checkpoints/yolov5s.pt ./ # 转换为onnx (具体实现可以参考 export.py 中的 export_onnx 函数) -python3 export.py --weights /root/data/checkpoints/yolov5s.pt --include onnx --opset 11 --batch-size 32 +python3 export.py --weights yolov5s.pt --include onnx --opset 11 --batch-size 32 mv yolov5s.onnx ../checkpoints cd .. diff --git a/models/cv/detection/yolov6/ixrt/ci/prepare.sh b/models/cv/detection/yolov6/ixrt/ci/prepare.sh index a9054425..3aa607e3 100644 --- a/models/cv/detection/yolov6/ixrt/ci/prepare.sh +++ b/models/cv/detection/yolov6/ixrt/ci/prepare.sh @@ -32,7 +32,8 @@ cp -r /root/data/3rd_party/YOLOv6 ./ cd YOLOv6 pip3 install -r requirements.txt +ln -s /root/data/checkpoints/yolov6s.pt ./ # export onnx model -python3 deploy/ONNX/export_onnx.py --weights /root/data/checkpoints/yolov6s.pt --img 640 --batch-size 32 --simplify -mv ../yolov6s.onnx ../data/ +python3 deploy/ONNX/export_onnx.py --weights yolov6s.pt --img 640 --batch-size 32 --simplify +mv yolov6s.onnx ../data/ cd .. diff --git a/models/cv/detection/yolov7/ixrt/ci/prepare.sh b/models/cv/detection/yolov7/ixrt/ci/prepare.sh index f74b2212..fca64779 100644 --- a/models/cv/detection/yolov7/ixrt/ci/prepare.sh +++ b/models/cv/detection/yolov7/ixrt/ci/prepare.sh @@ -29,6 +29,7 @@ pip3 install -r requirements.txt mkdir -p checkpoints cp -r /root/data/3rd_party/yolov7 ./ cd yolov7 -python3 export.py --weights /root/data/checkpoints/yolov7.pt --grid --end2end --simplify --topk-all 100 --iou-thres 0.65 --conf-thres 0.35 --img-size 640 640 --max-wh 640 --batch-size 32 +ln -s /root/data/checkpoints/yolov7.pt ./ +python3 export.py --weights yolov7.pt --grid --end2end --simplify --topk-all 100 --iou-thres 0.65 --conf-thres 0.35 --img-size 640 640 --max-wh 640 --batch-size 32 mv yolov7.onnx ../checkpoints/yolov7m.onnx cd .. 
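The `${DATASETS_DIR:-...}` change in the yolov4 scripts above lets the CI export the dataset location while keeping the old repo-local layout as a fallback; if the same lookup were needed on the Python side of the harness, it might look like the following sketch (the fallback path simply mirrors the shell default and is an assumption, not an existing helper).

```python
import os

# Use $DATASETS_DIR when the CI exports it, otherwise fall back to ./data/coco
# relative to the project directory, mirroring ${DATASETS_DIR:-"${PROJ_DIR}/data/coco"}.
proj_dir = os.path.dirname(os.path.abspath(__file__))
datasets_dir = os.environ.get("DATASETS_DIR", os.path.join(proj_dir, "data", "coco"))

coco_gt = os.path.join(datasets_dir, "annotations", "instances_val2017.json")
eval_dir = os.path.join(datasets_dir, "images", "val2017")
print(coco_gt, eval_dir)
```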
diff --git a/tests/models_ixrt.yaml b/tests/models_ixrt.yaml index 20746ced..8282ad95 100644 --- a/tests/models_ixrt.yaml +++ b/tests/models_ixrt.yaml @@ -68,7 +68,7 @@ relative_path: models/cv/classification/efficientnet_b0/ixrt task_type: cv/classification - datasets: https://www.image-net.org/download.php - download_url: https://local/efficientnet_b1.pth + download_url: https://download.pytorch.org/models/efficientnet_b1_rwightman-bac287d4.pth name: efficientnet_b1 need_third_part: false precisions: @@ -283,12 +283,11 @@ need_third_part: false precisions: - fp16 - relative_path: models/cv/detection/detr/i8xrt + relative_path: models/cv/detection/detr/ixrt task_type: cv/detection - datasets: local/coco download_url: https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco-0a0d75a8.pth name: fcos - need_comfirm: true need_third_part: true precisions: - fp16 @@ -380,7 +379,7 @@ - datasets: local/coco download_url: https://download.01.org/opencv/openvino_training_extensions/models/human_pose_estimation/checkpoint_iter_370000.pth name: lightweight_openpose - need_third_part: false + need_third_part: true precisions: - fp16 relative_path: models/cv/pose_estimation/lightweight_openpose/ixrt @@ -404,7 +403,7 @@ - datasets: local/coco download_url: https://download.openmmlab.com/mmdetection/v2.0/solo/solo_r50_fpn_3x_coco/solo_r50_fpn_3x_coco_20210901_012353-11d224d7.pth name: solov1 - need_third_part: false + need_third_part: true precisions: - fp16 relative_path: models/cv/segmentation/solov1/ixrt @@ -421,9 +420,9 @@ - datasets: https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open_squad.tar download_url: https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open_albert.tar name: albert - need_third_part: false + need_third_part: true precisions: - - int8 + - fp16 relative_path: models/nlp/language_model/albert/ixrt task_type: nlp/language_model - datasets: local/SQuAD @@ -432,6 +431,7 @@ need_third_part: false precisions: - fp16 + - int8 relative_path: models/nlp/language_model/bert_base_squad/ixrt task_type: nlp/language_model - datasets: local/SQuAD @@ -449,7 +449,6 @@ need_third_part: false precisions: - fp16 - - int8 relative_path: models/nlp/language_model/deberta/ixrt task_type: nlp/language_model - datasets: local/SQuAD @@ -458,7 +457,6 @@ need_third_part: false precisions: - fp16 - - int8 relative_path: models/nlp/language_model/roberta/ixrt task_type: nlp/language_model - datasets: local/SQuAD @@ -467,7 +465,6 @@ need_third_part: false precisions: - fp16 - - int8 relative_path: models/nlp/language_model/roformer/ixrt task_type: nlp/language_model - datasets: https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/cifar-100-python.tar @@ -476,7 +473,6 @@ need_third_part: false precisions: - fp16 - - int8 relative_path: models/nlp/language_model/videobert/ixrt task_type: nlp/language_model - datasets: https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/eval.csv -- Gitee From 612ff4b9633a60bb4457e338b92759e079d3b049 Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Thu, 9 Jan 2025 15:12:45 +0800 Subject: [PATCH 14/35] add nlp models --- .../nlp/language_model/albert/ixrt/README.md | 12 +- .../language_model/albert/ixrt/ci/prepare.sh | 62 +++++ .../albert/ixrt/requirements.txt | 11 + .../bert_base_squad/ixrt/ci/prepare.sh | 36 +++ .../bert_base_squad/ixrt/python/inference.py | 4 + .../bert_large_squad/ixrt/ci/prepare.sh | 35 +++ 
.../bert_large_squad/ixrt/python/inference.py | 4 + .../nlp/language_model/deberta/ixrt/README.md | 13 +- .../language_model/deberta/ixrt/ci/prepare.sh | 57 +++++ .../deberta/ixrt/requirements.txt | 12 + .../nlp/language_model/roberta/ixrt/README.md | 6 +- .../language_model/roberta/ixrt/ci/prepare.sh | 56 +++++ .../roberta/ixrt/requirements.txt | 5 + .../language_model/roformer/ixrt/README.md | 5 +- .../roformer/ixrt/ci/prepare.sh | 61 +++++ .../roformer/ixrt/requirements.txt | 4 + .../language_model/videobert/ixrt/README.md | 12 +- .../videobert/ixrt/ci/prepare.sh | 45 ++++ .../videobert/ixrt/requirements.txt | 11 + tests/run_ixrt.py | 211 ++++++++---------- 20 files changed, 500 insertions(+), 162 deletions(-) create mode 100644 models/nlp/language_model/albert/ixrt/ci/prepare.sh create mode 100644 models/nlp/language_model/albert/ixrt/requirements.txt create mode 100644 models/nlp/language_model/deberta/ixrt/ci/prepare.sh create mode 100644 models/nlp/language_model/deberta/ixrt/requirements.txt create mode 100644 models/nlp/language_model/roberta/ixrt/ci/prepare.sh create mode 100644 models/nlp/language_model/roberta/ixrt/requirements.txt create mode 100644 models/nlp/language_model/roformer/ixrt/ci/prepare.sh create mode 100644 models/nlp/language_model/roformer/ixrt/requirements.txt create mode 100644 models/nlp/language_model/videobert/ixrt/ci/prepare.sh create mode 100644 models/nlp/language_model/videobert/ixrt/requirements.txt diff --git a/models/nlp/language_model/albert/ixrt/README.md b/models/nlp/language_model/albert/ixrt/README.md index a2523b43..14ba41cd 100644 --- a/models/nlp/language_model/albert/ixrt/README.md +++ b/models/nlp/language_model/albert/ixrt/README.md @@ -11,17 +11,7 @@ Albert (A Lite BERT) is a variant of the BERT (Bidirectional Encoder Representat ```bash apt install -y libnuma-dev -pip3 install onnxsim -pip3 install onnx_graphsurgeon -pip3 install scikit-learn -pip3 install tqdm -pip3 install pycuda -pip3 install onnx -pip3 install tabulate -pip3 install cv2 -pip3 install pycocotools -pip3 install opencv-python==4.6.0.66 -pip3 install transformers==4.33.3 +pip3 install -r requirements.txt ``` ### Download diff --git a/models/nlp/language_model/albert/ixrt/ci/prepare.sh b/models/nlp/language_model/albert/ixrt/ci/prepare.sh new file mode 100644 index 00000000..0f4f8f51 --- /dev/null +++ b/models/nlp/language_model/albert/ixrt/ci/prepare.sh @@ -0,0 +1,62 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +cp /root/data/3rd_party/albert-torch-fp32.json ./ + +python3 torch2onnx.py --model_path /root/data/checkpoints/open_albert/albert-base-squad.pt --output_path albert-torch-fp32.onnx +onnxsim albert-torch-fp32.onnx albert-torch-fp32-sim.onnx + +mkdir -p data/open_albert +mv ./albert-torch-fp32-sim.onnx data/open_albert/albert.onnx + +wget http://files.deepspark.org.cn:880/deepspark/madlag.tar +tar xvf madlag.tar +rm -f madlag.tar + +# link and install requirements +ln -s ../../../../../toolbox/ByteMLPerf ./ +pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/requirements.txt +pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/requirements.txt + +# modify perf_engine.py +mv ./perf_engine.py ./ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py + +# edit madlag/albert-base-v2-squad path +sed -i "s#madlag#/${MODEL_PATH}/madlag#" ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/data_loader.py + +# copy open_squad data +cp /root/data/datasets/open_squad/* ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/ + +# copy open_albert data +mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/open_albert +cp /root/data/checkpoints/open_albert/*.pt ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/open_albert + +# run acc script +cd ./ByteMLPerf/byte_infer_perf/general_perf +sed -i 's/tensorrt_legacy/tensorrt/' ./backends/ILUVATAR/common.py +sed -i 's/tensorrt_legacy/tensorrt/' ./backends/ILUVATAR/compile_backend_iluvatar.py +sed -i 's/tensorrt_legacy/tensorrt/' ./backends/ILUVATAR/runtime_backend_iluvatar.py \ No newline at end of file diff --git a/models/nlp/language_model/albert/ixrt/requirements.txt b/models/nlp/language_model/albert/ixrt/requirements.txt new file mode 100644 index 00000000..cac1ba9a --- /dev/null +++ b/models/nlp/language_model/albert/ixrt/requirements.txt @@ -0,0 +1,11 @@ +onnxsim +onnx_graphsurgeon +scikit-learn +tqdm +pycuda +onnx +tabulate +cv2 +pycocotools +opencv-python==4.6.0.66 +transformers==4.33.3 \ No newline at end of file diff --git a/models/nlp/language_model/bert_base_squad/ixrt/ci/prepare.sh b/models/nlp/language_model/bert_base_squad/ixrt/ci/prepare.sh index 293f9355..ddb6742b 100644 --- a/models/nlp/language_model/bert_base_squad/ixrt/ci/prepare.sh +++ b/models/nlp/language_model/bert_base_squad/ixrt/ci/prepare.sh @@ -1,3 +1,39 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
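A note on the `sed -i "s#madlag#/${MODEL_PATH}/madlag#"` edit in the albert prepare script: it presumably points the ByteMLPerf SQuAD data loader at the local `madlag` mirror unpacked from madlag.tar, so the tokenizer is resolved offline rather than from the Hugging Face Hub. A minimal sketch of that resolution, assuming `transformers` is installed and that the mirrored directory is `madlag/albert-base-v2-squad` (inferred from the script's comment, not confirmed here):

```python
# Illustrative only: resolve the SQuAD tokenizer from the local mirror that
# ci/prepare.sh unpacks, instead of downloading it from the Hub.
import os
from transformers import AutoTokenizer

def load_local_tokenizer(model_path: str):
    # model_path would be the directory the CI exports as MODEL_PATH;
    # "albert-base-v2-squad" is the assumed subdirectory inside madlag/.
    local_dir = os.path.join(model_path, "madlag", "albert-base-v2-squad")
    return AutoTokenizer.from_pretrained(local_dir)
```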
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + pip install -r requirements.txt + +if [ "$1" = "nvidia" ]; then + cmake -S . -B build -DUSE_TENSORRT=true + cmake --build build -j16 +else + cmake -S . -B build + cmake --build build -j16 +fi + mkdir -p ./python/data ln -s /root/data/checkpoints/bert_base_uncased_squad/ ./python/data && ln -s /root/data/datasets/squad/ ./python/data \ No newline at end of file diff --git a/models/nlp/language_model/bert_base_squad/ixrt/python/inference.py b/models/nlp/language_model/bert_base_squad/ixrt/python/inference.py index a509f071..25a40278 100644 --- a/models/nlp/language_model/bert_base_squad/ixrt/python/inference.py +++ b/models/nlp/language_model/bert_base_squad/ixrt/python/inference.py @@ -376,6 +376,10 @@ if __name__ == '__main__': print(F"E2E time : {infer_time:.3f} seconds") qps = len(squad_examples)/infer_time print(f"Latency QPS: {qps} sentences/s") + metricResult = {"metricResult": {}} + metricResult["metricResult"]["E2E time"] = round(infer_time, 3) + metricResult["metricResult"]["Latency QPS"] = round(qps, 3) + print(metricResult) with open(output_prediction_file, "w") as f: f.write(json.dumps(all_predictions, indent=4)) diff --git a/models/nlp/language_model/bert_large_squad/ixrt/ci/prepare.sh b/models/nlp/language_model/bert_large_squad/ixrt/ci/prepare.sh index 19e3e8a8..e9c50170 100644 --- a/models/nlp/language_model/bert_large_squad/ixrt/ci/prepare.sh +++ b/models/nlp/language_model/bert_large_squad/ixrt/ci/prepare.sh @@ -1,3 +1,38 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +if [ "$1" = "nvidia" ]; then + cmake -S . -B build -DUSE_TENSORRT=true + cmake --build build -j16 +else + cmake -S . 
-B build + cmake --build build -j16 +fi + pip install -r requirements.txt mkdir -p ./python/data ln -s /root/data/checkpoints/bert-large-uncased/ ./python/data && ln -s /root/data/datasets/squad/ ./python/data \ No newline at end of file diff --git a/models/nlp/language_model/bert_large_squad/ixrt/python/inference.py b/models/nlp/language_model/bert_large_squad/ixrt/python/inference.py index 860322c3..ec93972d 100644 --- a/models/nlp/language_model/bert_large_squad/ixrt/python/inference.py +++ b/models/nlp/language_model/bert_large_squad/ixrt/python/inference.py @@ -377,6 +377,10 @@ if __name__ == '__main__': qps = len(squad_examples)/infer_time print(f"Latency QPS: {qps} sentences/s") + metricResult = {"metricResult": {}} + metricResult["metricResult"]["E2E time"] = round(infer_time, 3) + metricResult["metricResult"]["Latency QPS"] = round(qps, 3) + print(metricResult) with open(output_prediction_file, "w") as f: f.write(json.dumps(all_predictions, indent=4)) diff --git a/models/nlp/language_model/deberta/ixrt/README.md b/models/nlp/language_model/deberta/ixrt/README.md index 221a33a8..69dcdf36 100644 --- a/models/nlp/language_model/deberta/ixrt/README.md +++ b/models/nlp/language_model/deberta/ixrt/README.md @@ -15,18 +15,7 @@ cd ${MODEL_PATH} apt install -y libnuma-dev -pip3 install onnxsim -pip3 install onnx_graphsurgeon -pip3 install scikit-learn -pip3 install tqdm -pip3 install pycuda -pip3 install onnx -pip3 install tabulate -pip3 install cv2 -pip3 install pycocotools -pip3 install opencv-python==4.6.0.66 -pip3 install tf2onnx -pip3 install transformers==4.33.3 +pip3 install -r requirements.txt ``` ### Download diff --git a/models/nlp/language_model/deberta/ixrt/ci/prepare.sh b/models/nlp/language_model/deberta/ixrt/ci/prepare.sh new file mode 100644 index 00000000..e3d30793 --- /dev/null +++ b/models/nlp/language_model/deberta/ixrt/ci/prepare.sh @@ -0,0 +1,57 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
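The `metricResult` block added to both BERT inference scripts gives the CI harness one structured line to scrape from stdout; tests/run_ixrt.py matches it with its METRIC_PATTERN regex, which is defined outside this hunk. The snippet below is only an approximation of that contract, assuming the dict is printed on a single line exactly as `print(metricResult)` emits it:

```python
# Illustrative consumer side (not the harness's actual code): pull the
# {"metricResult": {...}} record out of a captured stdout string.
import ast
import re

METRIC_LINE = re.compile(r"\{['\"]metricResult['\"].*\}")

def extract_metric_result(stdout: str) -> dict:
    match = METRIC_LINE.search(stdout)
    if not match:
        return {}
    # print() renders the dict with single quotes, so literal_eval is used
    # instead of json.loads.
    return ast.literal_eval(match.group(0)).get("metricResult", {})

sample = "Latency QPS: 812.3 sentences/s\n{'metricResult': {'E2E time': 12.3, 'Latency QPS': 812.3}}"
print(extract_metric_result(sample))  # {'E2E time': 12.3, 'Latency QPS': 812.3}
```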
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r requirements.txt + +cp /root/data/3rd_party/deberta-torch-fp32.json ./ +python3 torch2onnx.py --model_path /root/data/checkpoints/open_deberta/deberta-base-squad.pt --output_path deberta-torch-fp32.onnx +onnxsim deberta-torch-fp32.onnx deberta-torch-fp32-sim.onnx +python3 remove_clip_and_cast.py + +mkdir -p data/open_deberta +mv ./deberta-sim-drop-clip-drop-invaild-cast.onnx data/open_deberta/deberta.onnx + +ln -s ../../../../../toolbox/ByteMLPerf ./ + +pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/requirements.txt +pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/requirements.txt + +# setup +mv perf_engine.py ./ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py +cp /root/data/datasets/open_squad/* ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/ + +cp ./deberta-sim-drop-clip-drop-invaild-cast.onnx /root/data/checkpoints/open_deberta/ +cp -r /root/data/checkpoints/open_deberta ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/ + +cd ./ByteMLPerf/byte_infer_perf/general_perf +wget http://files.deepspark.org.cn:880/deepspark/Palak.tar +tar -zxvf Palak.tar + +#接着修改代码:ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/data_loader.py -AutoTokenizer.from_pretrained("Palak/microsoft_deberta-base_squad") => AutoTokenizer.from_pretrained("/Your/Path/Palak/microsoft_deberta-base_squad") + +# run acc perf +sed -i 's/tensorrt_legacy/tensorrt/g' backends/ILUVATAR/common.py \ No newline at end of file diff --git a/models/nlp/language_model/deberta/ixrt/requirements.txt b/models/nlp/language_model/deberta/ixrt/requirements.txt new file mode 100644 index 00000000..05393759 --- /dev/null +++ b/models/nlp/language_model/deberta/ixrt/requirements.txt @@ -0,0 +1,12 @@ +onnxsim +onnx_graphsurgeon +scikit-learn +tqdm +pycuda +onnx +tabulate +cv2 +pycocotools +opencv-python==4.6.0.66 +tf2onnx +transformers==4.33.3 \ No newline at end of file diff --git a/models/nlp/language_model/roberta/ixrt/README.md b/models/nlp/language_model/roberta/ixrt/README.md index 0588c797..c2c8b73d 100644 --- a/models/nlp/language_model/roberta/ixrt/README.md +++ b/models/nlp/language_model/roberta/ixrt/README.md @@ -13,11 +13,7 @@ export PROJ_ROOT=/PATH/TO/DEEPSPARKINFERENCE export MODEL_PATH=${PROJ_ROOT}/models/nlp/language_model/roberta/ixrt cd ${MODEL_PATH} -pip3 install onnxsim -pip3 install py-libnuma==1.2 -pip3 install bert -pip3 install pycuda -pip3 install transformers==4.33.3 +pip3 install -r requirements.txt ``` ### Download diff --git a/models/nlp/language_model/roberta/ixrt/ci/prepare.sh b/models/nlp/language_model/roberta/ixrt/ci/prepare.sh new file mode 100644 index 00000000..c045e361 --- /dev/null +++ b/models/nlp/language_model/roberta/ixrt/ci/prepare.sh @@ -0,0 +1,56 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r requirements.txt + +mkdir -p data +cp -r /root/data/checkpoints/open_roberta data/ +cp /root/data/3rd_party/roberta-torch-fp32.json ./ +# export onnx +python3 export_onnx.py --model_path open_roberta/roberta-base-squad.pt --output_path open_roberta/roberta-torch-fp32.onnx + +# Simplify onnx model +onnxsim open_roberta/roberta-torch-fp32.onnx open_roberta/roberta.onnx + +# Link and install requirements +ln -s ../../../../../toolbox/ByteMLPerf ./ +pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/requirements.txt +pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/requirements.txt +mv perf_engine.py ./ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py + +# Move open_roberta +mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/ +mv open_roberta ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/ + +# Get open_squad +cp /root/data/datasets/open_squad/* ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad + +# Get csarron.tar +wget http://files.deepspark.org.cn:880/deepspark/csarron.tar +tar xf csarron.tar +rm -f csarron.tar +mv csarron/ ./ByteMLPerf/byte_infer_perf/ \ No newline at end of file diff --git a/models/nlp/language_model/roberta/ixrt/requirements.txt b/models/nlp/language_model/roberta/ixrt/requirements.txt new file mode 100644 index 00000000..b920daeb --- /dev/null +++ b/models/nlp/language_model/roberta/ixrt/requirements.txt @@ -0,0 +1,5 @@ +onnxsim +py-libnuma==1.2 +bert +pycuda +transformers==4.33.3 \ No newline at end of file diff --git a/models/nlp/language_model/roformer/ixrt/README.md b/models/nlp/language_model/roformer/ixrt/README.md index c088cf0f..de0fbbfb 100644 --- a/models/nlp/language_model/roformer/ixrt/README.md +++ b/models/nlp/language_model/roformer/ixrt/README.md @@ -11,10 +11,7 @@ Position encoding recently has shown effective in the transformer architecture. ```bash apt install -y libnuma-dev -pip3 install tf2onnx -pip3 install pycuda -pip3 install onnxsim -pip3 install py-libnuma==1.2 +pip3 install -r requirements.txt ``` diff --git a/models/nlp/language_model/roformer/ixrt/ci/prepare.sh b/models/nlp/language_model/roformer/ixrt/ci/prepare.sh new file mode 100644 index 00000000..12dd18ba --- /dev/null +++ b/models/nlp/language_model/roformer/ixrt/ci/prepare.sh @@ -0,0 +1,61 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
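Several of these prepare scripts (albert, deberta, roberta, and roformer below) run the exported model through the `onnxsim` CLI before handing it to ByteMLPerf. For reference, the programmatic equivalent is roughly the following, assuming the `onnx` and `onnxsim` packages are available:

```python
# Rough equivalent of `onnxsim input.onnx output.onnx` as used in ci/prepare.sh.
import onnx
from onnxsim import simplify

def simplify_onnx(src: str, dst: str) -> None:
    model = onnx.load(src)
    simplified, ok = simplify(model)  # constant folding + shape inference
    if not ok:
        raise RuntimeError(f"onnx-simplifier could not validate {src}")
    onnx.save(simplified, dst)

# e.g. simplify_onnx("open_roberta/roberta-torch-fp32.onnx", "open_roberta/roberta.onnx")
```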
+# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r requirements.txt + +mkdir -p data +cp -r /root/data/checkpoints/open_roformer data/ + +# export onnx +python3 export_onnx.py --model_path ./data/open_roformer --output_path ./data/open_roformer/roformer-frozen_org.onnx + +# Simplify onnx model +onnxsim ./data/open_roformer/roformer-frozen_org.onnx ./data/open_roformer/roformer-frozen.onnx +python3 deploy.py --model_path ./data/open_roformer/roformer-frozen.onnx --output_path ./data/open_roformer/roformer.onnx + +cp /root/data/3rd_party/iluvatar-corex-ixrt/tools/optimizer/optimizer.py ./ + +# link ByteMLPerf and install requirements +ln -s ../../../../../toolbox/ByteMLPerf ./ +pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/requirements.txt + +mv perf_engine.py ./ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py + +# Comment Line102 in compile_backend_iluvatar.py +sed -i '102s/build_engine/# build_engine/' ./ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/compile_backend_iluvatar.py + +# Move open_roformer +mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/ +mv ./data/open_roformer ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/ + +# Setup open_cail2019 dataset +cp /root/data/datasets/open_cail2019/* ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cail2019 + +# Go to general_perf/ +cd ./ByteMLPerf/byte_infer_perf/general_perf +# Modify model_zoo/roformer-tf-fp32.json +sed -i 's/segment:0/segment0/g; s/token:0/token0/g' model_zoo/roformer-tf-fp32.json \ No newline at end of file diff --git a/models/nlp/language_model/roformer/ixrt/requirements.txt b/models/nlp/language_model/roformer/ixrt/requirements.txt new file mode 100644 index 00000000..60aeb062 --- /dev/null +++ b/models/nlp/language_model/roformer/ixrt/requirements.txt @@ -0,0 +1,4 @@ +tf2onnx +pycuda +onnxsim +py-libnuma==1.2 \ No newline at end of file diff --git a/models/nlp/language_model/videobert/ixrt/README.md b/models/nlp/language_model/videobert/ixrt/README.md index d485fbe3..c389eaa3 100644 --- a/models/nlp/language_model/videobert/ixrt/README.md +++ b/models/nlp/language_model/videobert/ixrt/README.md @@ -11,17 +11,7 @@ VideoBERT is a model designed for video understanding tasks, extending the capab ```bash apt install -y libnuma-dev -pip3 install onnxsim -pip3 install onnx_graphsurgeon -pip3 install scikit-learn -pip3 install tqdm -pip3 install pycuda -pip3 install onnx -pip3 install tabulate -pip3 install cv2 -pip3 install pycocotools -pip3 install opencv-python==4.6.0.66 -pip3 install transformers==4.33.3 +pip3 install -r requirements.txt ``` ### Download diff --git a/models/nlp/language_model/videobert/ixrt/ci/prepare.sh b/models/nlp/language_model/videobert/ixrt/ci/prepare.sh new file mode 100644 index 00000000..86c8b8d7 --- /dev/null +++ b/models/nlp/language_model/videobert/ixrt/ci/prepare.sh @@ -0,0 +1,45 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r requirements.txt + +mkdir -p data +cp -r /root/data/checkpoints/open_videobert data/ + +# link and install requirements +ln -s ../../../../../toolbox/ByteMLPerf ./ +pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/requirements.txt +pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/requirements.txt + +# copy data +mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cifar/ +cp -r /root/data/datasets/open_cifar/cifar-100-python/ ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cifar/ +mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/open_videobert/ +cp /root/data/checkpoints/open_videobert/video-bert.onnx ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/open_videobert/ + +# run acc scripts +mv perf_engine.py ./ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py \ No newline at end of file diff --git a/models/nlp/language_model/videobert/ixrt/requirements.txt b/models/nlp/language_model/videobert/ixrt/requirements.txt new file mode 100644 index 00000000..cac1ba9a --- /dev/null +++ b/models/nlp/language_model/videobert/ixrt/requirements.txt @@ -0,0 +1,11 @@ +onnxsim +onnx_graphsurgeon +scikit-learn +tqdm +pycuda +onnx +tabulate +cv2 +pycocotools +opencv-python==4.6.0.66 +transformers==4.33.3 \ No newline at end of file diff --git a/tests/run_ixrt.py b/tests/run_ixrt.py index c57cfd48..45f0d8b0 100644 --- a/tests/run_ixrt.py +++ b/tests/run_ixrt.py @@ -74,25 +74,15 @@ def main(): logging.debug(f"The result of {model['name']} is\n{json.dumps(result, indent=4)}") logging.info(f"End running {model['name']} test case.") - # # OCR模型 - # if model["task_type"] in ["cv/ocr"]: - # logging.info(f"Start running {model['name']} test case:\n{json.dumps(model, indent=4)}") - # d_url = model["download_url"] - # if d_url is not None: - # result = run_ocr_testcase(model) - # check_model_result(result) - # logging.debug(f"The result of {model['name']} is\n{json.dumps(result, indent=4)}") - # logging.info(f"End running {model['name']} test case.") - - # # Trace模型 - # if model["task_type"] in ["cv/trace"]: - # logging.info(f"Start running {model['name']} test case:\n{json.dumps(model, indent=4)}") - # d_url = model["download_url"] - # if d_url is not None: - # result = run_trace_testcase(model) - # check_model_result(result) - # logging.debug(f"The result of {model['name']} is\n{json.dumps(result, indent=4)}") - # logging.info(f"End running {model['name']} test case.") + # Segmentation模型 + if model["task_type"] in ["cv/segmentation"]: + logging.info(f"Start running {model['name']} test case:\n{json.dumps(model, indent=4)}") + d_url = model["download_url"] + if d_url is not None: + result = run_segmentation_testcase(model) + check_model_result(result) + logging.debug(f"The result of {model['name']} is\n{json.dumps(result, indent=4)}") + logging.info(f"End running 
{model['name']} test case.") # # Speech模型 # if model["task_type"] in ["speech/speech_recognition"]: @@ -104,15 +94,15 @@ def main(): # logging.debug(f"The result of {model['name']} is\n{json.dumps(result, indent=4)}") # logging.info(f"End running {model['name']} test case.") - # # NLP模型 - # if model["task_type"] in ["nlp/language_model"]: - # logging.info(f"Start running {model['name']} test case:\n{json.dumps(model, indent=4)}") - # d_url = model["download_url"] - # if d_url is not None: - # result = run_nlp_testcase(model) - # check_model_result(result) - # logging.debug(f"The result of {model['name']} is\n{json.dumps(result, indent=4)}") - # logging.info(f"End running {model['name']} test case.") + # NLP模型 + if model["task_type"] in ["nlp/language_model"]: + logging.info(f"Start running {model['name']} test case:\n{json.dumps(model, indent=4)}") + d_url = model["download_url"] + if d_url is not None: + result = run_nlp_testcase(model) + check_model_result(result) + logging.debug(f"The result of {model['name']} is\n{json.dumps(result, indent=4)}") + logging.info(f"End running {model['name']} test case.") logging.info(f"Full text result: {result}") @@ -243,6 +233,9 @@ def run_detec_testcase(model): bash scripts/infer_{model_name}_{prec}_performance.sh """ + if model_name == "rtmpose": + script = "python3 predict.py --model ./rtmpose_opt.onnx --precision fp16 --img_path demo/demo.jpg" + r, t = run_script(script) sout = r.stdout fps_pattern = r"(?PFPS\s*:\s*(\d+\.?\d*))" @@ -282,7 +275,7 @@ def run_detec_testcase(model): return result -def run_ocr_testcase(model): +def run_segmentation_testcase(model): model_name = model["name"] result = { "name": model_name, @@ -293,70 +286,6 @@ def run_ocr_testcase(model): dataset_n = model["datasets"].split("/")[-1] prepare_script = f""" cd ../{model['relative_path']} - ln -s /root/data/checkpoints/{checkpoint_n} ./ - ln -s /root/data/datasets/{dataset_n} ./ - unzip /root/data/3rd_party/PaddleOCR-release-2.6.zip -d ./PaddleOCR - bash ci/prepare.sh - """ - - # add pip list info when in debug mode - if utils.is_debug(): - pip_list_script = "pip list | grep -E 'numpy|transformer|igie|mmcv|onnx'\n" - prepare_script = pip_list_script + prepare_script + pip_list_script - - run_script(prepare_script) - - for prec in model["precisions"]: - logging.info(f"Start running {model_name} {prec} test case") - script = f""" - cd ../{model['relative_path']} - export DATASETS_DIR=./{dataset_n}/ - bash scripts/infer_{model_name}_{prec}_accuracy.sh - bash scripts/infer_{model_name}_{prec}_performance.sh - """ - - r, t = run_script(script) - sout = r.stdout - pattern = r"\* ([\w\d ]+):\s*([\d.]+)[ ms%]*, ([\w\d ]+):\s*([\d.]+)[ ms%]*" - matchs = re.findall(pattern, sout) - for m in matchs: - result["result"].setdefault(prec, {"status": "FAIL"}) - try: - result["result"][prec] = result["result"][prec] | {m[0]: float(m[1]), m[2]: float(m[3])} - except ValueError: - print("The string cannot be converted to a float.") - result["result"][prec] = result["result"][prec] | {m[0]: m[1], m[2]: m[3]} - - pattern = METRIC_PATTERN - matchs = re.findall(pattern, sout) - if matchs and len(matchs) == 1: - result["result"].setdefault(prec, {}) - result["result"][prec].update(get_metric_result(matchs[0])) - result["result"][prec]["status"] = "PASS" - result["result"][prec]["Cost time (s)"] = t - logging.debug(f"matchs:\n{matchs}") - - return result - -def run_trace_testcase(model): - model_name = model["name"] - result = { - "name": model_name, - "result": {}, - } - d_url = 
model["download_url"] - checkpoint_n = d_url.split("/")[-1] - dataset_n = model["datasets"].split("/")[-1] - prepare_script = f""" - cd ../{model['relative_path']} - ln -s /root/data/checkpoints/{checkpoint_n} ./ - ln -s /root/data/datasets/{dataset_n} ./ - """ - - if model["need_third_part"]: - prepare_script += "unzip /root/data/3rd_party/fast-reid.zip -d ./fast-reid\n" - - prepare_script += """ bash ci/prepare.sh ls -l | grep onnx """ @@ -373,27 +302,49 @@ def run_trace_testcase(model): script = f""" cd ../{model['relative_path']} export DATASETS_DIR=./{dataset_n}/ + export PROJ_DIR=./ + export CHECKPOINTS_DIR=./checkpoints + export COCO_GT=./{dataset_n}/annotations/instances_val2017.json + export EVAL_DIR=./{dataset_n}/val2017 + export RUN_DIR=./ bash scripts/infer_{model_name}_{prec}_accuracy.sh bash scripts/infer_{model_name}_{prec}_performance.sh """ r, t = run_script(script) sout = r.stdout - pattern = r"\* ([\w\d ]+):\s*([\d.]+)[ ms%]*, ([\w\d ]+):\s*([\d.]+)[ ms%]*" + fps_pattern = r"(?PFPS\s*:\s*(\d+\.?\d*))" + e2e_pattern = r"(?P\s*E2E time\s*:\s*(\d+\.\d+)\s)" + combined_pattern = re.compile(f"{fps_pattern}|{e2e_pattern}") + matchs = combined_pattern.finditer(sout) + for match in matchs: + result["result"].setdefault(prec, {"status": "FAIL"}) + for name, value in match.groupdict().items(): + if value: + try: + result["result"][prec][name] = float(f"{float(value.split(':')[1].strip()):.3f}") + break + except ValueError: + print("The string cannot be converted to a float.") + result["result"][prec][name] = value + pattern = r"Average Precision \(AP\) @\[ (IoU=0.50[:\d.]*)\s*\| area= all \| maxDets=\s?\d+\s?\] =\s*([\d.]+)" matchs = re.findall(pattern, sout) for m in matchs: - result["result"].setdefault(prec, {"status": "FAIL"}) + result["result"].setdefault(prec, {}) try: - result["result"][prec] = result["result"][prec] | {m[0]: float(m[1]), m[2]: float(m[3])} + result["result"][prec] = result["result"][prec] | {m[0]: float(m[1])} except ValueError: print("The string cannot be converted to a float.") - result["result"][prec] = result["result"][prec] | {m[0]: m[1], m[2]: m[3]} - pattern = METRIC_PATTERN - matchs = re.findall(pattern, sout) - if matchs and len(matchs) == 1: - result["result"].setdefault(prec, {}) - result["result"][prec].update(get_metric_result(matchs[0])) + result["result"][prec] = result["result"][prec] | {m[0]: m[1]} + if matchs and len(matchs) == 2: result["result"][prec]["status"] = "PASS" + else: + pattern = METRIC_PATTERN + matchs = re.findall(pattern, sout) + if matchs and len(matchs) == 1: + result["result"].setdefault(prec, {}) + result["result"][prec].update(get_metric_result(matchs[0])) + result["result"][prec]["status"] = "PASS" result["result"][prec]["Cost time (s)"] = t logging.debug(f"matchs:\n{matchs}") return result @@ -405,27 +356,12 @@ def run_nlp_testcase(model): "name": model_name, "result": {}, } - d_url = model["download_url"] - checkpoint_n = d_url.split("/")[-1] - dataset_n = model["datasets"].split("/")[-1] - target_dirs = {"bert_base_squad": "csarron/bert-base-uncased-squad-v1", "bert_base_ner":"test", "bert_large_squad": "neuralmagic/bert-large-uncased-finetuned-squadv1"} - target_dir = target_dirs[model_name] - dirname = os.path.dirname(target_dir) - mkdir_script = f"mkdir -p {dirname}" if dirname else "" - prepare_script = f""" set -x cd ../{model['relative_path']} - {mkdir_script} - ln -s /root/data/checkpoints/{checkpoint_n} ./{target_dir} - export DATASETS_DIR=/root/data/datasets/{dataset_n} bash ci/prepare.sh """ - # prepare 
int8 model for bert_large_squad - if model_name == "bert_large_squad": - prepare_script += "ln -s /root/data/checkpoints/bert_large_int8.hdf5 ./\n" - # add pip list info when in debug mode if utils.is_debug(): pip_list_script = "pip list | grep -E 'numpy|transformer|igie|mmcv|onnx'\n" @@ -437,11 +373,48 @@ def run_nlp_testcase(model): logging.info(f"Start running {model_name} {prec} test case") script = f""" set -x - export DATASETS_DIR=/root/data/datasets/{dataset_n} cd ../{model['relative_path']} - bash scripts/infer_{model_name}_{prec}_accuracy.sh + export ORIGIN_ONNX_NAME=./data/open_{model_name}/{model_name} + export OPTIMIER_FILE=./optimizer.py + export PROJ_PATH=./ bash scripts/infer_{model_name}_{prec}_performance.sh + cd ./ByteMLPerf/byte_infer_perf/general_perf """ + if model_name == "roformer": + script += f""" + python3 core/perf_engine.py --hardware_type ILUVATAR --task roformer-tf-fp32 + """ + elif model_name == "videobert": + script += f""" + python3 core/perf_engine.py --hardware_type ILUVATAR --task {model_name}-onnx-fp32 + """ + else: + # model_name == "roberta" or model_name == "deberta" or model_name == "albert" + script += f""" + python3 core/perf_engine.py --hardware_type ILUVATAR --task {model_name}-torch-fp32 + """ + + + if model_name == "bert_base_squad": + script = f""" + set -x + cd ../{model['relative_path']}/python + bash scripts/infer_{model_name}_{prec}_ixrt.sh + """ + elif model_name == "bert_large_squad": + script = f""" + set -x + cd ../{model['relative_path']}/python + bash script/build_engine.sh --bs 32 + bash script/inference_squad.sh --bs 32 + """ + if prec == "int8": + script = f""" + set -x + cd ../{model['relative_path']}/python + bash script/build_engine.sh --bs 32 --int8 + bash script/inference_squad.sh --bs 32 --int8 + """ r, t = run_script(script) sout = r.stdout -- Gitee From 81c507e3de8f6a7624f142870337a7d6c20161b1 Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Fri, 10 Jan 2025 09:38:28 +0800 Subject: [PATCH 15/35] update clr and detec --- .../cspdarknet53/ixrt/requirements.txt | 3 ++- models/cv/detection/fcos/ixrt/README.md | 2 ++ models/cv/detection/fcos/ixrt/ci/prepare.sh | 16 +++++++++++++++- 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/models/cv/classification/cspdarknet53/ixrt/requirements.txt b/models/cv/classification/cspdarknet53/ixrt/requirements.txt index 40d37d5a..972db4d8 100644 --- a/models/cv/classification/cspdarknet53/ixrt/requirements.txt +++ b/models/cv/classification/cspdarknet53/ixrt/requirements.txt @@ -3,4 +3,5 @@ tqdm onnxsim ppq mmcv==1.5.3 -mmcls \ No newline at end of file +mmcls +protobuf==3.20.0 \ No newline at end of file diff --git a/models/cv/detection/fcos/ixrt/README.md b/models/cv/detection/fcos/ixrt/README.md index edfdeefe..bc3f0461 100755 --- a/models/cv/detection/fcos/ixrt/README.md +++ b/models/cv/detection/fcos/ixrt/README.md @@ -23,6 +23,8 @@ pip3 install -r requirements.txt The inference of the FCOS model requires a dependency on a well-adapted mmcv-v1.7.0 library. Please inquire with the staff to obtain the relevant libraries. 
+You can follow here to build: https://gitee.com/deep-spark/deepsparkhub/blob/master/toolbox/MMDetection/prepare_mmcv.sh + ```bash cd mmcv diff --git a/models/cv/detection/fcos/ixrt/ci/prepare.sh b/models/cv/detection/fcos/ixrt/ci/prepare.sh index d1f5ae86..92b6b95a 100644 --- a/models/cv/detection/fcos/ixrt/ci/prepare.sh +++ b/models/cv/detection/fcos/ixrt/ci/prepare.sh @@ -25,9 +25,23 @@ else echo "Not Support Os" fi pip3 install -r requirements.txt +cp -r /root/data/3rd_party/mmcv-v1.7.1 ./mmcv +cp -r -T /root/data/repos/deepsparkhub/toolbox/MMDetection/patch/mmcv/v1.7.1 ./mmcv +cd mmcv +rm -rf mmcv/ops/csrc/common/cuda/spconv/ mmcv/ops/csrc/common/utils/spconv/ +rm -f mmcv/ops/csrc/pytorch/cpu/sparse_* +rm -f mmcv/ops/csrc/pytorch/cuda/fused_spconv_ops_cuda.cu +rm -f mmcv/ops/csrc/pytorch/cuda/spconv_ops_cuda.cu +rm -f mmcv/ops/csrc/pytorch/cuda/sparse_* +rm -f mmcv/ops/csrc/pytorch/sp* + +bash clean_mmcv.sh +bash build_mmcv.sh +bash install_mmcv.sh +cd .. mkdir -p checkpoints -unzip -q /root/data/repos/mmpretrain-0.24.0.zip -d ./ +cp -r /root/data/3rd_party/mmdetection-v2.25.0 ./mmdetection cd mmdetection python3 tools/deployment/pytorch2onnx.py \ ../fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco.py \ -- Gitee From 4d58ac27530c3c45db0d24db686345657cc774ce Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Fri, 10 Jan 2025 13:06:32 +0800 Subject: [PATCH 16/35] update all ixrt model --- .../swin_transformer_large/ixrt/README.md | 1 - .../swin_transformer_large/ixrt/ci/prepare.sh | 20 +- .../ixrt/perf_engine.py | 349 ------------------ models/cv/face/facenet/ixrt/README.md | 14 +- models/cv/face/facenet/ixrt/ci/prepare.sh | 39 ++ models/cv/face/facenet/ixrt/inference.py | 11 +- models/cv/face/facenet/ixrt/requirements.txt | 12 + .../ixrt/README.md | 14 +- .../ixrt/build_engine.py | 0 .../lightweight_openpose/ixrt/ci/prepare.sh | 37 ++ .../ixrt/common.py | 0 .../ixrt/datasets/__init__.py | 0 .../ixrt/datasets/coco.py | 0 .../ixrt/inference_accuracy.py | 0 .../ixrt/inference_performance.py | 0 .../ixrt/modules/__init__.py | 0 .../ixrt/modules/keypoints.py | 0 .../ixrt/modules/pose.py | 0 .../ixrt/requirements.txt | 7 + ...nfer_lightweight_openpose_fp16_accuracy.sh | 0 ...r_lightweight_openpose_fp16_performance.sh | 0 .../cv/pose_estimation/rtmpose/ixrt/README.md | 10 +- .../rtmpose/ixrt/ci/prepare.sh | 36 ++ .../pose_estimation/rtmpose/ixrt/predict.py | 2 + .../rtmpose/ixrt/requirements.txt | 7 + .../segmentation/mask_rcnn/ixrt/ci/prepare.sh | 18 + .../mask_rcnn/ixrt/python/maskrcnn.py | 16 +- models/cv/segmentation/solov1/ixrt/README.md | 11 +- .../cv/segmentation/solov1/ixrt/ci/prepare.sh | 48 +++ .../segmentation/solov1/ixrt/requirements.txt | 8 + .../solov1/ixrt/solov1_inference.py | 8 +- .../clip/{ixformer => ixrt}/README.md | 8 +- .../text_and_image/clip/ixrt/ci/prepare.sh | 31 ++ .../clip/{ixformer => ixrt}/inference.py | 13 +- .../nlp/language_model/albert/ixrt/README.md | 3 - .../language_model/albert/ixrt/ci/prepare.sh | 4 +- .../language_model/albert/ixrt/perf_engine.py | 349 ------------------ .../nlp/language_model/deberta/ixrt/README.md | 1 - .../language_model/deberta/ixrt/ci/prepare.sh | 2 +- .../deberta/ixrt/perf_engine.py | 349 ------------------ .../nlp/language_model/roberta/ixrt/README.md | 1 - .../language_model/roberta/ixrt/ci/prepare.sh | 2 +- .../roberta/ixrt/perf_engine.py | 349 ------------------ .../language_model/roformer/ixrt/README.md | 2 - .../roformer/ixrt/ci/prepare.sh | 2 - .../roformer/ixrt/perf_engine.py | 349 ------------------ 
.../videobert/ixrt/ci/prepare.sh | 5 +- .../videobert/ixrt/perf_engine.py | 349 ------------------ .../ctr-prediction/widedeep/ixrt/README.md | 6 +- .../widedeep/ixrt/ci/prepare.sh | 52 +++ .../widedeep/ixrt/requirements.txt | 4 + .../conformer/ixrt/README.md | 6 +- .../conformer/ixrt/ci/prepare.sh | 32 ++ .../conformer/ixrt/ixrt_inference_accuracy.py | 3 + .../ixrt/ixrt_inference_performance.py | 4 + .../conformer/ixrt/requirements.txt | 5 + .../transformer_asr/ixrt/README.md | 2 +- .../transformer_asr/ixrt/ci/prepare.sh | 40 ++ .../transformer_asr/ixrt/requirements.txt | 1 + tests/models_ixrt.yaml | 12 +- tests/run_ixrt.py | 125 +++---- .../general_perf/core/perf_engine.py | 50 +-- 62 files changed, 517 insertions(+), 2312 deletions(-) delete mode 100644 models/cv/classification/swin_transformer_large/ixrt/perf_engine.py create mode 100644 models/cv/face/facenet/ixrt/ci/prepare.sh create mode 100644 models/cv/face/facenet/ixrt/requirements.txt rename models/cv/pose_estimation/{lightweightopenpose => lightweight_openpose}/ixrt/README.md (85%) rename models/cv/pose_estimation/{lightweightopenpose => lightweight_openpose}/ixrt/build_engine.py (100%) create mode 100644 models/cv/pose_estimation/lightweight_openpose/ixrt/ci/prepare.sh rename models/cv/pose_estimation/{lightweightopenpose => lightweight_openpose}/ixrt/common.py (100%) rename models/cv/pose_estimation/{lightweightopenpose => lightweight_openpose}/ixrt/datasets/__init__.py (100%) rename models/cv/pose_estimation/{lightweightopenpose => lightweight_openpose}/ixrt/datasets/coco.py (100%) rename models/cv/pose_estimation/{lightweightopenpose => lightweight_openpose}/ixrt/inference_accuracy.py (100%) mode change 100755 => 100644 rename models/cv/pose_estimation/{lightweightopenpose => lightweight_openpose}/ixrt/inference_performance.py (100%) mode change 100755 => 100644 rename models/cv/pose_estimation/{lightweightopenpose => lightweight_openpose}/ixrt/modules/__init__.py (100%) rename models/cv/pose_estimation/{lightweightopenpose => lightweight_openpose}/ixrt/modules/keypoints.py (100%) rename models/cv/pose_estimation/{lightweightopenpose => lightweight_openpose}/ixrt/modules/pose.py (100%) create mode 100644 models/cv/pose_estimation/lightweight_openpose/ixrt/requirements.txt rename models/cv/pose_estimation/{lightweightopenpose => lightweight_openpose}/ixrt/scripts/infer_lightweight_openpose_fp16_accuracy.sh (100%) rename models/cv/pose_estimation/{lightweightopenpose => lightweight_openpose}/ixrt/scripts/infer_lightweight_openpose_fp16_performance.sh (100%) create mode 100644 models/cv/pose_estimation/rtmpose/ixrt/ci/prepare.sh create mode 100644 models/cv/pose_estimation/rtmpose/ixrt/requirements.txt create mode 100644 models/cv/segmentation/solov1/ixrt/ci/prepare.sh create mode 100644 models/cv/segmentation/solov1/ixrt/requirements.txt rename models/multimodal/text_and_image/clip/{ixformer => ixrt}/README.md (91%) create mode 100644 models/multimodal/text_and_image/clip/ixrt/ci/prepare.sh rename models/multimodal/text_and_image/clip/{ixformer => ixrt}/inference.py (82%) delete mode 100644 models/nlp/language_model/albert/ixrt/perf_engine.py delete mode 100644 models/nlp/language_model/deberta/ixrt/perf_engine.py delete mode 100644 models/nlp/language_model/roberta/ixrt/perf_engine.py delete mode 100644 models/nlp/language_model/roformer/ixrt/perf_engine.py delete mode 100644 models/nlp/language_model/videobert/ixrt/perf_engine.py create mode 100644 models/recommendation/ctr-prediction/widedeep/ixrt/ci/prepare.sh create mode 
100644 models/recommendation/ctr-prediction/widedeep/ixrt/requirements.txt create mode 100644 models/speech/speech_recognition/conformer/ixrt/ci/prepare.sh create mode 100644 models/speech/speech_recognition/conformer/ixrt/requirements.txt create mode 100644 models/speech/speech_recognition/transformer_asr/ixrt/ci/prepare.sh create mode 100644 models/speech/speech_recognition/transformer_asr/ixrt/requirements.txt diff --git a/models/cv/classification/swin_transformer_large/ixrt/README.md b/models/cv/classification/swin_transformer_large/ixrt/README.md index 1010b800..64f4daa6 100644 --- a/models/cv/classification/swin_transformer_large/ixrt/README.md +++ b/models/cv/classification/swin_transformer_large/ixrt/README.md @@ -70,7 +70,6 @@ pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/requirements.txt pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/requirements.txt # copy data -mv perf_engine.py ./ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py cp -r datasets/open_imagenet/* ByteMLPerf/byte_infer_perf/general_perf/datasets/open_imagenet/ mkdir -p ./ByteMLPerf/general_perf/model_zoo/popular/swin-large cp general_perf/model_zoo/popular/swin-large/* ./ByteMLPerf/general_perf/model_zoo/popular/swin-large diff --git a/models/cv/classification/swin_transformer_large/ixrt/ci/prepare.sh b/models/cv/classification/swin_transformer_large/ixrt/ci/prepare.sh index b88bcb1f..5adf3391 100644 --- a/models/cv/classification/swin_transformer_large/ixrt/ci/prepare.sh +++ b/models/cv/classification/swin_transformer_large/ixrt/ci/prepare.sh @@ -26,5 +26,21 @@ else fi pip install -r requirements.txt -mkdir checkpoints -python3 export_onnx.py --origin_model /root/data/checkpoints/squeezenet_v1_1.pth --output_model checkpoints/squeezenet_v1_1.onnx \ No newline at end of file +mkdir -p general_perf/model_zoo/regular +mkdir -p general_perf/model_zoo/popular +mkdir -p general_perf/model_zoo/sota + +cp /root/data/3rd_party/swin-large-torch-fp32.json ./ +cp /root/data/3rd_party/iluvatar-corex-ixrt/tools/optimizer/optimizer.py ./ +cp -r /root/data/checkpoints/swin-large ./general_perf/model_zoo/popular/ + +python3 torch2onnx.py --model_path ./general_perf/model_zoo/popular/swin-large/swin-transformer-large.pt --output_path swin-large-torch-fp32.onnx + +ln -s ../../../../../toolbox/ByteMLPerf ./ +pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/requirements.txt +pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/requirements.txt + +# copy data +cp -r /root/data/datasets/open_imagenet/* ByteMLPerf/byte_infer_perf/general_perf/datasets/open_imagenet/ +mkdir -p ./ByteMLPerf/general_perf/model_zoo/popular/swin-large +cp general_perf/model_zoo/popular/swin-large/* ./ByteMLPerf/general_perf/model_zoo/popular/swin-large \ No newline at end of file diff --git a/models/cv/classification/swin_transformer_large/ixrt/perf_engine.py b/models/cv/classification/swin_transformer_large/ixrt/perf_engine.py deleted file mode 100644 index 089d9860..00000000 --- a/models/cv/classification/swin_transformer_large/ixrt/perf_engine.py +++ /dev/null @@ -1,349 +0,0 @@ -# Copyright 2023 ByteDance and/or its affiliates. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import sys -import os -import logging -import importlib -import json -import subprocess -import time - -from typing import Any, Dict, Tuple -from prompt_toolkit.shortcuts import radiolist_dialog, input_dialog, yes_no_dialog -from prompt_toolkit.styles import Style - -BYTE_MLPERF_ROOT = os.path.dirname( - os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -os.chdir(BYTE_MLPERF_ROOT) -sys.path.insert(0, BYTE_MLPERF_ROOT) - -import argparse -from general_perf.core.configs.workload_store import load_workload -from general_perf.core.configs.dataset_store import load_dataset -from general_perf.core.configs.backend_store import init_compile_backend, init_runtime_backend - -logging.basicConfig(level=logging.INFO) -log = logging.getLogger("PerfEngine") -os.environ["TF_CPP_MIN_LOG_LEVEL"] = '3' - - -def get_args(): - """Parse commandline.""" - parser = argparse.ArgumentParser() - parser.add_argument( - "--task", - default="resnet50-tf-fp32", - help="The task going to be evaluted, refs to workloads/") - parser.add_argument( - "--hardware_type", - default="GPU", - help="The backend going to be evaluted, refs to backends/") - parser.add_argument("--compile_only", - action='store_true', - help="Run compilation only") - - args = parser.parse_args() - return args - - -class PerfEngine: - def __init__(self) -> None: - super().__init__() - self.args = get_args() - self.workload = load_workload(self.args.task) - self.backend_type = self.args.hardware_type - self.compile_backend = None - self.old_os_path = os.environ['PATH'] - self.prev_sys_path = list(sys.path) - self.real_prefix = sys.prefix - self.compile_only_mode = False - - def start_engine(self) -> None: - ''' - Byte MlPerf will create an virtual env for each backend to avoid dependance conflict - ''' - success, total = 0, len(self.workload) - if total == 0: - return - log.info("******************* Backend Env Initization *******************") - status = self.activate_venv(self.backend_type) - if not status: - log.warning("Activate virtualenv Failed, Please Check...") - - self.compile_backend = init_compile_backend(self.backend_type) - self.runtime_backend = init_runtime_backend(self.backend_type) - - output_dir = os.path.abspath('general_perf/reports/' + - self.backend_type) - os.makedirs(output_dir, exist_ok=True) - - status = self.single_workload_perf(self.workload) - - def single_workload_perf( - self, workload: Dict[str, Any]) -> bool: - log.info("******************************************* Start to test model: {}. 
*******************************************".format(workload['model'])) - - # Check Compile Only Mode - self.compile_only_mode = False - if self.args.compile_only or workload['compile_only']: - self.compile_only_mode = True - - base_report = { - "Model": workload['model'].upper(), - "Backend": self.backend_type, - "Host Info": self.get_cpu_name() - } - - # Initalize Model Config Info - model_info = self.get_model_info(workload['model']) - pre_compile_config = {"workload": workload, 'model_info': model_info} - interact_info = self.check_interact_info(pre_compile_config) - pre_compile_config['interact_info'] = interact_info - if not model_info['dataset_name']: - model_info['dataset_name'] = 'fake_dataset' - - - ''' - Compile Backend could do some optimization like convert model format here - ''' - log.info("******************************************* Running Backend Compilation... *******************************************") - log.info("Running Backend Preoptimization...") - pre_compile_config = self.compile_backend.pre_optimize(pre_compile_config) - - - # Initalize dataset - dataset = load_dataset(model_info) - dataset.preprocess() - base_report['Dataset'] = model_info['dataset_name'].upper( - ) if model_info['dataset_name'] else None - - #Placeholder Only - segment_info = self.compile_backend.segment(pre_compile_config) - - best_batch_sizes = self.compile_backend.get_best_batch_size() - if isinstance(best_batch_sizes, list): - pre_compile_config['workload'][ - 'batch_sizes'] = best_batch_sizes - - log.info("Start to compile the model...") - start = time.time() - compile_info = self.compile_backend.compile(pre_compile_config, - dataset) - end = time.time() - - graph_compile_report = {} - graph_compile_report["Compile Duration"] = round(end - start, 5) - graph_compile_report["Compile Precision"] = compile_info[ - 'compile_precision'] - graph_compile_report["Subgraph Coverage"] = compile_info['sg_percent'] - if 'optimizations' in compile_info: - graph_compile_report['Optimizations'] = compile_info['optimizations'] - if 'instance_count' in compile_info: - base_report['Instance Count'] = compile_info['instance_count'] - if 'device_count' in compile_info: - base_report['Device Count'] = compile_info['device_count'] - base_report['Graph Compile'] = graph_compile_report - - # Initalize Output Dir and Reports - output_dir = os.path.abspath('general_perf/reports/' + - self.backend_type + '/' + - workload['model']) - os.makedirs(output_dir, exist_ok=True) - - # Compile only mode will stop here - if self.compile_only_mode: - base_report.pop("Backend") - return compile_info["compile_status"], base_report - - # load runtime backend - """ - Start Here - """ - batch_sizes = pre_compile_config['workload']['batch_sizes'] - self.runtime_backend.configs = compile_info - self.runtime_backend.workload = workload - self.runtime_backend.model_info = model_info - - self.runtime_backend.load(workload['batch_sizes'][0]) - # test accuracy - accuracy_report = {} - AccuracyChecker = self.get_accuracy_checker( - model_info['dataset_name'] - if model_info['dataset_name'] else 'fake_dataset') - AccuracyChecker.runtime_backend = self.runtime_backend - AccuracyChecker.dataloader = dataset - AccuracyChecker.output_dir = output_dir - AccuracyChecker.configs = compile_info - - if workload['test_accuracy']: - log.info("******************************************* Running Accuracy Checker... 
*******************************************") - - dataset.rebatch(self.runtime_backend.get_loaded_batch_size()) - accuracy_results = AccuracyChecker.calculate_acc( - workload['data_percent']) - - accuracy_report['Data Percent'] = workload['data_percent'] - accuracy_report.update(accuracy_results) - - # test numeric - if workload['test_numeric']: - log.info("******************************************* Running Numeric Checker... *******************************************") - - dataset.rebatch(self.runtime_backend.get_loaded_batch_size()) - if not workload['test_accuracy']: - accuracy_results = AccuracyChecker.calculate_acc( - workload['data_percent']) - diff_results = AccuracyChecker.calculate_diff() - accuracy_report.update(diff_results) - # accuracy_report['Diff Dist'] = compile_info['model'] + '-to-' + compile_info['compile_precision'].lower() + ".png" - - if accuracy_report: - base_report['Accuracy'] = accuracy_report - - # function to test qps and latency - if workload['test_perf']: - log.info("******************************************* Runing QPS Checker... *******************************************") - performance_reports = [] - qs_status = self.runtime_backend.is_qs_mode_supported() - if qs_status: - qs_config = self.runtime_backend.generate_qs_config() - performance_reports = self.qs_benchmark(qs_config) - else: - for bs in batch_sizes: - self.runtime_backend.load(bs) - batch_reports = self.runtime_backend.benchmark(dataset) - performance_reports.append(batch_reports) - base_report['Performance'] = performance_reports - - if "Instance Count" not in base_report: - log.warning("Vendors need to Add # of instances") - if "Device Count" not in base_report: - log.warning("Vendors need to Add # of devices") - - # write output to json file - output_report_path = output_dir + "/result-" + compile_info['compile_precision'].lower() + ".json" - with open(output_report_path, 'w') as file: - json.dump(base_report, file, indent=4) - - base_report.pop("Backend") - log.info("Testing Finish. Report is saved in path: [ {}/{} ]". - format(output_dir[output_dir.rfind('general_perf'):], - os.path.basename(output_report_path))) - - return compile_info["compile_status"] - - #WIP - def qs_benchmark(self, qs_config: Dict[str, Any]) -> list: - return [] - - def get_accuracy_checker(self, dataset_name: str): - AccuracyChecker = importlib.import_module('general_perf.datasets.' 
+ - dataset_name + - ".test_accuracy") - AccuracyChecker = getattr(AccuracyChecker, 'AccuracyChecker') - return AccuracyChecker() - - def get_model_info(self, model_name: str) -> Dict[str, Any]: - with open("general_perf/model_zoo/" + model_name + '.json', - 'r') as file: - model_info = json.load(file) - return model_info - - def get_cpu_name(self): - command = "lscpu | grep 'Model name' | awk -F: '{print $2}'" - cpu_name = subprocess.check_output(command, shell=True) - return cpu_name.decode().strip() - - def check_interact_info( - self, pre_compile_config: Dict[str, Dict]) -> Dict[str, Any]: - interact_info = self.compile_backend.get_interact_profile( - pre_compile_config) - - answer = {} - if len(interact_info) == 0: - return answer - - dialog_style = Style.from_dict({ - 'dialog': 'bg:#88b8ff', - 'dialog frame.label': 'bg:#ffffff #000000', - 'dialog.body': 'bg:#000000 #a0acde', - 'dialog shadow': 'bg:#004aaa', - }) - - input_style = Style.from_dict({ - 'dialog': 'bg:#88b8ff', - 'dialog frame.label': 'bg:#ffffff #000000', - 'dialog.body': 'bg:#000000 #a0acde', - 'dialog shadow': 'bg:#004aaa', - 'text-area.prompt': 'bg:#ffffff', - 'text-area': '#000000', - }) - - option = yes_no_dialog(title=self.backend_type + '编译配置', - text='[请选择]:是否进行编译后端配置:', - style=dialog_style).run() - if option: - sum_question = len(interact_info) - for i, question in enumerate(interact_info): - if question['depends']: - state = 0 - for title in question['depends'].split(','): - if not answer[title]: - state = 1 - if state: - continue - if question['dialog_type'] == 'Yes/No Dialog': - option = yes_no_dialog( - title=self.backend_type + '编译配置进度(' + str(i + 1) + - '/' + str(sum_question) + ')', - text="[Backend " + self.backend_type + "]: " + - question['note'], - style=dialog_style).run() - elif question['dialog_type'] == "Input Dialog": - option = input_dialog( - title=self.backend_type + '编译配置进度(' + str(i + 1) + - '/' + str(sum_question) + ')', - text="[Backend " + self.backend_type + "]: " + - question['note'], - style=input_style).run() - elif question['dialog_type'] == "Radiolist Dialog": - choice = [(i, text) - for i, text in enumerate(question['options'])] - num = radiolist_dialog( - title=self.backend_type + '编译配置进度(' + str(i + 1) + - '/' + str(sum_question) + ')', - text="[Backend " + self.backend_type + "]: " + - question['note'], - values=choice, - style=dialog_style).run() - option = question['options'][num] if num is not None else question[ - 'default'] - answer[question['name']] = option - - return answer - - def activate_venv(self, hardware_type: str) -> bool: - - return True - - def deactivate_venv(self): - sys.path[: - 0] = self.prev_sys_path #will also revert the added site-packages - sys.prefix = self.real_prefix - os.environ['PATH'] = self.old_os_path - - -if __name__ == "__main__": - engine = PerfEngine() - engine.start_engine() diff --git a/models/cv/face/facenet/ixrt/README.md b/models/cv/face/facenet/ixrt/README.md index 0c2df512..2aed8d25 100644 --- a/models/cv/face/facenet/ixrt/README.md +++ b/models/cv/face/facenet/ixrt/README.md @@ -15,19 +15,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-glx -pip3 install tensorflow -pip3 install onnxsim -pip3 install scikit-learn -pip3 install tf_slim -pip3 install tqdm -pip3 install pycuda -pip3 install onnx -pip3 install tabulate -pip3 install cv2 -pip3 install scipy==1.8.0 -pip3 install pycocotools -pip3 install opencv-python==4.6.0.66 -pip3 install simplejson +pip3 install -r requirements.txt ``` ### Download diff --git 
a/models/cv/face/facenet/ixrt/ci/prepare.sh b/models/cv/face/facenet/ixrt/ci/prepare.sh new file mode 100644 index 00000000..b1882a4b --- /dev/null +++ b/models/cv/face/facenet/ixrt/ci/prepare.sh @@ -0,0 +1,39 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi +unzip -q /root/data/checkpoints/20180408-102900.zip -d ./ +unzip -q /root/data/datasets/facenet_datasets.zip -d ./ +mkdir -p checkpoints +mkdir -p facenet_weights +cp -r /root/data/3rd_party/facenet-pytorch ./ +cp ./tensorflow2pytorch.py facenet-pytorch +python3 ./facenet-pytorch/tensorflow2pytorch.py \ + --facenet_weights_path ./facenet_weights \ + --facenet_pb_path ./20180408-102900 \ + --onnx_save_name facenet_export.onnx +mv facenet_export.onnx ./facenet_weights + +sed -i -e 's#/last_bn/BatchNormalization_output_0#1187#g' -e 's#/avgpool_1a/GlobalAveragePool_output_0#1178#g' deploy.py build_engine.py \ No newline at end of file diff --git a/models/cv/face/facenet/ixrt/inference.py b/models/cv/face/facenet/ixrt/inference.py index ec9876e3..eaed8b27 100644 --- a/models/cv/face/facenet/ixrt/inference.py +++ b/models/cv/face/facenet/ixrt/inference.py @@ -58,6 +58,7 @@ def main(config): print("Warm Done.") # Inference + metricResult = {"metricResult": {}} if config.test_mode == "FPS": torch.cuda.synchronize() start_time = time.time() @@ -73,6 +74,7 @@ def main(config): print("FPS : ", fps) print(f"Performance Check : Test {fps} >= target {config.fps_target}") + metricResult["metricResult"]["FPS"] = round(fps, 3) if fps >= config.fps_target: print("pass!") exit() @@ -84,7 +86,7 @@ def main(config): classes = [] embeddings = [] - + start_time = time.time() for xb, yb in tqdm(embed_loader): output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) @@ -102,7 +104,8 @@ def main(config): classes.extend(yb[0:current_imgs_num].numpy()) embeddings.extend(output) - + e2e_time = time.time() - start_time + print(f"E2E time: {e2e_time:.3f} seconds") embeddings_dict = dict(zip(crop_paths,embeddings)) pairs = read_pairs(config.datasets_dir + config.pairs_name) @@ -119,6 +122,9 @@ def main(config): #eer = brentq(lambda x: 1. - x - interpolate.interp1d(fpr, tpr, fill_value="extrapolate")(x), 0., 1.) 
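+    # Record E2E time, AUC and mean accuracy in metricResult for CI reporting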
#print('Equal Error Rate (EER): %1.3f' % eer) + metricResult["metricResult"]["E2E time"] = round(e2e_time, 3) + metricResult["metricResult"]["AUC"] = round(auc, 3) + metricResult["metricResult"]["Acc"] = round(np.mean(accuracy), 3) acc = np.mean(accuracy) print(f"Accuracy Check : Test {acc} >= target {config.acc_target}") if acc >= config.acc_target: @@ -127,6 +133,7 @@ def main(config): else: print("failed!") exit(1) + print(metricResult) def parse_config(): parser = argparse.ArgumentParser() diff --git a/models/cv/face/facenet/ixrt/requirements.txt b/models/cv/face/facenet/ixrt/requirements.txt new file mode 100644 index 00000000..b1b549a8 --- /dev/null +++ b/models/cv/face/facenet/ixrt/requirements.txt @@ -0,0 +1,12 @@ +tensorflow +onnxsim +scikit-learn +tf_slim +tqdm +pycuda +onnx +tabulate +scipy==1.8.0 +pycocotools +opencv-python==4.6.0.66 +simplejson \ No newline at end of file diff --git a/models/cv/pose_estimation/lightweightopenpose/ixrt/README.md b/models/cv/pose_estimation/lightweight_openpose/ixrt/README.md similarity index 85% rename from models/cv/pose_estimation/lightweightopenpose/ixrt/README.md rename to models/cv/pose_estimation/lightweight_openpose/ixrt/README.md index cf25c1a8..ca18417a 100644 --- a/models/cv/pose_estimation/lightweightopenpose/ixrt/README.md +++ b/models/cv/pose_estimation/lightweight_openpose/ixrt/README.md @@ -15,13 +15,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install onnx -pip3 install tqdm -pip3 install onnxsim -pip3 install simplejson -pip3 install opencv-python==4.6.0.66 -pip3 install mmcv==1.5.3 -pip3 install pycocotools +pip3 install -r requirements.txt ``` ### Download @@ -37,15 +31,15 @@ cd lightweight-human-pose-estimation.pytorch mv scripts/convert_to_onnx.py . python3 convert_to_onnx.py --checkpoint-path /Path/to/checkpoint_iter_370000.pth cd .. -mkdir lightweight_openpose -onnxsim ./lightweight-human-pose-estimation.pytorch/human-pose-estimation.onnx ./lightweight_openpose/lightweight_openpose.onnx +mkdir -p checkpoints +onnxsim ./lightweight-human-pose-estimation.pytorch/human-pose-estimation.onnx ./checkpoints/lightweight_openpose.onnx ``` ## Inference ```bash export DATASETS_DIR=/Path/to/coco_pose/ -export CHECKPOINTS_DIR=/Path/to/lightweight_openpose/ +export CHECKPOINTS_DIR=/Path/to/checkpoints/ ``` ### FP16 diff --git a/models/cv/pose_estimation/lightweightopenpose/ixrt/build_engine.py b/models/cv/pose_estimation/lightweight_openpose/ixrt/build_engine.py similarity index 100% rename from models/cv/pose_estimation/lightweightopenpose/ixrt/build_engine.py rename to models/cv/pose_estimation/lightweight_openpose/ixrt/build_engine.py diff --git a/models/cv/pose_estimation/lightweight_openpose/ixrt/ci/prepare.sh b/models/cv/pose_estimation/lightweight_openpose/ixrt/ci/prepare.sh new file mode 100644 index 00000000..b7c493f5 --- /dev/null +++ b/models/cv/pose_estimation/lightweight_openpose/ixrt/ci/prepare.sh @@ -0,0 +1,37 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r requirements.txt + +cp -r /root/data/3rd_party/lightweight-human-pose-estimation.pytorch ./ +cd lightweight-human-pose-estimation.pytorch +mv scripts/convert_to_onnx.py . +ln -s /root/data/checkpoints/checkpoint_iter_370000.pth ./ +python3 convert_to_onnx.py --checkpoint-path checkpoint_iter_370000.pth +cd .. +mkdir -p checkpoints +onnxsim ./lightweight-human-pose-estimation.pytorch/human-pose-estimation.onnx ./checkpoints/lightweight_openpose.onnx \ No newline at end of file diff --git a/models/cv/pose_estimation/lightweightopenpose/ixrt/common.py b/models/cv/pose_estimation/lightweight_openpose/ixrt/common.py similarity index 100% rename from models/cv/pose_estimation/lightweightopenpose/ixrt/common.py rename to models/cv/pose_estimation/lightweight_openpose/ixrt/common.py diff --git a/models/cv/pose_estimation/lightweightopenpose/ixrt/datasets/__init__.py b/models/cv/pose_estimation/lightweight_openpose/ixrt/datasets/__init__.py similarity index 100% rename from models/cv/pose_estimation/lightweightopenpose/ixrt/datasets/__init__.py rename to models/cv/pose_estimation/lightweight_openpose/ixrt/datasets/__init__.py diff --git a/models/cv/pose_estimation/lightweightopenpose/ixrt/datasets/coco.py b/models/cv/pose_estimation/lightweight_openpose/ixrt/datasets/coco.py similarity index 100% rename from models/cv/pose_estimation/lightweightopenpose/ixrt/datasets/coco.py rename to models/cv/pose_estimation/lightweight_openpose/ixrt/datasets/coco.py diff --git a/models/cv/pose_estimation/lightweightopenpose/ixrt/inference_accuracy.py b/models/cv/pose_estimation/lightweight_openpose/ixrt/inference_accuracy.py old mode 100755 new mode 100644 similarity index 100% rename from models/cv/pose_estimation/lightweightopenpose/ixrt/inference_accuracy.py rename to models/cv/pose_estimation/lightweight_openpose/ixrt/inference_accuracy.py diff --git a/models/cv/pose_estimation/lightweightopenpose/ixrt/inference_performance.py b/models/cv/pose_estimation/lightweight_openpose/ixrt/inference_performance.py old mode 100755 new mode 100644 similarity index 100% rename from models/cv/pose_estimation/lightweightopenpose/ixrt/inference_performance.py rename to models/cv/pose_estimation/lightweight_openpose/ixrt/inference_performance.py diff --git a/models/cv/pose_estimation/lightweightopenpose/ixrt/modules/__init__.py b/models/cv/pose_estimation/lightweight_openpose/ixrt/modules/__init__.py similarity index 100% rename from models/cv/pose_estimation/lightweightopenpose/ixrt/modules/__init__.py rename to models/cv/pose_estimation/lightweight_openpose/ixrt/modules/__init__.py diff --git a/models/cv/pose_estimation/lightweightopenpose/ixrt/modules/keypoints.py b/models/cv/pose_estimation/lightweight_openpose/ixrt/modules/keypoints.py similarity index 100% rename from models/cv/pose_estimation/lightweightopenpose/ixrt/modules/keypoints.py rename to models/cv/pose_estimation/lightweight_openpose/ixrt/modules/keypoints.py diff --git a/models/cv/pose_estimation/lightweightopenpose/ixrt/modules/pose.py b/models/cv/pose_estimation/lightweight_openpose/ixrt/modules/pose.py similarity index 100% rename from models/cv/pose_estimation/lightweightopenpose/ixrt/modules/pose.py 
rename to models/cv/pose_estimation/lightweight_openpose/ixrt/modules/pose.py diff --git a/models/cv/pose_estimation/lightweight_openpose/ixrt/requirements.txt b/models/cv/pose_estimation/lightweight_openpose/ixrt/requirements.txt new file mode 100644 index 00000000..34aca051 --- /dev/null +++ b/models/cv/pose_estimation/lightweight_openpose/ixrt/requirements.txt @@ -0,0 +1,7 @@ +onnx +tqdm +onnxsim +simplejson +opencv-python==4.6.0.66 +mmcv==1.5.3 +pycocotools \ No newline at end of file diff --git a/models/cv/pose_estimation/lightweightopenpose/ixrt/scripts/infer_lightweight_openpose_fp16_accuracy.sh b/models/cv/pose_estimation/lightweight_openpose/ixrt/scripts/infer_lightweight_openpose_fp16_accuracy.sh similarity index 100% rename from models/cv/pose_estimation/lightweightopenpose/ixrt/scripts/infer_lightweight_openpose_fp16_accuracy.sh rename to models/cv/pose_estimation/lightweight_openpose/ixrt/scripts/infer_lightweight_openpose_fp16_accuracy.sh diff --git a/models/cv/pose_estimation/lightweightopenpose/ixrt/scripts/infer_lightweight_openpose_fp16_performance.sh b/models/cv/pose_estimation/lightweight_openpose/ixrt/scripts/infer_lightweight_openpose_fp16_performance.sh similarity index 100% rename from models/cv/pose_estimation/lightweightopenpose/ixrt/scripts/infer_lightweight_openpose_fp16_performance.sh rename to models/cv/pose_estimation/lightweight_openpose/ixrt/scripts/infer_lightweight_openpose_fp16_performance.sh diff --git a/models/cv/pose_estimation/rtmpose/ixrt/README.md b/models/cv/pose_estimation/rtmpose/ixrt/README.md index 3e6b68a4..ea69ea32 100644 --- a/models/cv/pose_estimation/rtmpose/ixrt/README.md +++ b/models/cv/pose_estimation/rtmpose/ixrt/README.md @@ -15,13 +15,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install onnx -pip3 install tqdm -pip3 install onnxsim -pip3 install mmdet==3.3.0 -pip3 install mmpose==1.3.1 -pip3 install mmdeploy==1.3.1 -pip3 install mmengine==0.10.4 +pip3 install -r requirements.txt ``` ### Download @@ -37,7 +31,7 @@ Dataset: to download the valida mkdir -p data/rtmpose -wget -P data/rtmpose/ https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-aic-coco_pt-aic-coco_420e-256x192-63eb25f7_20230126.pth +wget -P data/rtmpose/ https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-aic-coco_pt-aic-coco_420e-256x192-63eb25f7_20230126.pth python3 export.py --weight data/rtmpose/rtmpose-m_simcc-aic-coco_pt-aic-coco_420e-256x192-63eb25f7_20230126.pth --cfg rtmpose-m_8xb256-420e_coco-256x192.py --input 1,3,256,192 --output data/rtmpose/rtmpose.onnx diff --git a/models/cv/pose_estimation/rtmpose/ixrt/ci/prepare.sh b/models/cv/pose_estimation/rtmpose/ixrt/ci/prepare.sh new file mode 100644 index 00000000..d2af8179 --- /dev/null +++ b/models/cv/pose_estimation/rtmpose/ixrt/ci/prepare.sh @@ -0,0 +1,36 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r requirements.txt + +mkdir -p data/rtmpose +ln -s /root/data/checkpoints/rtmpose-m_simcc-aic-coco_pt-aic-coco_420e-256x192-63eb25f7_20230126.pth data/rtmpose/ + +python3 export.py --weight data/rtmpose/rtmpose-m_simcc-aic-coco_pt-aic-coco_420e-256x192-63eb25f7_20230126.pth --cfg rtmpose-m_8xb256-420e_coco-256x192.py --input 1,3,256,192 --output data/rtmpose/rtmpose.onnx + +# use onnxsim optimize onnx model +onnxsim data/rtmpose/rtmpose.onnx data/rtmpose/rtmpose_opt.onnx \ No newline at end of file diff --git a/models/cv/pose_estimation/rtmpose/ixrt/predict.py b/models/cv/pose_estimation/rtmpose/ixrt/predict.py index 9d11f889..51cfd6c7 100644 --- a/models/cv/pose_estimation/rtmpose/ixrt/predict.py +++ b/models/cv/pose_estimation/rtmpose/ixrt/predict.py @@ -150,6 +150,8 @@ def main(): out_file="./result.jpg") print("Results saved as result.jpg.") + metricResult = {"metricResult": {"Results": "Results saved as result.jpg"}} + print(metricResult) if __name__ == "__main__": main() \ No newline at end of file diff --git a/models/cv/pose_estimation/rtmpose/ixrt/requirements.txt b/models/cv/pose_estimation/rtmpose/ixrt/requirements.txt new file mode 100644 index 00000000..c7459a7a --- /dev/null +++ b/models/cv/pose_estimation/rtmpose/ixrt/requirements.txt @@ -0,0 +1,7 @@ +onnx +tqdm +onnxsim +mmdet==3.3.0 +mmpose==1.3.1 +mmdeploy==1.3.1 +mmengine==0.10.4 \ No newline at end of file diff --git a/models/cv/segmentation/mask_rcnn/ixrt/ci/prepare.sh b/models/cv/segmentation/mask_rcnn/ixrt/ci/prepare.sh index 38f651f9..66a85756 100644 --- a/models/cv/segmentation/mask_rcnn/ixrt/ci/prepare.sh +++ b/models/cv/segmentation/mask_rcnn/ixrt/ci/prepare.sh @@ -1,3 +1,21 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -x + ln -s /root/data/checkpoints/maskrcnn.wts ./python/ ln -s /root/data/datasets/coco ./coco if [ "$1" = "nvidia" ]; then diff --git a/models/cv/segmentation/mask_rcnn/ixrt/python/maskrcnn.py b/models/cv/segmentation/mask_rcnn/ixrt/python/maskrcnn.py index 75484195..455ff850 100644 --- a/models/cv/segmentation/mask_rcnn/ixrt/python/maskrcnn.py +++ b/models/cv/segmentation/mask_rcnn/ixrt/python/maskrcnn.py @@ -186,6 +186,7 @@ def run_maskrcnn(engine_file, image_folder): def get_maskrcnn_perf(config): cuda.init() logger = trt.Logger(trt.Logger.WARNING) + metricResult = {"metricResult": {}} engine_file_buffer = open(config.engine_file, "rb") runtime = trt.Runtime(logger) assert runtime @@ -227,6 +228,8 @@ def get_maskrcnn_perf(config): output["allocation"].free() engine_file_buffer.close() + metricResult["metricResult"]["FPS"] = round(fps, 3) + print(metricResult) print("\nFPS : ", fps) print(f"Performance Check : Test {fps} >= target {config.fps_target}") if fps >= config.fps_target: @@ -237,6 +240,7 @@ def get_maskrcnn_perf(config): def get_maskrcnn_acc(config): json_result = [] + metricResult = {"metricResult": {}} class_map = coco80_to_coco91_class() # Load dataloader @@ -313,10 +317,6 @@ def get_maskrcnn_acc(config): batched_paddings[0] ) save2json(batch_img_id, bboxs_masks, json_result, class_map) - end_time = time.time() - end2end_time = end_time - start_time - - print(F"E2E time : {end2end_time:.3f} seconds") print("Forward done !") tmp_result_name = "pred_results.json" @@ -341,6 +341,10 @@ def get_maskrcnn_acc(config): print(f"==============================eval COCO segm mAP ==============================") segm_eval.summarize() + end_time = time.time() + end2end_time = end_time - start_time + + print(F"E2E time : {end2end_time:.3f} seconds") _, map50 = eval.stats[:2] print("bbox mAP@0.5 : ", map50) print(f"bbox Accuracy Check : Test {map50} >= target {config.map_target}") @@ -348,6 +352,10 @@ def get_maskrcnn_acc(config): _, segm_map50 = segm_eval.stats[:2] print("segm mAP@0.5 : ", segm_map50) print(f"segm Accuracy Check : Test {segm_map50} >= target {config.segm_map_target}") + metricResult["metricResult"]["E2E time"] = round(end2end_time, 3) + metricResult["metricResult"]["bbox mAP@0.5"] = round(map50, 3) + metricResult["metricResult"]["segm mAP@0.5"] = round(segm_map50, 3) + print(metricResult) if map50 >= config.map_target and segm_map50 >= config.segm_map_target: print("pass!") diff --git a/models/cv/segmentation/solov1/ixrt/README.md b/models/cv/segmentation/solov1/ixrt/README.md index d675f549..45de0d38 100644 --- a/models/cv/segmentation/solov1/ixrt/README.md +++ b/models/cv/segmentation/solov1/ixrt/README.md @@ -11,20 +11,15 @@ SOLO (Segmenting Objects by Locations) is a new instance segmentation method tha ```bash yum install mesa-libGL -pip3 install tqdm -pip3 install onnx -pip3 install onnxsim -pip3 install tabulate -pip3 install mmdet==2.28.2 -pip3 install addict -pip3 install yapf -pip3 install pycuda +pip3 install -r requirements.txt ``` ### Dependency The inference of the Solov1 model requires a dependency on a well-adapted mmcv-v1.7.0 library. Please inquire with the staff to obtain the relevant libraries. 
+You can follow here to build: https://gitee.com/deep-spark/deepsparkhub/blob/master/toolbox/MMDetection/prepare_mmcv.sh + ```bash cd mmcv sh build_mmcv.sh diff --git a/models/cv/segmentation/solov1/ixrt/ci/prepare.sh b/models/cv/segmentation/solov1/ixrt/ci/prepare.sh new file mode 100644 index 00000000..58447425 --- /dev/null +++ b/models/cv/segmentation/solov1/ixrt/ci/prepare.sh @@ -0,0 +1,48 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r requirements.txt + +cp -r /root/data/3rd_party/mmcv-v1.7.1 ./mmcv +cp -r -T /root/data/repos/deepsparkhub/toolbox/MMDetection/patch/mmcv/v1.7.1 ./mmcv +cd mmcv +rm -rf mmcv/ops/csrc/common/cuda/spconv/ mmcv/ops/csrc/common/utils/spconv/ +rm -f mmcv/ops/csrc/pytorch/cpu/sparse_* +rm -f mmcv/ops/csrc/pytorch/cuda/fused_spconv_ops_cuda.cu +rm -f mmcv/ops/csrc/pytorch/cuda/spconv_ops_cuda.cu +rm -f mmcv/ops/csrc/pytorch/cuda/sparse_* +rm -f mmcv/ops/csrc/pytorch/sp* + +bash clean_mmcv.sh +bash build_mmcv.sh +bash install_mmcv.sh +cd .. 
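+# Link the pretrained SOLO checkpoint, export it to ONNX, and stage the model under ./checkpoints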
+ +mkdir -p checkpoints +ln -s /root/data/checkpoints/solo_r50_fpn_3x_coco_20210901_012353-11d224d7.pth ./ +python3 solo_torch2onnx.py --cfg ./solo_r50_fpn_3x_coco.py --checkpoint ./solo_r50_fpn_3x_coco_20210901_012353-11d224d7.pth --batch_size 1 +mv r50_solo_bs1_800x800.onnx ./checkpoints/r50_solo_bs1_800x800.onnx \ No newline at end of file diff --git a/models/cv/segmentation/solov1/ixrt/requirements.txt b/models/cv/segmentation/solov1/ixrt/requirements.txt new file mode 100644 index 00000000..0c2fa634 --- /dev/null +++ b/models/cv/segmentation/solov1/ixrt/requirements.txt @@ -0,0 +1,8 @@ +tqdm +onnx +onnxsim +tabulate +mmdet==2.28.2 +addict +yapf +pycuda \ No newline at end of file diff --git a/models/cv/segmentation/solov1/ixrt/solov1_inference.py b/models/cv/segmentation/solov1/ixrt/solov1_inference.py index 473bff85..e2f0ddf0 100644 --- a/models/cv/segmentation/solov1/ixrt/solov1_inference.py +++ b/models/cv/segmentation/solov1/ixrt/solov1_inference.py @@ -139,14 +139,18 @@ def main(): # Load Engine engine, context = create_engine_context(args.engine, logger) inputs, outputs, allocations = get_io_bindings(engine) - + metricResult = {"metricResult": {}} if args.task=="precision": + start_time = time.time() segm_mAP= eval_coco(args,inputs, outputs, allocations, context) + e2e_time = time.time() - start_time + print(F"E2E time : {e2e_time:.3f} seconds") print("="*40) print("segm_mAP:{0}".format(round(segm_mAP,3))) print("="*40) print(f"Check segm_mAP Test : {round(segm_mAP,3)} Target:{args.target_map} State : {'Pass' if round(segm_mAP,3) >= args.target_map else 'Fail'}") + metricResult["metricResult"]["segm_mAP"] = round(segm_mAP, 3) status_map = check_target(segm_mAP, args.target_map) sys.exit(int(not (status_map))) else: @@ -162,8 +166,10 @@ def main(): print("fps:{0}".format(round(fps,2))) print("="*40) print(f"Check fps Test : {round(fps,3)} Target:{args.target_fps} State : {'Pass' if fps >= args.target_fps else 'Fail'}") + metricResult["metricResult"]["FPS"] = round(fps, 3) status_fps = check_target(fps, args.target_fps) sys.exit(int(not (status_fps))) + print(metricResult) if __name__ == "__main__": diff --git a/models/multimodal/text_and_image/clip/ixformer/README.md b/models/multimodal/text_and_image/clip/ixrt/README.md similarity index 91% rename from models/multimodal/text_and_image/clip/ixformer/README.md rename to models/multimodal/text_and_image/clip/ixrt/README.md index 7b5ccd67..587b1cc2 100644 --- a/models/multimodal/text_and_image/clip/ixformer/README.md +++ b/models/multimodal/text_and_image/clip/ixrt/README.md @@ -25,9 +25,9 @@ pip3 install -U transformers==4.27.1 Pretrained model: Go to the website to find the pre-trained model you need. Here, we choose clip-vit-base-patch32. ```bash -# Download model from the website and make sure the model's path is "/home/data/openai/clip-vit-base-patch32" -mkdir -p /data -unzip clip-vit-base-patch32.zip +# Download model from the website and make sure the model's path is "data/clip-vit-base-patch32" +mkdir -p data +unzip clip-vit-base-patch32.zip -d data/ ``` ## Run model @@ -37,5 +37,5 @@ unzip clip-vit-base-patch32.zip Please modify the part in the test_clip.py script that pertains to the model path. 
```bash -python3 test_clip.py +python3 inference.py ``` diff --git a/models/multimodal/text_and_image/clip/ixrt/ci/prepare.sh b/models/multimodal/text_and_image/clip/ixrt/ci/prepare.sh new file mode 100644 index 00000000..606a1d98 --- /dev/null +++ b/models/multimodal/text_and_image/clip/ixrt/ci/prepare.sh @@ -0,0 +1,31 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip3 install -U transformers==4.27.1 + +mkdir -p data +ln -s /root/data/checkpoints/clip-vit-base-patch32 data/ \ No newline at end of file diff --git a/models/multimodal/text_and_image/clip/ixformer/inference.py b/models/multimodal/text_and_image/clip/ixrt/inference.py similarity index 82% rename from models/multimodal/text_and_image/clip/ixformer/inference.py rename to models/multimodal/text_and_image/clip/ixrt/inference.py index 013c96e8..5821b7e9 100644 --- a/models/multimodal/text_and_image/clip/ixformer/inference.py +++ b/models/multimodal/text_and_image/clip/ixrt/inference.py @@ -26,18 +26,20 @@ from transformers import CLIPProcessor device = torch.device("cuda" if torch.cuda.is_available() else "cpu") model = ( - CLIPModel.from_pretrained("/home/data/openai/clip-vit-base-patch32") + CLIPModel.from_pretrained("/data/clip-vit-base-patch32") .to(device) .half() ) model = model.eval() -processor = CLIPProcessor.from_pretrained("/home/data/openai/clip-vit-base-patch32") +processor = CLIPProcessor.from_pretrained("data/clip-vit-base-patch32") url = "http://images.cocodataset.org/val2017/000000039769.jpg" image = Image.open(requests.get(url, stream=True).raw) +metricResult = {"metricResult": {}} batch_size_list = [32, 64, 128, 256, 512, 1024, 2048] with torch.no_grad(): + e2e_start_time = time.time() for batch_size in batch_size_list: images = [image for item in range(batch_size)] inputs = processor( @@ -67,5 +69,8 @@ with torch.no_grad(): ) # we can take the softmax to get the label probabilities print(probs[:5]) print(probs[-5:-1]) - - print("QPS: ", batch_size / (end_time - start_time)) \ No newline at end of file + metricResult["metricResult"]["QPS-{batch_size}"] = round(batch_size / (end_time - start_time), 3) + print("QPS: ", batch_size / (end_time - start_time)) + e2e_time = time.time() - e2e_start_time + metricResult["metricResult"]["E2E time"] = round(e2e_time, 3) + print(metricResult) \ No newline at end of file diff --git a/models/nlp/language_model/albert/ixrt/README.md b/models/nlp/language_model/albert/ixrt/README.md index 14ba41cd..1cc0156d 100644 --- a/models/nlp/language_model/albert/ixrt/README.md +++ b/models/nlp/language_model/albert/ixrt/README.md @@ -67,9 +67,6 @@ ln -s ${PROJ_ROOT}/toolbox/ByteMLPerf ./ pip3 install -r 
./ByteMLPerf/byte_infer_perf/general_perf/requirements.txt pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/requirements.txt -# modify perf_engine.py -mv ./perf_engine.py ./ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py - # edit madlag/albert-base-v2-squad path sed -i "s#madlag#/${MODEL_PATH}/madlag#" ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/data_loader.py diff --git a/models/nlp/language_model/albert/ixrt/ci/prepare.sh b/models/nlp/language_model/albert/ixrt/ci/prepare.sh index 0f4f8f51..41644a02 100644 --- a/models/nlp/language_model/albert/ixrt/ci/prepare.sh +++ b/models/nlp/language_model/albert/ixrt/ci/prepare.sh @@ -26,6 +26,7 @@ else fi cp /root/data/3rd_party/albert-torch-fp32.json ./ +cp /root/data/3rd_party/iluvatar-corex-ixrt/tools/optimizer/optimizer.py ./ python3 torch2onnx.py --model_path /root/data/checkpoints/open_albert/albert-base-squad.pt --output_path albert-torch-fp32.onnx onnxsim albert-torch-fp32.onnx albert-torch-fp32-sim.onnx @@ -42,9 +43,6 @@ ln -s ../../../../../toolbox/ByteMLPerf ./ pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/requirements.txt pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/requirements.txt -# modify perf_engine.py -mv ./perf_engine.py ./ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py - # edit madlag/albert-base-v2-squad path sed -i "s#madlag#/${MODEL_PATH}/madlag#" ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/data_loader.py diff --git a/models/nlp/language_model/albert/ixrt/perf_engine.py b/models/nlp/language_model/albert/ixrt/perf_engine.py deleted file mode 100644 index 089d9860..00000000 --- a/models/nlp/language_model/albert/ixrt/perf_engine.py +++ /dev/null @@ -1,349 +0,0 @@ -# Copyright 2023 ByteDance and/or its affiliates. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import sys -import os -import logging -import importlib -import json -import subprocess -import time - -from typing import Any, Dict, Tuple -from prompt_toolkit.shortcuts import radiolist_dialog, input_dialog, yes_no_dialog -from prompt_toolkit.styles import Style - -BYTE_MLPERF_ROOT = os.path.dirname( - os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -os.chdir(BYTE_MLPERF_ROOT) -sys.path.insert(0, BYTE_MLPERF_ROOT) - -import argparse -from general_perf.core.configs.workload_store import load_workload -from general_perf.core.configs.dataset_store import load_dataset -from general_perf.core.configs.backend_store import init_compile_backend, init_runtime_backend - -logging.basicConfig(level=logging.INFO) -log = logging.getLogger("PerfEngine") -os.environ["TF_CPP_MIN_LOG_LEVEL"] = '3' - - -def get_args(): - """Parse commandline.""" - parser = argparse.ArgumentParser() - parser.add_argument( - "--task", - default="resnet50-tf-fp32", - help="The task going to be evaluted, refs to workloads/") - parser.add_argument( - "--hardware_type", - default="GPU", - help="The backend going to be evaluted, refs to backends/") - parser.add_argument("--compile_only", - action='store_true', - help="Run compilation only") - - args = parser.parse_args() - return args - - -class PerfEngine: - def __init__(self) -> None: - super().__init__() - self.args = get_args() - self.workload = load_workload(self.args.task) - self.backend_type = self.args.hardware_type - self.compile_backend = None - self.old_os_path = os.environ['PATH'] - self.prev_sys_path = list(sys.path) - self.real_prefix = sys.prefix - self.compile_only_mode = False - - def start_engine(self) -> None: - ''' - Byte MlPerf will create an virtual env for each backend to avoid dependance conflict - ''' - success, total = 0, len(self.workload) - if total == 0: - return - log.info("******************* Backend Env Initization *******************") - status = self.activate_venv(self.backend_type) - if not status: - log.warning("Activate virtualenv Failed, Please Check...") - - self.compile_backend = init_compile_backend(self.backend_type) - self.runtime_backend = init_runtime_backend(self.backend_type) - - output_dir = os.path.abspath('general_perf/reports/' + - self.backend_type) - os.makedirs(output_dir, exist_ok=True) - - status = self.single_workload_perf(self.workload) - - def single_workload_perf( - self, workload: Dict[str, Any]) -> bool: - log.info("******************************************* Start to test model: {}. *******************************************".format(workload['model'])) - - # Check Compile Only Mode - self.compile_only_mode = False - if self.args.compile_only or workload['compile_only']: - self.compile_only_mode = True - - base_report = { - "Model": workload['model'].upper(), - "Backend": self.backend_type, - "Host Info": self.get_cpu_name() - } - - # Initalize Model Config Info - model_info = self.get_model_info(workload['model']) - pre_compile_config = {"workload": workload, 'model_info': model_info} - interact_info = self.check_interact_info(pre_compile_config) - pre_compile_config['interact_info'] = interact_info - if not model_info['dataset_name']: - model_info['dataset_name'] = 'fake_dataset' - - - ''' - Compile Backend could do some optimization like convert model format here - ''' - log.info("******************************************* Running Backend Compilation... 
*******************************************") - log.info("Running Backend Preoptimization...") - pre_compile_config = self.compile_backend.pre_optimize(pre_compile_config) - - - # Initalize dataset - dataset = load_dataset(model_info) - dataset.preprocess() - base_report['Dataset'] = model_info['dataset_name'].upper( - ) if model_info['dataset_name'] else None - - #Placeholder Only - segment_info = self.compile_backend.segment(pre_compile_config) - - best_batch_sizes = self.compile_backend.get_best_batch_size() - if isinstance(best_batch_sizes, list): - pre_compile_config['workload'][ - 'batch_sizes'] = best_batch_sizes - - log.info("Start to compile the model...") - start = time.time() - compile_info = self.compile_backend.compile(pre_compile_config, - dataset) - end = time.time() - - graph_compile_report = {} - graph_compile_report["Compile Duration"] = round(end - start, 5) - graph_compile_report["Compile Precision"] = compile_info[ - 'compile_precision'] - graph_compile_report["Subgraph Coverage"] = compile_info['sg_percent'] - if 'optimizations' in compile_info: - graph_compile_report['Optimizations'] = compile_info['optimizations'] - if 'instance_count' in compile_info: - base_report['Instance Count'] = compile_info['instance_count'] - if 'device_count' in compile_info: - base_report['Device Count'] = compile_info['device_count'] - base_report['Graph Compile'] = graph_compile_report - - # Initalize Output Dir and Reports - output_dir = os.path.abspath('general_perf/reports/' + - self.backend_type + '/' + - workload['model']) - os.makedirs(output_dir, exist_ok=True) - - # Compile only mode will stop here - if self.compile_only_mode: - base_report.pop("Backend") - return compile_info["compile_status"], base_report - - # load runtime backend - """ - Start Here - """ - batch_sizes = pre_compile_config['workload']['batch_sizes'] - self.runtime_backend.configs = compile_info - self.runtime_backend.workload = workload - self.runtime_backend.model_info = model_info - - self.runtime_backend.load(workload['batch_sizes'][0]) - # test accuracy - accuracy_report = {} - AccuracyChecker = self.get_accuracy_checker( - model_info['dataset_name'] - if model_info['dataset_name'] else 'fake_dataset') - AccuracyChecker.runtime_backend = self.runtime_backend - AccuracyChecker.dataloader = dataset - AccuracyChecker.output_dir = output_dir - AccuracyChecker.configs = compile_info - - if workload['test_accuracy']: - log.info("******************************************* Running Accuracy Checker... *******************************************") - - dataset.rebatch(self.runtime_backend.get_loaded_batch_size()) - accuracy_results = AccuracyChecker.calculate_acc( - workload['data_percent']) - - accuracy_report['Data Percent'] = workload['data_percent'] - accuracy_report.update(accuracy_results) - - # test numeric - if workload['test_numeric']: - log.info("******************************************* Running Numeric Checker... 
*******************************************") - - dataset.rebatch(self.runtime_backend.get_loaded_batch_size()) - if not workload['test_accuracy']: - accuracy_results = AccuracyChecker.calculate_acc( - workload['data_percent']) - diff_results = AccuracyChecker.calculate_diff() - accuracy_report.update(diff_results) - # accuracy_report['Diff Dist'] = compile_info['model'] + '-to-' + compile_info['compile_precision'].lower() + ".png" - - if accuracy_report: - base_report['Accuracy'] = accuracy_report - - # function to test qps and latency - if workload['test_perf']: - log.info("******************************************* Runing QPS Checker... *******************************************") - performance_reports = [] - qs_status = self.runtime_backend.is_qs_mode_supported() - if qs_status: - qs_config = self.runtime_backend.generate_qs_config() - performance_reports = self.qs_benchmark(qs_config) - else: - for bs in batch_sizes: - self.runtime_backend.load(bs) - batch_reports = self.runtime_backend.benchmark(dataset) - performance_reports.append(batch_reports) - base_report['Performance'] = performance_reports - - if "Instance Count" not in base_report: - log.warning("Vendors need to Add # of instances") - if "Device Count" not in base_report: - log.warning("Vendors need to Add # of devices") - - # write output to json file - output_report_path = output_dir + "/result-" + compile_info['compile_precision'].lower() + ".json" - with open(output_report_path, 'w') as file: - json.dump(base_report, file, indent=4) - - base_report.pop("Backend") - log.info("Testing Finish. Report is saved in path: [ {}/{} ]". - format(output_dir[output_dir.rfind('general_perf'):], - os.path.basename(output_report_path))) - - return compile_info["compile_status"] - - #WIP - def qs_benchmark(self, qs_config: Dict[str, Any]) -> list: - return [] - - def get_accuracy_checker(self, dataset_name: str): - AccuracyChecker = importlib.import_module('general_perf.datasets.' 
+ - dataset_name + - ".test_accuracy") - AccuracyChecker = getattr(AccuracyChecker, 'AccuracyChecker') - return AccuracyChecker() - - def get_model_info(self, model_name: str) -> Dict[str, Any]: - with open("general_perf/model_zoo/" + model_name + '.json', - 'r') as file: - model_info = json.load(file) - return model_info - - def get_cpu_name(self): - command = "lscpu | grep 'Model name' | awk -F: '{print $2}'" - cpu_name = subprocess.check_output(command, shell=True) - return cpu_name.decode().strip() - - def check_interact_info( - self, pre_compile_config: Dict[str, Dict]) -> Dict[str, Any]: - interact_info = self.compile_backend.get_interact_profile( - pre_compile_config) - - answer = {} - if len(interact_info) == 0: - return answer - - dialog_style = Style.from_dict({ - 'dialog': 'bg:#88b8ff', - 'dialog frame.label': 'bg:#ffffff #000000', - 'dialog.body': 'bg:#000000 #a0acde', - 'dialog shadow': 'bg:#004aaa', - }) - - input_style = Style.from_dict({ - 'dialog': 'bg:#88b8ff', - 'dialog frame.label': 'bg:#ffffff #000000', - 'dialog.body': 'bg:#000000 #a0acde', - 'dialog shadow': 'bg:#004aaa', - 'text-area.prompt': 'bg:#ffffff', - 'text-area': '#000000', - }) - - option = yes_no_dialog(title=self.backend_type + '编译配置', - text='[请选择]:是否进行编译后端配置:', - style=dialog_style).run() - if option: - sum_question = len(interact_info) - for i, question in enumerate(interact_info): - if question['depends']: - state = 0 - for title in question['depends'].split(','): - if not answer[title]: - state = 1 - if state: - continue - if question['dialog_type'] == 'Yes/No Dialog': - option = yes_no_dialog( - title=self.backend_type + '编译配置进度(' + str(i + 1) + - '/' + str(sum_question) + ')', - text="[Backend " + self.backend_type + "]: " + - question['note'], - style=dialog_style).run() - elif question['dialog_type'] == "Input Dialog": - option = input_dialog( - title=self.backend_type + '编译配置进度(' + str(i + 1) + - '/' + str(sum_question) + ')', - text="[Backend " + self.backend_type + "]: " + - question['note'], - style=input_style).run() - elif question['dialog_type'] == "Radiolist Dialog": - choice = [(i, text) - for i, text in enumerate(question['options'])] - num = radiolist_dialog( - title=self.backend_type + '编译配置进度(' + str(i + 1) + - '/' + str(sum_question) + ')', - text="[Backend " + self.backend_type + "]: " + - question['note'], - values=choice, - style=dialog_style).run() - option = question['options'][num] if num is not None else question[ - 'default'] - answer[question['name']] = option - - return answer - - def activate_venv(self, hardware_type: str) -> bool: - - return True - - def deactivate_venv(self): - sys.path[: - 0] = self.prev_sys_path #will also revert the added site-packages - sys.prefix = self.real_prefix - os.environ['PATH'] = self.old_os_path - - -if __name__ == "__main__": - engine = PerfEngine() - engine.start_engine() diff --git a/models/nlp/language_model/deberta/ixrt/README.md b/models/nlp/language_model/deberta/ixrt/README.md index 69dcdf36..3e5f944b 100644 --- a/models/nlp/language_model/deberta/ixrt/README.md +++ b/models/nlp/language_model/deberta/ixrt/README.md @@ -67,7 +67,6 @@ pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/requirements.txt pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/requirements.txt # setup -mv perf_engine.py ./ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py cp ./datasets/open_squad/* ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/ mv ./deberta-sim-drop-clip-drop-invaild-cast.onnx 
general_perf/model_zoo/popular/open_deberta/ diff --git a/models/nlp/language_model/deberta/ixrt/ci/prepare.sh b/models/nlp/language_model/deberta/ixrt/ci/prepare.sh index e3d30793..7a113b5a 100644 --- a/models/nlp/language_model/deberta/ixrt/ci/prepare.sh +++ b/models/nlp/language_model/deberta/ixrt/ci/prepare.sh @@ -28,6 +28,7 @@ fi pip install -r requirements.txt cp /root/data/3rd_party/deberta-torch-fp32.json ./ +cp /root/data/3rd_party/iluvatar-corex-ixrt/tools/optimizer/optimizer.py ./ python3 torch2onnx.py --model_path /root/data/checkpoints/open_deberta/deberta-base-squad.pt --output_path deberta-torch-fp32.onnx onnxsim deberta-torch-fp32.onnx deberta-torch-fp32-sim.onnx python3 remove_clip_and_cast.py @@ -41,7 +42,6 @@ pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/requirements.txt pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/requirements.txt # setup -mv perf_engine.py ./ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py cp /root/data/datasets/open_squad/* ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/ cp ./deberta-sim-drop-clip-drop-invaild-cast.onnx /root/data/checkpoints/open_deberta/ diff --git a/models/nlp/language_model/deberta/ixrt/perf_engine.py b/models/nlp/language_model/deberta/ixrt/perf_engine.py deleted file mode 100644 index 089d9860..00000000 --- a/models/nlp/language_model/deberta/ixrt/perf_engine.py +++ /dev/null @@ -1,349 +0,0 @@ -# Copyright 2023 ByteDance and/or its affiliates. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import sys -import os -import logging -import importlib -import json -import subprocess -import time - -from typing import Any, Dict, Tuple -from prompt_toolkit.shortcuts import radiolist_dialog, input_dialog, yes_no_dialog -from prompt_toolkit.styles import Style - -BYTE_MLPERF_ROOT = os.path.dirname( - os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -os.chdir(BYTE_MLPERF_ROOT) -sys.path.insert(0, BYTE_MLPERF_ROOT) - -import argparse -from general_perf.core.configs.workload_store import load_workload -from general_perf.core.configs.dataset_store import load_dataset -from general_perf.core.configs.backend_store import init_compile_backend, init_runtime_backend - -logging.basicConfig(level=logging.INFO) -log = logging.getLogger("PerfEngine") -os.environ["TF_CPP_MIN_LOG_LEVEL"] = '3' - - -def get_args(): - """Parse commandline.""" - parser = argparse.ArgumentParser() - parser.add_argument( - "--task", - default="resnet50-tf-fp32", - help="The task going to be evaluted, refs to workloads/") - parser.add_argument( - "--hardware_type", - default="GPU", - help="The backend going to be evaluted, refs to backends/") - parser.add_argument("--compile_only", - action='store_true', - help="Run compilation only") - - args = parser.parse_args() - return args - - -class PerfEngine: - def __init__(self) -> None: - super().__init__() - self.args = get_args() - self.workload = load_workload(self.args.task) - self.backend_type = self.args.hardware_type - self.compile_backend = None - self.old_os_path = os.environ['PATH'] - self.prev_sys_path = list(sys.path) - self.real_prefix = sys.prefix - self.compile_only_mode = False - - def start_engine(self) -> None: - ''' - Byte MlPerf will create an virtual env for each backend to avoid dependance conflict - ''' - success, total = 0, len(self.workload) - if total == 0: - return - log.info("******************* Backend Env Initization *******************") - status = self.activate_venv(self.backend_type) - if not status: - log.warning("Activate virtualenv Failed, Please Check...") - - self.compile_backend = init_compile_backend(self.backend_type) - self.runtime_backend = init_runtime_backend(self.backend_type) - - output_dir = os.path.abspath('general_perf/reports/' + - self.backend_type) - os.makedirs(output_dir, exist_ok=True) - - status = self.single_workload_perf(self.workload) - - def single_workload_perf( - self, workload: Dict[str, Any]) -> bool: - log.info("******************************************* Start to test model: {}. *******************************************".format(workload['model'])) - - # Check Compile Only Mode - self.compile_only_mode = False - if self.args.compile_only or workload['compile_only']: - self.compile_only_mode = True - - base_report = { - "Model": workload['model'].upper(), - "Backend": self.backend_type, - "Host Info": self.get_cpu_name() - } - - # Initalize Model Config Info - model_info = self.get_model_info(workload['model']) - pre_compile_config = {"workload": workload, 'model_info': model_info} - interact_info = self.check_interact_info(pre_compile_config) - pre_compile_config['interact_info'] = interact_info - if not model_info['dataset_name']: - model_info['dataset_name'] = 'fake_dataset' - - - ''' - Compile Backend could do some optimization like convert model format here - ''' - log.info("******************************************* Running Backend Compilation... 
*******************************************") - log.info("Running Backend Preoptimization...") - pre_compile_config = self.compile_backend.pre_optimize(pre_compile_config) - - - # Initalize dataset - dataset = load_dataset(model_info) - dataset.preprocess() - base_report['Dataset'] = model_info['dataset_name'].upper( - ) if model_info['dataset_name'] else None - - #Placeholder Only - segment_info = self.compile_backend.segment(pre_compile_config) - - best_batch_sizes = self.compile_backend.get_best_batch_size() - if isinstance(best_batch_sizes, list): - pre_compile_config['workload'][ - 'batch_sizes'] = best_batch_sizes - - log.info("Start to compile the model...") - start = time.time() - compile_info = self.compile_backend.compile(pre_compile_config, - dataset) - end = time.time() - - graph_compile_report = {} - graph_compile_report["Compile Duration"] = round(end - start, 5) - graph_compile_report["Compile Precision"] = compile_info[ - 'compile_precision'] - graph_compile_report["Subgraph Coverage"] = compile_info['sg_percent'] - if 'optimizations' in compile_info: - graph_compile_report['Optimizations'] = compile_info['optimizations'] - if 'instance_count' in compile_info: - base_report['Instance Count'] = compile_info['instance_count'] - if 'device_count' in compile_info: - base_report['Device Count'] = compile_info['device_count'] - base_report['Graph Compile'] = graph_compile_report - - # Initalize Output Dir and Reports - output_dir = os.path.abspath('general_perf/reports/' + - self.backend_type + '/' + - workload['model']) - os.makedirs(output_dir, exist_ok=True) - - # Compile only mode will stop here - if self.compile_only_mode: - base_report.pop("Backend") - return compile_info["compile_status"], base_report - - # load runtime backend - """ - Start Here - """ - batch_sizes = pre_compile_config['workload']['batch_sizes'] - self.runtime_backend.configs = compile_info - self.runtime_backend.workload = workload - self.runtime_backend.model_info = model_info - - self.runtime_backend.load(workload['batch_sizes'][0]) - # test accuracy - accuracy_report = {} - AccuracyChecker = self.get_accuracy_checker( - model_info['dataset_name'] - if model_info['dataset_name'] else 'fake_dataset') - AccuracyChecker.runtime_backend = self.runtime_backend - AccuracyChecker.dataloader = dataset - AccuracyChecker.output_dir = output_dir - AccuracyChecker.configs = compile_info - - if workload['test_accuracy']: - log.info("******************************************* Running Accuracy Checker... *******************************************") - - dataset.rebatch(self.runtime_backend.get_loaded_batch_size()) - accuracy_results = AccuracyChecker.calculate_acc( - workload['data_percent']) - - accuracy_report['Data Percent'] = workload['data_percent'] - accuracy_report.update(accuracy_results) - - # test numeric - if workload['test_numeric']: - log.info("******************************************* Running Numeric Checker... 
*******************************************") - - dataset.rebatch(self.runtime_backend.get_loaded_batch_size()) - if not workload['test_accuracy']: - accuracy_results = AccuracyChecker.calculate_acc( - workload['data_percent']) - diff_results = AccuracyChecker.calculate_diff() - accuracy_report.update(diff_results) - # accuracy_report['Diff Dist'] = compile_info['model'] + '-to-' + compile_info['compile_precision'].lower() + ".png" - - if accuracy_report: - base_report['Accuracy'] = accuracy_report - - # function to test qps and latency - if workload['test_perf']: - log.info("******************************************* Runing QPS Checker... *******************************************") - performance_reports = [] - qs_status = self.runtime_backend.is_qs_mode_supported() - if qs_status: - qs_config = self.runtime_backend.generate_qs_config() - performance_reports = self.qs_benchmark(qs_config) - else: - for bs in batch_sizes: - self.runtime_backend.load(bs) - batch_reports = self.runtime_backend.benchmark(dataset) - performance_reports.append(batch_reports) - base_report['Performance'] = performance_reports - - if "Instance Count" not in base_report: - log.warning("Vendors need to Add # of instances") - if "Device Count" not in base_report: - log.warning("Vendors need to Add # of devices") - - # write output to json file - output_report_path = output_dir + "/result-" + compile_info['compile_precision'].lower() + ".json" - with open(output_report_path, 'w') as file: - json.dump(base_report, file, indent=4) - - base_report.pop("Backend") - log.info("Testing Finish. Report is saved in path: [ {}/{} ]". - format(output_dir[output_dir.rfind('general_perf'):], - os.path.basename(output_report_path))) - - return compile_info["compile_status"] - - #WIP - def qs_benchmark(self, qs_config: Dict[str, Any]) -> list: - return [] - - def get_accuracy_checker(self, dataset_name: str): - AccuracyChecker = importlib.import_module('general_perf.datasets.' 
+ - dataset_name + - ".test_accuracy") - AccuracyChecker = getattr(AccuracyChecker, 'AccuracyChecker') - return AccuracyChecker() - - def get_model_info(self, model_name: str) -> Dict[str, Any]: - with open("general_perf/model_zoo/" + model_name + '.json', - 'r') as file: - model_info = json.load(file) - return model_info - - def get_cpu_name(self): - command = "lscpu | grep 'Model name' | awk -F: '{print $2}'" - cpu_name = subprocess.check_output(command, shell=True) - return cpu_name.decode().strip() - - def check_interact_info( - self, pre_compile_config: Dict[str, Dict]) -> Dict[str, Any]: - interact_info = self.compile_backend.get_interact_profile( - pre_compile_config) - - answer = {} - if len(interact_info) == 0: - return answer - - dialog_style = Style.from_dict({ - 'dialog': 'bg:#88b8ff', - 'dialog frame.label': 'bg:#ffffff #000000', - 'dialog.body': 'bg:#000000 #a0acde', - 'dialog shadow': 'bg:#004aaa', - }) - - input_style = Style.from_dict({ - 'dialog': 'bg:#88b8ff', - 'dialog frame.label': 'bg:#ffffff #000000', - 'dialog.body': 'bg:#000000 #a0acde', - 'dialog shadow': 'bg:#004aaa', - 'text-area.prompt': 'bg:#ffffff', - 'text-area': '#000000', - }) - - option = yes_no_dialog(title=self.backend_type + '编译配置', - text='[请选择]:是否进行编译后端配置:', - style=dialog_style).run() - if option: - sum_question = len(interact_info) - for i, question in enumerate(interact_info): - if question['depends']: - state = 0 - for title in question['depends'].split(','): - if not answer[title]: - state = 1 - if state: - continue - if question['dialog_type'] == 'Yes/No Dialog': - option = yes_no_dialog( - title=self.backend_type + '编译配置进度(' + str(i + 1) + - '/' + str(sum_question) + ')', - text="[Backend " + self.backend_type + "]: " + - question['note'], - style=dialog_style).run() - elif question['dialog_type'] == "Input Dialog": - option = input_dialog( - title=self.backend_type + '编译配置进度(' + str(i + 1) + - '/' + str(sum_question) + ')', - text="[Backend " + self.backend_type + "]: " + - question['note'], - style=input_style).run() - elif question['dialog_type'] == "Radiolist Dialog": - choice = [(i, text) - for i, text in enumerate(question['options'])] - num = radiolist_dialog( - title=self.backend_type + '编译配置进度(' + str(i + 1) + - '/' + str(sum_question) + ')', - text="[Backend " + self.backend_type + "]: " + - question['note'], - values=choice, - style=dialog_style).run() - option = question['options'][num] if num is not None else question[ - 'default'] - answer[question['name']] = option - - return answer - - def activate_venv(self, hardware_type: str) -> bool: - - return True - - def deactivate_venv(self): - sys.path[: - 0] = self.prev_sys_path #will also revert the added site-packages - sys.prefix = self.real_prefix - os.environ['PATH'] = self.old_os_path - - -if __name__ == "__main__": - engine = PerfEngine() - engine.start_engine() diff --git a/models/nlp/language_model/roberta/ixrt/README.md b/models/nlp/language_model/roberta/ixrt/README.md index c2c8b73d..957fce3b 100644 --- a/models/nlp/language_model/roberta/ixrt/README.md +++ b/models/nlp/language_model/roberta/ixrt/README.md @@ -66,7 +66,6 @@ For detailed steps regarding this model, please refer to this document: None: - super().__init__() - self.args = get_args() - self.workload = load_workload(self.args.task) - self.backend_type = self.args.hardware_type - self.compile_backend = None - self.old_os_path = os.environ['PATH'] - self.prev_sys_path = list(sys.path) - self.real_prefix = sys.prefix - self.compile_only_mode = False - - def 
start_engine(self) -> None: - ''' - Byte MlPerf will create an virtual env for each backend to avoid dependance conflict - ''' - success, total = 0, len(self.workload) - if total == 0: - return - log.info("******************* Backend Env Initization *******************") - status = self.activate_venv(self.backend_type) - if not status: - log.warning("Activate virtualenv Failed, Please Check...") - - self.compile_backend = init_compile_backend(self.backend_type) - self.runtime_backend = init_runtime_backend(self.backend_type) - - output_dir = os.path.abspath('general_perf/reports/' + - self.backend_type) - os.makedirs(output_dir, exist_ok=True) - - status = self.single_workload_perf(self.workload) - - def single_workload_perf( - self, workload: Dict[str, Any]) -> bool: - log.info("******************************************* Start to test model: {}. *******************************************".format(workload['model'])) - - # Check Compile Only Mode - self.compile_only_mode = False - if self.args.compile_only or workload['compile_only']: - self.compile_only_mode = True - - base_report = { - "Model": workload['model'].upper(), - "Backend": self.backend_type, - "Host Info": self.get_cpu_name() - } - - # Initalize Model Config Info - model_info = self.get_model_info(workload['model']) - pre_compile_config = {"workload": workload, 'model_info': model_info} - interact_info = self.check_interact_info(pre_compile_config) - pre_compile_config['interact_info'] = interact_info - if not model_info['dataset_name']: - model_info['dataset_name'] = 'fake_dataset' - - - ''' - Compile Backend could do some optimization like convert model format here - ''' - log.info("******************************************* Running Backend Compilation... *******************************************") - log.info("Running Backend Preoptimization...") - pre_compile_config = self.compile_backend.pre_optimize(pre_compile_config) - - - # Initalize dataset - dataset = load_dataset(model_info) - dataset.preprocess() - base_report['Dataset'] = model_info['dataset_name'].upper( - ) if model_info['dataset_name'] else None - - #Placeholder Only - segment_info = self.compile_backend.segment(pre_compile_config) - - best_batch_sizes = self.compile_backend.get_best_batch_size() - if isinstance(best_batch_sizes, list): - pre_compile_config['workload'][ - 'batch_sizes'] = best_batch_sizes - - log.info("Start to compile the model...") - start = time.time() - compile_info = self.compile_backend.compile(pre_compile_config, - dataset) - end = time.time() - - graph_compile_report = {} - graph_compile_report["Compile Duration"] = round(end - start, 5) - graph_compile_report["Compile Precision"] = compile_info[ - 'compile_precision'] - graph_compile_report["Subgraph Coverage"] = compile_info['sg_percent'] - if 'optimizations' in compile_info: - graph_compile_report['Optimizations'] = compile_info['optimizations'] - if 'instance_count' in compile_info: - base_report['Instance Count'] = compile_info['instance_count'] - if 'device_count' in compile_info: - base_report['Device Count'] = compile_info['device_count'] - base_report['Graph Compile'] = graph_compile_report - - # Initalize Output Dir and Reports - output_dir = os.path.abspath('general_perf/reports/' + - self.backend_type + '/' + - workload['model']) - os.makedirs(output_dir, exist_ok=True) - - # Compile only mode will stop here - if self.compile_only_mode: - base_report.pop("Backend") - return compile_info["compile_status"], base_report - - # load runtime backend - """ - Start Here - """ - 
batch_sizes = pre_compile_config['workload']['batch_sizes'] - self.runtime_backend.configs = compile_info - self.runtime_backend.workload = workload - self.runtime_backend.model_info = model_info - - self.runtime_backend.load(workload['batch_sizes'][0]) - # test accuracy - accuracy_report = {} - AccuracyChecker = self.get_accuracy_checker( - model_info['dataset_name'] - if model_info['dataset_name'] else 'fake_dataset') - AccuracyChecker.runtime_backend = self.runtime_backend - AccuracyChecker.dataloader = dataset - AccuracyChecker.output_dir = output_dir - AccuracyChecker.configs = compile_info - - if workload['test_accuracy']: - log.info("******************************************* Running Accuracy Checker... *******************************************") - - dataset.rebatch(self.runtime_backend.get_loaded_batch_size()) - accuracy_results = AccuracyChecker.calculate_acc( - workload['data_percent']) - - accuracy_report['Data Percent'] = workload['data_percent'] - accuracy_report.update(accuracy_results) - - # test numeric - if workload['test_numeric']: - log.info("******************************************* Running Numeric Checker... *******************************************") - - dataset.rebatch(self.runtime_backend.get_loaded_batch_size()) - if not workload['test_accuracy']: - accuracy_results = AccuracyChecker.calculate_acc( - workload['data_percent']) - diff_results = AccuracyChecker.calculate_diff() - accuracy_report.update(diff_results) - # accuracy_report['Diff Dist'] = compile_info['model'] + '-to-' + compile_info['compile_precision'].lower() + ".png" - - if accuracy_report: - base_report['Accuracy'] = accuracy_report - - # function to test qps and latency - if workload['test_perf']: - log.info("******************************************* Runing QPS Checker... *******************************************") - performance_reports = [] - qs_status = self.runtime_backend.is_qs_mode_supported() - if qs_status: - qs_config = self.runtime_backend.generate_qs_config() - performance_reports = self.qs_benchmark(qs_config) - else: - for bs in batch_sizes: - self.runtime_backend.load(bs) - batch_reports = self.runtime_backend.benchmark(dataset) - performance_reports.append(batch_reports) - base_report['Performance'] = performance_reports - - if "Instance Count" not in base_report: - log.warning("Vendors need to Add # of instances") - if "Device Count" not in base_report: - log.warning("Vendors need to Add # of devices") - - # write output to json file - output_report_path = output_dir + "/result-" + compile_info['compile_precision'].lower() + ".json" - with open(output_report_path, 'w') as file: - json.dump(base_report, file, indent=4) - - base_report.pop("Backend") - log.info("Testing Finish. Report is saved in path: [ {}/{} ]". - format(output_dir[output_dir.rfind('general_perf'):], - os.path.basename(output_report_path))) - - return compile_info["compile_status"] - - #WIP - def qs_benchmark(self, qs_config: Dict[str, Any]) -> list: - return [] - - def get_accuracy_checker(self, dataset_name: str): - AccuracyChecker = importlib.import_module('general_perf.datasets.' 
+ - dataset_name + - ".test_accuracy") - AccuracyChecker = getattr(AccuracyChecker, 'AccuracyChecker') - return AccuracyChecker() - - def get_model_info(self, model_name: str) -> Dict[str, Any]: - with open("general_perf/model_zoo/" + model_name + '.json', - 'r') as file: - model_info = json.load(file) - return model_info - - def get_cpu_name(self): - command = "lscpu | grep 'Model name' | awk -F: '{print $2}'" - cpu_name = subprocess.check_output(command, shell=True) - return cpu_name.decode().strip() - - def check_interact_info( - self, pre_compile_config: Dict[str, Dict]) -> Dict[str, Any]: - interact_info = self.compile_backend.get_interact_profile( - pre_compile_config) - - answer = {} - if len(interact_info) == 0: - return answer - - dialog_style = Style.from_dict({ - 'dialog': 'bg:#88b8ff', - 'dialog frame.label': 'bg:#ffffff #000000', - 'dialog.body': 'bg:#000000 #a0acde', - 'dialog shadow': 'bg:#004aaa', - }) - - input_style = Style.from_dict({ - 'dialog': 'bg:#88b8ff', - 'dialog frame.label': 'bg:#ffffff #000000', - 'dialog.body': 'bg:#000000 #a0acde', - 'dialog shadow': 'bg:#004aaa', - 'text-area.prompt': 'bg:#ffffff', - 'text-area': '#000000', - }) - - option = yes_no_dialog(title=self.backend_type + '编译配置', - text='[请选择]:是否进行编译后端配置:', - style=dialog_style).run() - if option: - sum_question = len(interact_info) - for i, question in enumerate(interact_info): - if question['depends']: - state = 0 - for title in question['depends'].split(','): - if not answer[title]: - state = 1 - if state: - continue - if question['dialog_type'] == 'Yes/No Dialog': - option = yes_no_dialog( - title=self.backend_type + '编译配置进度(' + str(i + 1) + - '/' + str(sum_question) + ')', - text="[Backend " + self.backend_type + "]: " + - question['note'], - style=dialog_style).run() - elif question['dialog_type'] == "Input Dialog": - option = input_dialog( - title=self.backend_type + '编译配置进度(' + str(i + 1) + - '/' + str(sum_question) + ')', - text="[Backend " + self.backend_type + "]: " + - question['note'], - style=input_style).run() - elif question['dialog_type'] == "Radiolist Dialog": - choice = [(i, text) - for i, text in enumerate(question['options'])] - num = radiolist_dialog( - title=self.backend_type + '编译配置进度(' + str(i + 1) + - '/' + str(sum_question) + ')', - text="[Backend " + self.backend_type + "]: " + - question['note'], - values=choice, - style=dialog_style).run() - option = question['options'][num] if num is not None else question[ - 'default'] - answer[question['name']] = option - - return answer - - def activate_venv(self, hardware_type: str) -> bool: - - return True - - def deactivate_venv(self): - sys.path[: - 0] = self.prev_sys_path #will also revert the added site-packages - sys.prefix = self.real_prefix - os.environ['PATH'] = self.old_os_path - - -if __name__ == "__main__": - engine = PerfEngine() - engine.start_engine() \ No newline at end of file diff --git a/models/nlp/language_model/roformer/ixrt/README.md b/models/nlp/language_model/roformer/ixrt/README.md index de0fbbfb..26e76714 100644 --- a/models/nlp/language_model/roformer/ixrt/README.md +++ b/models/nlp/language_model/roformer/ixrt/README.md @@ -72,8 +72,6 @@ For detailed steps regarding this model, please refer to this document: None: - super().__init__() - self.args = get_args() - self.workload = load_workload(self.args.task) - self.backend_type = self.args.hardware_type - self.compile_backend = None - self.old_os_path = os.environ['PATH'] - self.prev_sys_path = list(sys.path) - self.real_prefix = sys.prefix - 
self.compile_only_mode = False - - def start_engine(self) -> None: - ''' - Byte MlPerf will create an virtual env for each backend to avoid dependance conflict - ''' - success, total = 0, len(self.workload) - if total == 0: - return - log.info("******************* Backend Env Initization *******************") - status = self.activate_venv(self.backend_type) - if not status: - log.warning("Activate virtualenv Failed, Please Check...") - - self.compile_backend = init_compile_backend(self.backend_type) - self.runtime_backend = init_runtime_backend(self.backend_type) - - output_dir = os.path.abspath('general_perf/reports/' + - self.backend_type) - os.makedirs(output_dir, exist_ok=True) - - status = self.single_workload_perf(self.workload) - - def single_workload_perf( - self, workload: Dict[str, Any]) -> bool: - log.info("******************************************* Start to test model: {}. *******************************************".format(workload['model'])) - - # Check Compile Only Mode - self.compile_only_mode = False - if self.args.compile_only or workload['compile_only']: - self.compile_only_mode = True - - base_report = { - "Model": workload['model'].upper(), - "Backend": self.backend_type, - "Host Info": self.get_cpu_name() - } - - # Initalize Model Config Info - model_info = self.get_model_info(workload['model']) - pre_compile_config = {"workload": workload, 'model_info': model_info} - interact_info = self.check_interact_info(pre_compile_config) - pre_compile_config['interact_info'] = interact_info - if not model_info['dataset_name']: - model_info['dataset_name'] = 'fake_dataset' - - - ''' - Compile Backend could do some optimization like convert model format here - ''' - log.info("******************************************* Running Backend Compilation... 
*******************************************") - log.info("Running Backend Preoptimization...") - pre_compile_config = self.compile_backend.pre_optimize(pre_compile_config) - - - # Initalize dataset - dataset = load_dataset(model_info) - dataset.preprocess() - base_report['Dataset'] = model_info['dataset_name'].upper( - ) if model_info['dataset_name'] else None - - #Placeholder Only - segment_info = self.compile_backend.segment(pre_compile_config) - - best_batch_sizes = self.compile_backend.get_best_batch_size() - if isinstance(best_batch_sizes, list): - pre_compile_config['workload'][ - 'batch_sizes'] = best_batch_sizes - - log.info("Start to compile the model...") - start = time.time() - compile_info = self.compile_backend.compile(pre_compile_config, - dataset) - end = time.time() - - graph_compile_report = {} - graph_compile_report["Compile Duration"] = round(end - start, 5) - graph_compile_report["Compile Precision"] = compile_info[ - 'compile_precision'] - graph_compile_report["Subgraph Coverage"] = compile_info['sg_percent'] - if 'optimizations' in compile_info: - graph_compile_report['Optimizations'] = compile_info['optimizations'] - if 'instance_count' in compile_info: - base_report['Instance Count'] = compile_info['instance_count'] - if 'device_count' in compile_info: - base_report['Device Count'] = compile_info['device_count'] - base_report['Graph Compile'] = graph_compile_report - - # Initalize Output Dir and Reports - output_dir = os.path.abspath('general_perf/reports/' + - self.backend_type + '/' + - workload['model']) - os.makedirs(output_dir, exist_ok=True) - - # Compile only mode will stop here - if self.compile_only_mode: - base_report.pop("Backend") - return compile_info["compile_status"], base_report - - # load runtime backend - """ - Start Here - """ - batch_sizes = pre_compile_config['workload']['batch_sizes'] - self.runtime_backend.configs = compile_info - self.runtime_backend.workload = workload - self.runtime_backend.model_info = model_info - - self.runtime_backend.load(workload['batch_sizes'][0]) - # test accuracy - accuracy_report = {} - AccuracyChecker = self.get_accuracy_checker( - model_info['dataset_name'] - if model_info['dataset_name'] else 'fake_dataset') - AccuracyChecker.runtime_backend = self.runtime_backend - AccuracyChecker.dataloader = dataset - AccuracyChecker.output_dir = output_dir - AccuracyChecker.configs = compile_info - - if workload['test_accuracy']: - log.info("******************************************* Running Accuracy Checker... *******************************************") - - dataset.rebatch(self.runtime_backend.get_loaded_batch_size()) - accuracy_results = AccuracyChecker.calculate_acc( - workload['data_percent']) - - accuracy_report['Data Percent'] = workload['data_percent'] - accuracy_report.update(accuracy_results) - - # test numeric - if workload['test_numeric']: - log.info("******************************************* Running Numeric Checker... 
*******************************************") - - dataset.rebatch(self.runtime_backend.get_loaded_batch_size()) - if not workload['test_accuracy']: - accuracy_results = AccuracyChecker.calculate_acc( - workload['data_percent']) - diff_results = AccuracyChecker.calculate_diff() - accuracy_report.update(diff_results) - # accuracy_report['Diff Dist'] = compile_info['model'] + '-to-' + compile_info['compile_precision'].lower() + ".png" - - if accuracy_report: - base_report['Accuracy'] = accuracy_report - - # function to test qps and latency - if workload['test_perf']: - log.info("******************************************* Runing QPS Checker... *******************************************") - performance_reports = [] - qs_status = self.runtime_backend.is_qs_mode_supported() - if qs_status: - qs_config = self.runtime_backend.generate_qs_config() - performance_reports = self.qs_benchmark(qs_config) - else: - for bs in batch_sizes: - self.runtime_backend.load(bs) - batch_reports = self.runtime_backend.benchmark(dataset) - performance_reports.append(batch_reports) - base_report['Performance'] = performance_reports - - if "Instance Count" not in base_report: - log.warning("Vendors need to Add # of instances") - if "Device Count" not in base_report: - log.warning("Vendors need to Add # of devices") - - # write output to json file - output_report_path = output_dir + "/result-" + compile_info['compile_precision'].lower() + ".json" - with open(output_report_path, 'w') as file: - json.dump(base_report, file, indent=4) - - base_report.pop("Backend") - log.info("Testing Finish. Report is saved in path: [ {}/{} ]". - format(output_dir[output_dir.rfind('general_perf'):], - os.path.basename(output_report_path))) - - return compile_info["compile_status"] - - #WIP - def qs_benchmark(self, qs_config: Dict[str, Any]) -> list: - return [] - - def get_accuracy_checker(self, dataset_name: str): - AccuracyChecker = importlib.import_module('general_perf.datasets.' 
+ - dataset_name + - ".test_accuracy") - AccuracyChecker = getattr(AccuracyChecker, 'AccuracyChecker') - return AccuracyChecker() - - def get_model_info(self, model_name: str) -> Dict[str, Any]: - with open("general_perf/model_zoo/" + model_name + '.json', - 'r') as file: - model_info = json.load(file) - return model_info - - def get_cpu_name(self): - command = "lscpu | grep 'Model name' | awk -F: '{print $2}'" - cpu_name = subprocess.check_output(command, shell=True) - return cpu_name.decode().strip() - - def check_interact_info( - self, pre_compile_config: Dict[str, Dict]) -> Dict[str, Any]: - interact_info = self.compile_backend.get_interact_profile( - pre_compile_config) - - answer = {} - if len(interact_info) == 0: - return answer - - dialog_style = Style.from_dict({ - 'dialog': 'bg:#88b8ff', - 'dialog frame.label': 'bg:#ffffff #000000', - 'dialog.body': 'bg:#000000 #a0acde', - 'dialog shadow': 'bg:#004aaa', - }) - - input_style = Style.from_dict({ - 'dialog': 'bg:#88b8ff', - 'dialog frame.label': 'bg:#ffffff #000000', - 'dialog.body': 'bg:#000000 #a0acde', - 'dialog shadow': 'bg:#004aaa', - 'text-area.prompt': 'bg:#ffffff', - 'text-area': '#000000', - }) - - option = yes_no_dialog(title=self.backend_type + '编译配置', - text='[请选择]:是否进行编译后端配置:', - style=dialog_style).run() - if option: - sum_question = len(interact_info) - for i, question in enumerate(interact_info): - if question['depends']: - state = 0 - for title in question['depends'].split(','): - if not answer[title]: - state = 1 - if state: - continue - if question['dialog_type'] == 'Yes/No Dialog': - option = yes_no_dialog( - title=self.backend_type + '编译配置进度(' + str(i + 1) + - '/' + str(sum_question) + ')', - text="[Backend " + self.backend_type + "]: " + - question['note'], - style=dialog_style).run() - elif question['dialog_type'] == "Input Dialog": - option = input_dialog( - title=self.backend_type + '编译配置进度(' + str(i + 1) + - '/' + str(sum_question) + ')', - text="[Backend " + self.backend_type + "]: " + - question['note'], - style=input_style).run() - elif question['dialog_type'] == "Radiolist Dialog": - choice = [(i, text) - for i, text in enumerate(question['options'])] - num = radiolist_dialog( - title=self.backend_type + '编译配置进度(' + str(i + 1) + - '/' + str(sum_question) + ')', - text="[Backend " + self.backend_type + "]: " + - question['note'], - values=choice, - style=dialog_style).run() - option = question['options'][num] if num is not None else question[ - 'default'] - answer[question['name']] = option - - return answer - - def activate_venv(self, hardware_type: str) -> bool: - - return True - - def deactivate_venv(self): - sys.path[: - 0] = self.prev_sys_path #will also revert the added site-packages - sys.prefix = self.real_prefix - os.environ['PATH'] = self.old_os_path - - -if __name__ == "__main__": - engine = PerfEngine() - engine.start_engine() \ No newline at end of file diff --git a/models/nlp/language_model/videobert/ixrt/ci/prepare.sh b/models/nlp/language_model/videobert/ixrt/ci/prepare.sh index 86c8b8d7..46444091 100644 --- a/models/nlp/language_model/videobert/ixrt/ci/prepare.sh +++ b/models/nlp/language_model/videobert/ixrt/ci/prepare.sh @@ -29,7 +29,7 @@ pip install -r requirements.txt mkdir -p data cp -r /root/data/checkpoints/open_videobert data/ - +cp /root/data/3rd_party/iluvatar-corex-ixrt/tools/optimizer/optimizer.py ./ # link and install requirements ln -s ../../../../../toolbox/ByteMLPerf ./ pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/requirements.txt @@ -40,6 +40,3 @@ mkdir -p 
./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cifar/ cp -r /root/data/datasets/open_cifar/cifar-100-python/ ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cifar/ mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/open_videobert/ cp /root/data/checkpoints/open_videobert/video-bert.onnx ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/open_videobert/ - -# run acc scripts -mv perf_engine.py ./ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py \ No newline at end of file diff --git a/models/nlp/language_model/videobert/ixrt/perf_engine.py b/models/nlp/language_model/videobert/ixrt/perf_engine.py deleted file mode 100644 index 089d9860..00000000 --- a/models/nlp/language_model/videobert/ixrt/perf_engine.py +++ /dev/null @@ -1,349 +0,0 @@ -# Copyright 2023 ByteDance and/or its affiliates. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import sys -import os -import logging -import importlib -import json -import subprocess -import time - -from typing import Any, Dict, Tuple -from prompt_toolkit.shortcuts import radiolist_dialog, input_dialog, yes_no_dialog -from prompt_toolkit.styles import Style - -BYTE_MLPERF_ROOT = os.path.dirname( - os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -os.chdir(BYTE_MLPERF_ROOT) -sys.path.insert(0, BYTE_MLPERF_ROOT) - -import argparse -from general_perf.core.configs.workload_store import load_workload -from general_perf.core.configs.dataset_store import load_dataset -from general_perf.core.configs.backend_store import init_compile_backend, init_runtime_backend - -logging.basicConfig(level=logging.INFO) -log = logging.getLogger("PerfEngine") -os.environ["TF_CPP_MIN_LOG_LEVEL"] = '3' - - -def get_args(): - """Parse commandline.""" - parser = argparse.ArgumentParser() - parser.add_argument( - "--task", - default="resnet50-tf-fp32", - help="The task going to be evaluted, refs to workloads/") - parser.add_argument( - "--hardware_type", - default="GPU", - help="The backend going to be evaluted, refs to backends/") - parser.add_argument("--compile_only", - action='store_true', - help="Run compilation only") - - args = parser.parse_args() - return args - - -class PerfEngine: - def __init__(self) -> None: - super().__init__() - self.args = get_args() - self.workload = load_workload(self.args.task) - self.backend_type = self.args.hardware_type - self.compile_backend = None - self.old_os_path = os.environ['PATH'] - self.prev_sys_path = list(sys.path) - self.real_prefix = sys.prefix - self.compile_only_mode = False - - def start_engine(self) -> None: - ''' - Byte MlPerf will create an virtual env for each backend to avoid dependance conflict - ''' - success, total = 0, len(self.workload) - if total == 0: - return - log.info("******************* Backend Env Initization *******************") - status = self.activate_venv(self.backend_type) - if not status: - log.warning("Activate virtualenv Failed, Please Check...") - - self.compile_backend = init_compile_backend(self.backend_type) - self.runtime_backend = 
init_runtime_backend(self.backend_type) - - output_dir = os.path.abspath('general_perf/reports/' + - self.backend_type) - os.makedirs(output_dir, exist_ok=True) - - status = self.single_workload_perf(self.workload) - - def single_workload_perf( - self, workload: Dict[str, Any]) -> bool: - log.info("******************************************* Start to test model: {}. *******************************************".format(workload['model'])) - - # Check Compile Only Mode - self.compile_only_mode = False - if self.args.compile_only or workload['compile_only']: - self.compile_only_mode = True - - base_report = { - "Model": workload['model'].upper(), - "Backend": self.backend_type, - "Host Info": self.get_cpu_name() - } - - # Initalize Model Config Info - model_info = self.get_model_info(workload['model']) - pre_compile_config = {"workload": workload, 'model_info': model_info} - interact_info = self.check_interact_info(pre_compile_config) - pre_compile_config['interact_info'] = interact_info - if not model_info['dataset_name']: - model_info['dataset_name'] = 'fake_dataset' - - - ''' - Compile Backend could do some optimization like convert model format here - ''' - log.info("******************************************* Running Backend Compilation... *******************************************") - log.info("Running Backend Preoptimization...") - pre_compile_config = self.compile_backend.pre_optimize(pre_compile_config) - - - # Initalize dataset - dataset = load_dataset(model_info) - dataset.preprocess() - base_report['Dataset'] = model_info['dataset_name'].upper( - ) if model_info['dataset_name'] else None - - #Placeholder Only - segment_info = self.compile_backend.segment(pre_compile_config) - - best_batch_sizes = self.compile_backend.get_best_batch_size() - if isinstance(best_batch_sizes, list): - pre_compile_config['workload'][ - 'batch_sizes'] = best_batch_sizes - - log.info("Start to compile the model...") - start = time.time() - compile_info = self.compile_backend.compile(pre_compile_config, - dataset) - end = time.time() - - graph_compile_report = {} - graph_compile_report["Compile Duration"] = round(end - start, 5) - graph_compile_report["Compile Precision"] = compile_info[ - 'compile_precision'] - graph_compile_report["Subgraph Coverage"] = compile_info['sg_percent'] - if 'optimizations' in compile_info: - graph_compile_report['Optimizations'] = compile_info['optimizations'] - if 'instance_count' in compile_info: - base_report['Instance Count'] = compile_info['instance_count'] - if 'device_count' in compile_info: - base_report['Device Count'] = compile_info['device_count'] - base_report['Graph Compile'] = graph_compile_report - - # Initalize Output Dir and Reports - output_dir = os.path.abspath('general_perf/reports/' + - self.backend_type + '/' + - workload['model']) - os.makedirs(output_dir, exist_ok=True) - - # Compile only mode will stop here - if self.compile_only_mode: - base_report.pop("Backend") - return compile_info["compile_status"], base_report - - # load runtime backend - """ - Start Here - """ - batch_sizes = pre_compile_config['workload']['batch_sizes'] - self.runtime_backend.configs = compile_info - self.runtime_backend.workload = workload - self.runtime_backend.model_info = model_info - - self.runtime_backend.load(workload['batch_sizes'][0]) - # test accuracy - accuracy_report = {} - AccuracyChecker = self.get_accuracy_checker( - model_info['dataset_name'] - if model_info['dataset_name'] else 'fake_dataset') - AccuracyChecker.runtime_backend = self.runtime_backend - 
AccuracyChecker.dataloader = dataset - AccuracyChecker.output_dir = output_dir - AccuracyChecker.configs = compile_info - - if workload['test_accuracy']: - log.info("******************************************* Running Accuracy Checker... *******************************************") - - dataset.rebatch(self.runtime_backend.get_loaded_batch_size()) - accuracy_results = AccuracyChecker.calculate_acc( - workload['data_percent']) - - accuracy_report['Data Percent'] = workload['data_percent'] - accuracy_report.update(accuracy_results) - - # test numeric - if workload['test_numeric']: - log.info("******************************************* Running Numeric Checker... *******************************************") - - dataset.rebatch(self.runtime_backend.get_loaded_batch_size()) - if not workload['test_accuracy']: - accuracy_results = AccuracyChecker.calculate_acc( - workload['data_percent']) - diff_results = AccuracyChecker.calculate_diff() - accuracy_report.update(diff_results) - # accuracy_report['Diff Dist'] = compile_info['model'] + '-to-' + compile_info['compile_precision'].lower() + ".png" - - if accuracy_report: - base_report['Accuracy'] = accuracy_report - - # function to test qps and latency - if workload['test_perf']: - log.info("******************************************* Runing QPS Checker... *******************************************") - performance_reports = [] - qs_status = self.runtime_backend.is_qs_mode_supported() - if qs_status: - qs_config = self.runtime_backend.generate_qs_config() - performance_reports = self.qs_benchmark(qs_config) - else: - for bs in batch_sizes: - self.runtime_backend.load(bs) - batch_reports = self.runtime_backend.benchmark(dataset) - performance_reports.append(batch_reports) - base_report['Performance'] = performance_reports - - if "Instance Count" not in base_report: - log.warning("Vendors need to Add # of instances") - if "Device Count" not in base_report: - log.warning("Vendors need to Add # of devices") - - # write output to json file - output_report_path = output_dir + "/result-" + compile_info['compile_precision'].lower() + ".json" - with open(output_report_path, 'w') as file: - json.dump(base_report, file, indent=4) - - base_report.pop("Backend") - log.info("Testing Finish. Report is saved in path: [ {}/{} ]". - format(output_dir[output_dir.rfind('general_perf'):], - os.path.basename(output_report_path))) - - return compile_info["compile_status"] - - #WIP - def qs_benchmark(self, qs_config: Dict[str, Any]) -> list: - return [] - - def get_accuracy_checker(self, dataset_name: str): - AccuracyChecker = importlib.import_module('general_perf.datasets.' 
+ - dataset_name + - ".test_accuracy") - AccuracyChecker = getattr(AccuracyChecker, 'AccuracyChecker') - return AccuracyChecker() - - def get_model_info(self, model_name: str) -> Dict[str, Any]: - with open("general_perf/model_zoo/" + model_name + '.json', - 'r') as file: - model_info = json.load(file) - return model_info - - def get_cpu_name(self): - command = "lscpu | grep 'Model name' | awk -F: '{print $2}'" - cpu_name = subprocess.check_output(command, shell=True) - return cpu_name.decode().strip() - - def check_interact_info( - self, pre_compile_config: Dict[str, Dict]) -> Dict[str, Any]: - interact_info = self.compile_backend.get_interact_profile( - pre_compile_config) - - answer = {} - if len(interact_info) == 0: - return answer - - dialog_style = Style.from_dict({ - 'dialog': 'bg:#88b8ff', - 'dialog frame.label': 'bg:#ffffff #000000', - 'dialog.body': 'bg:#000000 #a0acde', - 'dialog shadow': 'bg:#004aaa', - }) - - input_style = Style.from_dict({ - 'dialog': 'bg:#88b8ff', - 'dialog frame.label': 'bg:#ffffff #000000', - 'dialog.body': 'bg:#000000 #a0acde', - 'dialog shadow': 'bg:#004aaa', - 'text-area.prompt': 'bg:#ffffff', - 'text-area': '#000000', - }) - - option = yes_no_dialog(title=self.backend_type + '编译配置', - text='[请选择]:是否进行编译后端配置:', - style=dialog_style).run() - if option: - sum_question = len(interact_info) - for i, question in enumerate(interact_info): - if question['depends']: - state = 0 - for title in question['depends'].split(','): - if not answer[title]: - state = 1 - if state: - continue - if question['dialog_type'] == 'Yes/No Dialog': - option = yes_no_dialog( - title=self.backend_type + '编译配置进度(' + str(i + 1) + - '/' + str(sum_question) + ')', - text="[Backend " + self.backend_type + "]: " + - question['note'], - style=dialog_style).run() - elif question['dialog_type'] == "Input Dialog": - option = input_dialog( - title=self.backend_type + '编译配置进度(' + str(i + 1) + - '/' + str(sum_question) + ')', - text="[Backend " + self.backend_type + "]: " + - question['note'], - style=input_style).run() - elif question['dialog_type'] == "Radiolist Dialog": - choice = [(i, text) - for i, text in enumerate(question['options'])] - num = radiolist_dialog( - title=self.backend_type + '编译配置进度(' + str(i + 1) + - '/' + str(sum_question) + ')', - text="[Backend " + self.backend_type + "]: " + - question['note'], - values=choice, - style=dialog_style).run() - option = question['options'][num] if num is not None else question[ - 'default'] - answer[question['name']] = option - - return answer - - def activate_venv(self, hardware_type: str) -> bool: - - return True - - def deactivate_venv(self): - sys.path[: - 0] = self.prev_sys_path #will also revert the added site-packages - sys.prefix = self.real_prefix - os.environ['PATH'] = self.old_os_path - - -if __name__ == "__main__": - engine = PerfEngine() - engine.start_engine() diff --git a/models/recommendation/ctr-prediction/widedeep/ixrt/README.md b/models/recommendation/ctr-prediction/widedeep/ixrt/README.md index 94954558..835d70a7 100644 --- a/models/recommendation/ctr-prediction/widedeep/ixrt/README.md +++ b/models/recommendation/ctr-prediction/widedeep/ixrt/README.md @@ -9,10 +9,7 @@ Generalized linear models with nonlinear feature transformations are widely used ### Install ```bash -pip3 install tf2onnx -pip3 install pycuda -pip3 install onnxsim -pip3 install py-libnuma==1.2 +pip3 install -r requirements.txt ``` ### Download @@ -62,7 +59,6 @@ For detailed steps regarding this model, please refer to this document: = target_qps: 
print("pass!") exit() diff --git a/models/speech/speech_recognition/conformer/ixrt/requirements.txt b/models/speech/speech_recognition/conformer/ixrt/requirements.txt new file mode 100644 index 00000000..dd4788cf --- /dev/null +++ b/models/speech/speech_recognition/conformer/ixrt/requirements.txt @@ -0,0 +1,5 @@ +tqdm +onnx +typeguard==2.13.3 +onnxsim +pycuda \ No newline at end of file diff --git a/models/speech/speech_recognition/transformer_asr/ixrt/README.md b/models/speech/speech_recognition/transformer_asr/ixrt/README.md index 0c2e1b45..9d809a7d 100644 --- a/models/speech/speech_recognition/transformer_asr/ixrt/README.md +++ b/models/speech/speech_recognition/transformer_asr/ixrt/README.md @@ -9,7 +9,7 @@ Beam search allows us to exert control over the output of text generation. This ### Install ```bash -pip3 install speechbrain==0.5.13 +pip3 install -r requirements.txt ``` ### Download diff --git a/models/speech/speech_recognition/transformer_asr/ixrt/ci/prepare.sh b/models/speech/speech_recognition/transformer_asr/ixrt/ci/prepare.sh new file mode 100644 index 00000000..9e4e9403 --- /dev/null +++ b/models/speech/speech_recognition/transformer_asr/ixrt/ci/prepare.sh @@ -0,0 +1,40 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +mkdir -p results/transformer/8886/save +mkdir -p /home/data/speechbrain/aishell/csv_data +cp -r /root/data/datasets/AISHELL/data_aishell /home/data/speechbrain/aishell +cp results/transformer/8886/*.csv /home/data/speechbrain/aishell/csv_data + +bash build.sh + +python3 builder.py \ +--ckpt_path results/transformer/8886/save \ +--head_num 4 \ +--max_batch_size 64 \ +--max_seq_len 1024 \ +--engine_path transformer.engine \ No newline at end of file diff --git a/models/speech/speech_recognition/transformer_asr/ixrt/requirements.txt b/models/speech/speech_recognition/transformer_asr/ixrt/requirements.txt new file mode 100644 index 00000000..4aa5b041 --- /dev/null +++ b/models/speech/speech_recognition/transformer_asr/ixrt/requirements.txt @@ -0,0 +1 @@ +speechbrain==0.5.13 \ No newline at end of file diff --git a/tests/models_ixrt.yaml b/tests/models_ixrt.yaml index 8282ad95..051d0017 100644 --- a/tests/models_ixrt.yaml +++ b/tests/models_ixrt.yaml @@ -367,7 +367,7 @@ - int8 relative_path: models/cv/detection/yolox/ixrt task_type: cv/detection -- datasets: https://vis-www.cs.umass.edu/lfw/lfw.tgz +- datasets: https://raw.githubusercontent.com/lanrax/Project_dataset/master/facenet_datasets.zip download_url: https://drive.google.com/open?id=1R77HmFADxe87GmoLwzfgMu_HY0IhcyBz name: facenet need_third_part: true @@ -415,8 +415,8 @@ need_third_part: false precisions: - fp16 - relative_path: models/multimodal/text_and_image/clip/ixformer - task_type: cv/segmentation + relative_path: models/multimodal/text_and_image/clip/ixrt + task_type: multimodal/text_and_image - datasets: https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open_squad.tar download_url: https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open_albert.tar name: albert @@ -482,15 +482,15 @@ need_third_part: false precisions: - fp16 - - int8 - relative_path: models/recommendation/widedeep/ixrt - task_type: recommendation/widedeep + relative_path: models/recommendation/ctr-prediction/widedeep/ixrt + task_type: recommendation/ctr-prediction - datasets: https://www.openslr.org/33/aishell.tar.gz download_url: http://mobvoi-speech-public.ufile.ucloud.cn/public/wenet/aishell/20211025_conformer_exp.tar.gz name: conformer need_third_part: true precisions: - fp16 + - int8 relative_path: models/speech/speech_recognition/conformer/ixrt task_type: speech/speech_recognition - datasets: https://www.openslr.org/33/aishell.tar.gz diff --git a/tests/run_ixrt.py b/tests/run_ixrt.py index 45f0d8b0..9bb9a1ed 100644 --- a/tests/run_ixrt.py +++ b/tests/run_ixrt.py @@ -75,27 +75,27 @@ def main(): logging.info(f"End running {model['name']} test case.") # Segmentation模型 - if model["task_type"] in ["cv/segmentation"]: + if model["task_type"] in ["cv/segmentation", "cv/face", "multimodal/text_and_image"]: logging.info(f"Start running {model['name']} test case:\n{json.dumps(model, indent=4)}") d_url = model["download_url"] if d_url is not None: - result = run_segmentation_testcase(model) + result = run_segmentation_and_face_testcase(model) check_model_result(result) logging.debug(f"The result of {model['name']} is\n{json.dumps(result, indent=4)}") logging.info(f"End running {model['name']} test case.") - # # Speech模型 - # if model["task_type"] in ["speech/speech_recognition"]: - # logging.info(f"Start running {model['name']} test 
case:\n{json.dumps(model, indent=4)}") - # d_url = model["download_url"] - # if d_url is not None: - # result = run_speech_testcase(model) - # check_model_result(result) - # logging.debug(f"The result of {model['name']} is\n{json.dumps(result, indent=4)}") - # logging.info(f"End running {model['name']} test case.") + # Speech模型 + if model["task_type"] in ["speech/speech_recognition"]: + logging.info(f"Start running {model['name']} test case:\n{json.dumps(model, indent=4)}") + d_url = model["download_url"] + if d_url is not None: + result = run_speech_testcase(model) + check_model_result(result) + logging.debug(f"The result of {model['name']} is\n{json.dumps(result, indent=4)}") + logging.info(f"End running {model['name']} test case.") # NLP模型 - if model["task_type"] in ["nlp/language_model"]: + if model["task_type"] in ["nlp/language_model", "recommendation/ctr-prediction"]: logging.info(f"Start running {model['name']} test case:\n{json.dumps(model, indent=4)}") d_url = model["download_url"] if d_url is not None: @@ -167,6 +167,17 @@ def run_clf_testcase(model): bash scripts/infer_{model_name}_{prec}_performance.sh """ + if model_name == "swin_transformer_large": + script = f""" + cd ../{model['relative_path']} + export ORIGIN_ONNX_NAME=./swin-large-torch-fp32 + export OPTIMIER_FILE=/Path/ixrt/oss/tools/optimizer/optimizer.py + export PROJ_PATH=./ + bash scripts/infer_swinl_fp16_performance.sh + cd ./ByteMLPerf/byte_infer_perf/general_perf + python3 core/perf_engine.py --hardware_type ILUVATAR --task swin-large-torch-fp32 + """ + r, t = run_script(script) sout = r.stdout matchs = combined_pattern.finditer(sout) @@ -200,11 +211,6 @@ def run_detec_testcase(model): bash ci/prepare.sh """ - # if model["need_third_part"] and model["3rd_party_repo"]: - # third_party_repo = model["3rd_party_repo"] - # prepare_script += f"unzip /root/data/3rd_party/{third_party_repo}.zip -d ./\n" - # prepare_script += "bash ci/prepare.sh\n" - # add pip list info when in debug mode if utils.is_debug(): pip_list_script = "pip list | grep -E 'numpy|transformer|igie|mmcv|onnx'\n" @@ -234,7 +240,7 @@ def run_detec_testcase(model): """ if model_name == "rtmpose": - script = "python3 predict.py --model ./rtmpose_opt.onnx --precision fp16 --img_path demo/demo.jpg" + script = "python3 predict.py --model data/rtmpose/rtmpose_opt.onnx --precision fp16 --img_path demo/demo.jpg" r, t = run_script(script) sout = r.stdout @@ -275,14 +281,12 @@ def run_detec_testcase(model): return result -def run_segmentation_testcase(model): +def run_segmentation_and_face_testcase(model): model_name = model["name"] result = { "name": model_name, "result": {}, } - d_url = model["download_url"] - checkpoint_n = d_url.split("/")[-1] dataset_n = model["datasets"].split("/")[-1] prepare_script = f""" cd ../{model['relative_path']} @@ -307,44 +311,23 @@ def run_segmentation_testcase(model): export COCO_GT=./{dataset_n}/annotations/instances_val2017.json export EVAL_DIR=./{dataset_n}/val2017 export RUN_DIR=./ + bash scripts/infer_{model_name}_{prec}_accuracy.sh bash scripts/infer_{model_name}_{prec}_performance.sh """ r, t = run_script(script) sout = r.stdout - fps_pattern = r"(?PFPS\s*:\s*(\d+\.?\d*))" - e2e_pattern = r"(?P\s*E2E time\s*:\s*(\d+\.\d+)\s)" - combined_pattern = re.compile(f"{fps_pattern}|{e2e_pattern}") - matchs = combined_pattern.finditer(sout) - for match in matchs: - result["result"].setdefault(prec, {"status": "FAIL"}) - for name, value in match.groupdict().items(): - if value: - try: - result["result"][prec][name] = 
float(f"{float(value.split(':')[1].strip()):.3f}") - break - except ValueError: - print("The string cannot be converted to a float.") - result["result"][prec][name] = value - pattern = r"Average Precision \(AP\) @\[ (IoU=0.50[:\d.]*)\s*\| area= all \| maxDets=\s?\d+\s?\] =\s*([\d.]+)" + + pattern = METRIC_PATTERN matchs = re.findall(pattern, sout) + result["result"].setdefault(prec, {"status": "FAIL"}) + logging.debug(f"matchs:\n{matchs}") for m in matchs: - result["result"].setdefault(prec, {}) - try: - result["result"][prec] = result["result"][prec] | {m[0]: float(m[1])} - except ValueError: - print("The string cannot be converted to a float.") - result["result"][prec] = result["result"][prec] | {m[0]: m[1]} - if matchs and len(matchs) == 2: + result["result"][prec].update(get_metric_result(m)) + if len(matchs) == 2: result["result"][prec]["status"] = "PASS" - else: - pattern = METRIC_PATTERN - matchs = re.findall(pattern, sout) - if matchs and len(matchs) == 1: - result["result"].setdefault(prec, {}) - result["result"][prec].update(get_metric_result(matchs[0])) - result["result"][prec]["status"] = "PASS" + result["result"][prec]["Cost time (s)"] = t logging.debug(f"matchs:\n{matchs}") return result @@ -380,9 +363,9 @@ def run_nlp_testcase(model): bash scripts/infer_{model_name}_{prec}_performance.sh cd ./ByteMLPerf/byte_infer_perf/general_perf """ - if model_name == "roformer": + if model_name == "roformer" or model_name == "widedeep": script += f""" - python3 core/perf_engine.py --hardware_type ILUVATAR --task roformer-tf-fp32 + python3 core/perf_engine.py --hardware_type ILUVATAR --task {model_name}-tf-fp32 """ elif model_name == "videobert": script += f""" @@ -442,18 +425,6 @@ def run_speech_testcase(model): dataset_n = model["datasets"].split("/")[-1] prepare_script = f""" cd ../{model['relative_path']} - ln -s /root/data/checkpoints/{checkpoint_n} ./ - ln -s /root/data/datasets/{dataset_n} ./ - """ - - if model["need_third_part"] and model_name == "conformer": - prepare_script += "unzip /root/data/3rd_party/kenlm.zip -d ./ctc_decoder/swig/kenlm\n" - prepare_script += "unzip /root/data/3rd_party/ThreadPool.zip -d ./ctc_decoder/swig/ThreadPool\n" - prepare_script += "tar -xzvf /root/data/3rd_party/openfst-1.6.3.tar.gz -C ./ctc_decoder/swig/\n" - - prepare_script += """ - export PYTHONPATH=`pwd`/wenet:$PYTHONPATH - echo $PYTHONPATH bash ci/prepare.sh ls -l | grep onnx """ @@ -469,29 +440,27 @@ def run_speech_testcase(model): logging.info(f"Start running {model_name} {prec} test case") script = f""" cd ../{model['relative_path']} - export PYTHONPATH=./wenet:$PYTHONPATH - echo $PYTHONPATH bash scripts/infer_{model_name}_{prec}_accuracy.sh bash scripts/infer_{model_name}_{prec}_performance.sh """ + if model_name == "transformer_asr": + script = f""" + cd ../{model['relative_path']} + python3 inference.py hparams/train_ASR_transformer.yaml --data_folder=/home/data/speechbrain/aishell --engine_path transformer.engine + """ + r, t = run_script(script) sout = r.stdout - pattern = r"\* ([\w\d ]+):\s*([\d.]+)[ ms%]*, ([\w\d ]+):\s*([\d.]+)[ ms%]*" - matchs = re.findall(pattern, sout) - for m in matchs: - result["result"].setdefault(prec, {"status": "FAIL"}) - try: - result["result"][prec] = result["result"][prec] | {m[0]: float(m[1]), m[2]: float(m[3])} - except ValueError: - print("The string cannot be converted to a float.") - result["result"][prec] = result["result"][prec] | {m[0]: m[1], m[2]: m[3]} pattern = METRIC_PATTERN matchs = re.findall(pattern, sout) - if matchs and len(matchs) == 
1: - result["result"].setdefault(prec, {}) - result["result"][prec].update(get_metric_result(matchs[0])) + result["result"].setdefault(prec, {"status": "FAIL"}) + logging.debug(f"matchs:\n{matchs}") + for m in matchs: + result["result"][prec].update(get_metric_result(m)) + if len(matchs) == 2: result["result"][prec]["status"] = "PASS" + result["result"][prec]["Cost time (s)"] = t logging.debug(f"matchs:\n{matchs}") return result diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py index 2b7a4df7..089d9860 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py @@ -20,7 +20,6 @@ import subprocess import time from typing import Any, Dict, Tuple -import virtualenv from prompt_toolkit.shortcuts import radiolist_dialog, input_dialog, yes_no_dialog from prompt_toolkit.styles import Style @@ -33,7 +32,6 @@ import argparse from general_perf.core.configs.workload_store import load_workload from general_perf.core.configs.dataset_store import load_dataset from general_perf.core.configs.backend_store import init_compile_backend, init_runtime_backend -from general_perf.tools.build_pdf import build_pdf logging.basicConfig(level=logging.INFO) log = logging.getLogger("PerfEngine") @@ -208,7 +206,7 @@ class PerfEngine: workload['data_percent']) diff_results = AccuracyChecker.calculate_diff() accuracy_report.update(diff_results) - accuracy_report['Diff Dist'] = compile_info['model'] + '-to-' + compile_info['compile_precision'].lower() + ".png" + # accuracy_report['Diff Dist'] = compile_info['model'] + '-to-' + compile_info['compile_precision'].lower() + ".png" if accuracy_report: base_report['Accuracy'] = accuracy_report @@ -242,11 +240,6 @@ class PerfEngine: log.info("Testing Finish. Report is saved in path: [ {}/{} ]". format(output_dir[output_dir.rfind('general_perf'):], os.path.basename(output_report_path))) - build_pdf(output_report_path) - log.info("PDF Version is saved in path: [ {}/{}-TO-{}.pdf ]".format( - output_dir[output_dir.rfind('general_perf'):], - base_report['Model'], - output_report_path.split('/')[-1].split('-')[1].upper())) return compile_info["compile_status"] @@ -341,46 +334,7 @@ class PerfEngine: return answer def activate_venv(self, hardware_type: str) -> bool: - if os.path.exists('general_perf/backends/' + hardware_type + - '/requirements.txt'): - log.info("Activating Virtual Env for " + hardware_type) - - venv_dir = os.path.join("general_perf/backends", - hardware_type + "/venv") - activate_file = os.path.join(venv_dir, 'bin', 'activate_this.py') - if not os.path.exists(venv_dir): - log.info("venv not exist, Creating Virtual Env for " + - hardware_type) - if (hardware_type == "HPU"): - virtualenv.create_environment(venv_dir,True) - else: - virtualenv.create_environment(venv_dir) - exec(open(activate_file).read(), {'__file__': activate_file}) - python_path = os.path.join(venv_dir, 'bin', 'python3') - subprocess.call([ - python_path, '-m', 'pip', 'install', '--upgrade', 'pip', '--quiet' - ]) - subprocess.call([ - python_path, '-m', 'pip', 'install', '-r', 'general_perf/backends/' + - hardware_type + '/requirements.txt', '-q' - ]) - else: - exec(open(activate_file).read(), {'__file__': activate_file}) - ''' - just in case install failed in pre-run. 
- ''' - python_path = os.path.join(venv_dir, 'bin', 'python3') - subprocess.call([ - python_path, '-m', 'pip', 'install', '--upgrade', 'pip', '--quiet' - ]) - subprocess.call([ - python_path, '-m', 'pip', 'install', '-r', 'general_perf/backends/' + - hardware_type + '/requirements.txt', '-q' - ]) - - if not hasattr(sys, 'real_prefix'): - return False - return True + return True def deactivate_venv(self): -- Gitee From 3e4171ba871a241e575e5ff914eb1481dab85be9 Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Fri, 10 Jan 2025 13:09:57 +0800 Subject: [PATCH 17/35] update speech --- models/speech/speech_recognition/conformer/ixrt/ci/prepare.sh | 2 +- .../speech_recognition/transformer_asr/ixrt/ci/prepare.sh | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/models/speech/speech_recognition/conformer/ixrt/ci/prepare.sh b/models/speech/speech_recognition/conformer/ixrt/ci/prepare.sh index aff3ca58..ec09d6ac 100644 --- a/models/speech/speech_recognition/conformer/ixrt/ci/prepare.sh +++ b/models/speech/speech_recognition/conformer/ixrt/ci/prepare.sh @@ -26,7 +26,7 @@ else fi mkdir -p conformer_checkpoints -ln -s /home/deepspark/datasets/INFER/conformer/20210601_u2++_conformer_exp_aishell ./conformer_checkpoints +ln -s /root/data/checkpoints/20210601_u2++_conformer_exp_aishell ./conformer_checkpoints cp -r /root/data/datasets/AISHELL/data_aishell ./ bash scripts/aishell_data_prepare.sh data_aishell tools \ No newline at end of file diff --git a/models/speech/speech_recognition/transformer_asr/ixrt/ci/prepare.sh b/models/speech/speech_recognition/transformer_asr/ixrt/ci/prepare.sh index 9e4e9403..9408f73a 100644 --- a/models/speech/speech_recognition/transformer_asr/ixrt/ci/prepare.sh +++ b/models/speech/speech_recognition/transformer_asr/ixrt/ci/prepare.sh @@ -26,6 +26,7 @@ else fi mkdir -p results/transformer/8886/save +cp -r /root/data/checkpoints/8886 results/transformer/8886 mkdir -p /home/data/speechbrain/aishell/csv_data cp -r /root/data/datasets/AISHELL/data_aishell /home/data/speechbrain/aishell cp results/transformer/8886/*.csv /home/data/speechbrain/aishell/csv_data -- Gitee From 37055b8ef8b34f16c8ea822b7e62e6b6a6ac4710 Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Tue, 14 Jan 2025 09:43:00 +0800 Subject: [PATCH 18/35] update metric log --- README.md | 2 +- models/cv/face/facenet/ixrt/README.md | 3 ++- models/cv/face/facenet/ixrt/ci/prepare.sh | 2 ++ .../lightweight_openpose/ixrt/inference_accuracy.py | 3 +++ .../lightweight_openpose/ixrt/inference_performance.py | 1 + models/cv/segmentation/mask_rcnn/ixrt/scripts/init.sh | 1 + models/cv/segmentation/solov1/ixrt/ci/prepare.sh | 1 + models/cv/segmentation/solov1/ixrt/solov1_inference.py | 3 ++- .../multimodal/text_and_image/clip/ixrt/inference.py | 4 ++-- tests/run_ixrt.py | 10 +++++++++- 10 files changed, 24 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index b5132821..a3dc5e12 100644 --- a/README.md +++ b/README.md @@ -906,7 +906,7 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型 Lightweight OpenPose FP16 - - Supported Supported INT8 diff --git a/models/cv/face/facenet/ixrt/README.md b/models/cv/face/facenet/ixrt/README.md index 2aed8d25..36ee33db 100644 --- a/models/cv/face/facenet/ixrt/README.md +++ b/models/cv/face/facenet/ixrt/README.md @@ -37,8 +37,9 @@ unzip 20180408-102900.zip mkdir -p checkpoints mkdir -p facenet_weights git clone https://github.com/timesler/facenet-pytorch +# facenet-pytorch/dependencies/facenet is a submodule; please make sure it has been cloned, or you can clone it directly
from https://github.com/davidsandberg/facenet/tree/096ed770f163957c1e56efa7feeb194773920f6e mv /Path/facenet/ixrt/tensorflow2pytorch.py facenet-pytorch -python3 /facenet-pytorch/tensorflow2pytorch.py \ +python3 ./facenet-pytorch/tensorflow2pytorch.py \ --facenet_weights_path ./facenet_weights \ --facenet_pb_path ./20180408-102900 \ --onnx_save_name facenet_export.onnx diff --git a/models/cv/face/facenet/ixrt/ci/prepare.sh b/models/cv/face/facenet/ixrt/ci/prepare.sh index b1882a4b..9d7081e7 100644 --- a/models/cv/face/facenet/ixrt/ci/prepare.sh +++ b/models/cv/face/facenet/ixrt/ci/prepare.sh @@ -24,6 +24,8 @@ elif [[ ${ID} == "centos" ]]; then else echo "Not Support Os" fi + +pip3 install -r requirements.txt unzip -q /root/data/checkpoints/20180408-102900.zip -d ./ unzip -q /root/data/datasets/facenet_datasets.zip -d ./ mkdir -p checkpoints diff --git a/models/cv/pose_estimation/lightweight_openpose/ixrt/inference_accuracy.py b/models/cv/pose_estimation/lightweight_openpose/ixrt/inference_accuracy.py index ccb1dab0..57118a76 100644 --- a/models/cv/pose_estimation/lightweight_openpose/ixrt/inference_accuracy.py +++ b/models/cv/pose_estimation/lightweight_openpose/ixrt/inference_accuracy.py @@ -296,7 +296,10 @@ def evaluate(labels, output_name, images_folder, engine, context, config, multis def main(config): engine, context = openpose_trtapi_ixrt(config) print(" config and load model ok...") + start_time = time.time() evaluate(config.labels, config.output_name, config.images_folder, engine, context, config) + e2e_time = time.time() - start_time + print(F"E2E time : {e2e_time:.3f} seconds") print(" done ...") diff --git a/models/cv/pose_estimation/lightweight_openpose/ixrt/inference_performance.py b/models/cv/pose_estimation/lightweight_openpose/ixrt/inference_performance.py index d472d6d6..f22c6ab4 100644 --- a/models/cv/pose_estimation/lightweight_openpose/ixrt/inference_performance.py +++ b/models/cv/pose_estimation/lightweight_openpose/ixrt/inference_performance.py @@ -133,6 +133,7 @@ def main(config): forward_time = end_time - start_time fps = config.run_loop * config.bsz / forward_time + print("FPS : ", fps) print(f"\nCheck FPS Test : {fps} Target:{config.fps_target} State : {'Pass' if fps >= config.fps_target else 'Fail'}") diff --git a/models/cv/segmentation/mask_rcnn/ixrt/scripts/init.sh b/models/cv/segmentation/mask_rcnn/ixrt/scripts/init.sh index bd1fe117..3b0949ae 100644 --- a/models/cv/segmentation/mask_rcnn/ixrt/scripts/init.sh +++ b/models/cv/segmentation/mask_rcnn/ixrt/scripts/init.sh @@ -31,6 +31,7 @@ bash prepare_system_env.sh #pip3 install opencv-python==4.6.0.66 pip3 install pycocotools==2.0.7 pip3 install tqdm +pip3 install pycuda # build engine cd ../python diff --git a/models/cv/segmentation/solov1/ixrt/ci/prepare.sh b/models/cv/segmentation/solov1/ixrt/ci/prepare.sh index 58447425..09fa1878 100644 --- a/models/cv/segmentation/solov1/ixrt/ci/prepare.sh +++ b/models/cv/segmentation/solov1/ixrt/ci/prepare.sh @@ -44,5 +44,6 @@ cd .. 
mkdir -p checkpoints ln -s /root/data/checkpoints/solo_r50_fpn_3x_coco_20210901_012353-11d224d7.pth ./ +ln -s /root/data/datasets/coco ./ python3 solo_torch2onnx.py --cfg ./solo_r50_fpn_3x_coco.py --checkpoint ./solo_r50_fpn_3x_coco_20210901_012353-11d224d7.pth --batch_size 1 mv r50_solo_bs1_800x800.onnx ./checkpoints/r50_solo_bs1_800x800.onnx \ No newline at end of file diff --git a/models/cv/segmentation/solov1/ixrt/solov1_inference.py b/models/cv/segmentation/solov1/ixrt/solov1_inference.py index e2f0ddf0..594286c0 100644 --- a/models/cv/segmentation/solov1/ixrt/solov1_inference.py +++ b/models/cv/segmentation/solov1/ixrt/solov1_inference.py @@ -152,6 +152,7 @@ def main(): print(f"Check segm_mAP Test : {round(segm_mAP,3)} Target:{args.target_map} State : {'Pass' if round(segm_mAP,3) >= args.target_map else 'Fail'}") metricResult["metricResult"]["segm_mAP"] = round(segm_mAP, 3) status_map = check_target(segm_mAP, args.target_map) + print(metricResult) sys.exit(int(not (status_map))) else: torch.cuda.synchronize() @@ -168,8 +169,8 @@ def main(): print(f"Check fps Test : {round(fps,3)} Target:{args.target_fps} State : {'Pass' if fps >= args.target_fps else 'Fail'}") metricResult["metricResult"]["FPS"] = round(fps, 3) status_fps = check_target(fps, args.target_fps) + print(metricResult) sys.exit(int(not (status_fps))) - print(metricResult) if __name__ == "__main__": diff --git a/models/multimodal/text_and_image/clip/ixrt/inference.py b/models/multimodal/text_and_image/clip/ixrt/inference.py index 5821b7e9..e690c988 100644 --- a/models/multimodal/text_and_image/clip/ixrt/inference.py +++ b/models/multimodal/text_and_image/clip/ixrt/inference.py @@ -26,7 +26,7 @@ from transformers import CLIPProcessor device = torch.device("cuda" if torch.cuda.is_available() else "cpu") model = ( - CLIPModel.from_pretrained("/data/clip-vit-base-patch32") + CLIPModel.from_pretrained("data/clip-vit-base-patch32") .to(device) .half() ) @@ -69,7 +69,7 @@ with torch.no_grad(): ) # we can take the softmax to get the label probabilities print(probs[:5]) print(probs[-5:-1]) - metricResult["metricResult"]["QPS-{batch_size}"] = round(batch_size / (end_time - start_time), 3) + metricResult["metricResult"][f"QPS-batch_size-{batch_size}"] = round(batch_size / (end_time - start_time), 3) print("QPS: ", batch_size / (end_time - start_time)) e2e_time = time.time() - e2e_start_time metricResult["metricResult"]["E2E time"] = round(e2e_time, 3) diff --git a/tests/run_ixrt.py b/tests/run_ixrt.py index 9bb9a1ed..de933af5 100644 --- a/tests/run_ixrt.py +++ b/tests/run_ixrt.py @@ -240,7 +240,10 @@ def run_detec_testcase(model): """ if model_name == "rtmpose": - script = "python3 predict.py --model data/rtmpose/rtmpose_opt.onnx --precision fp16 --img_path demo/demo.jpg" + script = f""" + cd ../{model['relative_path']} + python3 predict.py --model data/rtmpose/rtmpose_opt.onnx --precision fp16 --img_path demo/demo.jpg + """ r, t = run_script(script) sout = r.stdout @@ -398,6 +401,11 @@ def run_nlp_testcase(model): bash script/build_engine.sh --bs 32 --int8 bash script/inference_squad.sh --bs 32 --int8 """ + elif model_name == "clip": + script = f""" + cd ../{model['relative_path']} + python3 inference.py + """ r, t = run_script(script) sout = r.stdout -- Gitee From d59eb44a424787310ea1ca7c55907bce7e70ea2d Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Tue, 14 Jan 2025 13:31:38 +0800 Subject: [PATCH 19/35] update bytemlperf model --- .../swin_transformer_large/ixrt/README.md | 6 +- .../swin_transformer_large/ixrt/ci/prepare.sh 
| 4 +- .../nlp/language_model/albert/ixrt/README.md | 6 +- .../language_model/albert/ixrt/ci/prepare.sh | 10 +- .../albert/ixrt/requirements.txt | 4 +- .../nlp/language_model/deberta/ixrt/README.md | 6 +- .../language_model/deberta/ixrt/ci/prepare.sh | 10 +- .../deberta/ixrt/requirements.txt | 4 +- .../nlp/language_model/roberta/ixrt/README.md | 6 +- .../language_model/roberta/ixrt/ci/prepare.sh | 13 +- .../roberta/ixrt/requirements.txt | 3 +- .../language_model/roformer/ixrt/README.md | 6 +- .../roformer/ixrt/ci/prepare.sh | 10 +- .../roformer/ixrt/requirements.txt | 3 +- .../language_model/videobert/ixrt/README.md | 6 +- .../videobert/ixrt/ci/prepare.sh | 11 +- .../videobert/ixrt/requirements.txt | 4 +- toolbox/ByteMLPerf/.gitignore | 2 +- .../datasets/open_cail2019/data_loader.py | 95 ++++ .../open_cail2019/pre_process_data.py | 56 +++ .../datasets/open_cail2019/test_accuracy.py | 45 ++ .../datasets/open_cifar/data_loader.py | 155 +++++++ .../datasets/open_cifar/test_accuracy.py | 49 ++ .../datasets/open_imagenet/data_loader.py | 260 +++++++++++ .../datasets/open_imagenet/test_accuracy.py | 66 +++ .../open_squad/bert/accuracy_squad.py | 322 +++++++++++++ .../datasets/open_squad/bert/evaluate.py | 102 +++++ .../datasets/open_squad/create_squad_data.py | 427 ++++++++++++++++++ .../datasets/open_squad/data_loader.py | 199 ++++++++ .../datasets/open_squad/test_accuracy.py | 134 ++++++ 30 files changed, 1967 insertions(+), 57 deletions(-) create mode 100644 toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cail2019/data_loader.py create mode 100644 toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cail2019/pre_process_data.py create mode 100644 toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cail2019/test_accuracy.py create mode 100644 toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cifar/data_loader.py create mode 100644 toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cifar/test_accuracy.py create mode 100644 toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_imagenet/data_loader.py create mode 100644 toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_imagenet/test_accuracy.py create mode 100644 toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/bert/accuracy_squad.py create mode 100644 toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/bert/evaluate.py create mode 100644 toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/create_squad_data.py create mode 100644 toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/data_loader.py create mode 100644 toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/test_accuracy.py diff --git a/models/cv/classification/swin_transformer_large/ixrt/README.md b/models/cv/classification/swin_transformer_large/ixrt/README.md index 64f4daa6..e6cf19d4 100644 --- a/models/cv/classification/swin_transformer_large/ixrt/README.md +++ b/models/cv/classification/swin_transformer_large/ixrt/README.md @@ -45,8 +45,10 @@ python3 torch2onnx.py --model_path ./general_perf/model_zoo/popular/swin-large/s ## Inference ```bash +git clone https://gitee.com/deep-spark/iluvatar-corex-ixrt.git --depth=1 + export ORIGIN_ONNX_NAME=./swin-large-torch-fp32 -export OPTIMIER_FILE=/Path/ixrt/oss/tools/optimizer/optimizer.py +export OPTIMIER_FILE=./iluvatar-corex-ixrt/tools/optimizer/optimizer.py export PROJ_PATH=./ ``` @@ -76,6 +78,8 @@ cp general_perf/model_zoo/popular/swin-large/* ./ByteMLPerf/general_perf/model_z # run 
acc scripts cd ./ByteMLPerf/byte_infer_perf/general_perf +mkdir -p workloads +wget -O workloads/swin-large-torch-fp32.json https://raw.githubusercontent.com/bytedance/ByteMLPerf/refs/heads/main/byte_infer_perf/general_perf/workloads/swin-large-torch-fp32.json python3 core/perf_engine.py --hardware_type ILUVATAR --task swin-large-torch-fp32 ``` diff --git a/models/cv/classification/swin_transformer_large/ixrt/ci/prepare.sh b/models/cv/classification/swin_transformer_large/ixrt/ci/prepare.sh index 5adf3391..0581f0d8 100644 --- a/models/cv/classification/swin_transformer_large/ixrt/ci/prepare.sh +++ b/models/cv/classification/swin_transformer_large/ixrt/ci/prepare.sh @@ -43,4 +43,6 @@ pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/requ # copy data cp -r /root/data/datasets/open_imagenet/* ByteMLPerf/byte_infer_perf/general_perf/datasets/open_imagenet/ mkdir -p ./ByteMLPerf/general_perf/model_zoo/popular/swin-large -cp general_perf/model_zoo/popular/swin-large/* ./ByteMLPerf/general_perf/model_zoo/popular/swin-large \ No newline at end of file +cp general_perf/model_zoo/popular/swin-large/* ./ByteMLPerf/general_perf/model_zoo/popular/swin-large + +cp -r /root/data/3rd_party/workloads ./ByteMLPerf/byte_infer_perf/general_perf/workloads \ No newline at end of file diff --git a/models/nlp/language_model/albert/ixrt/README.md b/models/nlp/language_model/albert/ixrt/README.md index 1cc0156d..2af14b2b 100644 --- a/models/nlp/language_model/albert/ixrt/README.md +++ b/models/nlp/language_model/albert/ixrt/README.md @@ -42,8 +42,10 @@ onnxsim albert-torch-fp32.onnx albert-torch-fp32-sim.onnx ## Inference ```bash +git clone https://gitee.com/deep-spark/iluvatar-corex-ixrt.git --depth=1 + export ORIGIN_ONNX_NAME=./albert-torch-fp32-sim -export OPTIMIER_FILE=./ixrt-oss/tools/optimizer/optimizer.py +export OPTIMIER_FILE=./iluvatar-corex-ixrt/tools/optimizer/optimizer.py export PROJ_PATH=./ ``` @@ -79,6 +81,8 @@ cp ./general_perf/model_zoo/popular/open_albert/*.pt ./ByteMLPerf/byte_infer_per # run acc script cd ./ByteMLPerf/byte_infer_perf/general_perf +mkdir -p workloads +wget -O workloads/albert-torch-fp32.json https://raw.githubusercontent.com/bytedance/ByteMLPerf/refs/heads/main/byte_infer_perf/general_perf/workloads/albert-torch-fp32.json sed -i 's/tensorrt_legacy/tensorrt/' ./backends/ILUVATAR/common.py sed -i 's/tensorrt_legacy/tensorrt/' ./backends/ILUVATAR/compile_backend_iluvatar.py sed -i 's/tensorrt_legacy/tensorrt/' ./backends/ILUVATAR/runtime_backend_iluvatar.py diff --git a/models/nlp/language_model/albert/ixrt/ci/prepare.sh b/models/nlp/language_model/albert/ixrt/ci/prepare.sh index 41644a02..83592230 100644 --- a/models/nlp/language_model/albert/ixrt/ci/prepare.sh +++ b/models/nlp/language_model/albert/ixrt/ci/prepare.sh @@ -16,14 +16,7 @@ set -x -ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') -if [[ ${ID} == "ubuntu" ]]; then - apt install -y libgl1-mesa-glx -elif [[ ${ID} == "centos" ]]; then - yum install -y mesa-libGL -else - echo "Not Support Os" -fi +apt install -y libnuma-dev cp /root/data/3rd_party/albert-torch-fp32.json ./ cp /root/data/3rd_party/iluvatar-corex-ixrt/tools/optimizer/optimizer.py ./ @@ -55,6 +48,7 @@ cp /root/data/checkpoints/open_albert/*.pt ./ByteMLPerf/byte_infer_perf/general_ # run acc script cd ./ByteMLPerf/byte_infer_perf/general_perf +cp -r /root/data/3rd_party/workloads ./ sed -i 's/tensorrt_legacy/tensorrt/' ./backends/ILUVATAR/common.py sed -i 's/tensorrt_legacy/tensorrt/' 
./backends/ILUVATAR/compile_backend_iluvatar.py sed -i 's/tensorrt_legacy/tensorrt/' ./backends/ILUVATAR/runtime_backend_iluvatar.py \ No newline at end of file diff --git a/models/nlp/language_model/albert/ixrt/requirements.txt b/models/nlp/language_model/albert/ixrt/requirements.txt index cac1ba9a..7e0fcb9e 100644 --- a/models/nlp/language_model/albert/ixrt/requirements.txt +++ b/models/nlp/language_model/albert/ixrt/requirements.txt @@ -5,7 +5,7 @@ tqdm pycuda onnx tabulate -cv2 pycocotools opencv-python==4.6.0.66 -transformers==4.33.3 \ No newline at end of file +transformers==4.33.3 +typing_extensions==4.12.2 \ No newline at end of file diff --git a/models/nlp/language_model/deberta/ixrt/README.md b/models/nlp/language_model/deberta/ixrt/README.md index 3e5f944b..fd1757cd 100644 --- a/models/nlp/language_model/deberta/ixrt/README.md +++ b/models/nlp/language_model/deberta/ixrt/README.md @@ -41,8 +41,10 @@ python3 remove_clip_and_cast.py ## Inference ```bash +git clone https://gitee.com/deep-spark/iluvatar-corex-ixrt.git --depth=1 + export ORIGIN_ONNX_NAME=./deberta-sim-drop-clip-drop-invaild-cast -export OPTIMIER_FILE=/Path/ixrt/oss/tools/optimizer/optimizer.py +export OPTIMIER_FILE=./iluvatar-corex-ixrt/tools/optimizer/optimizer.py export PROJ_PATH=./ ``` @@ -73,6 +75,8 @@ mv ./deberta-sim-drop-clip-drop-invaild-cast.onnx general_perf/model_zoo/popular mv ./general_perf/model_zoo/popular/ ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/ cd ./ByteMLPerf/byte_infer_perf/general_perf +mkdir -p workloads +wget -O workloads/deberta-torch-fp32.json https://raw.githubusercontent.com/bytedance/ByteMLPerf/refs/heads/main/byte_infer_perf/general_perf/workloads/deberta-torch-fp32.json wget http://files.deepspark.org.cn:880/deepspark/Palak.tar tar -zxvf Palak.tar diff --git a/models/nlp/language_model/deberta/ixrt/ci/prepare.sh b/models/nlp/language_model/deberta/ixrt/ci/prepare.sh index 7a113b5a..755d4f19 100644 --- a/models/nlp/language_model/deberta/ixrt/ci/prepare.sh +++ b/models/nlp/language_model/deberta/ixrt/ci/prepare.sh @@ -16,14 +16,7 @@ set -x -ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') -if [[ ${ID} == "ubuntu" ]]; then - apt install -y libgl1-mesa-glx -elif [[ ${ID} == "centos" ]]; then - yum install -y mesa-libGL -else - echo "Not Support Os" -fi +apt install -y libnuma-dev pip install -r requirements.txt @@ -48,6 +41,7 @@ cp ./deberta-sim-drop-clip-drop-invaild-cast.onnx /root/data/checkpoints/open_de cp -r /root/data/checkpoints/open_deberta ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/ cd ./ByteMLPerf/byte_infer_perf/general_perf +cp -r /root/data/3rd_party/workloads ./ wget http://files.deepspark.org.cn:880/deepspark/Palak.tar tar -zxvf Palak.tar diff --git a/models/nlp/language_model/deberta/ixrt/requirements.txt b/models/nlp/language_model/deberta/ixrt/requirements.txt index 05393759..9dfdb081 100644 --- a/models/nlp/language_model/deberta/ixrt/requirements.txt +++ b/models/nlp/language_model/deberta/ixrt/requirements.txt @@ -5,8 +5,8 @@ tqdm pycuda onnx tabulate -cv2 pycocotools opencv-python==4.6.0.66 tf2onnx -transformers==4.33.3 \ No newline at end of file +transformers==4.33.3 +typing_extensions==4.12.2 \ No newline at end of file diff --git a/models/nlp/language_model/roberta/ixrt/README.md b/models/nlp/language_model/roberta/ixrt/README.md index 957fce3b..55478c31 100644 --- a/models/nlp/language_model/roberta/ixrt/README.md +++ b/models/nlp/language_model/roberta/ixrt/README.md @@ -44,8 +44,10 @@ onnxsim open_roberta/roberta-torch-fp32.onnx 
open_roberta/roberta-torch-fp32_sim ## Inference ```bash +git clone https://gitee.com/deep-spark/iluvatar-corex-ixrt.git --depth=1 + export ORIGIN_ONNX_NAME=./open_roberta/roberta-torch-fp32_sim -export OPTIMIER_FILE=${IXRT_OSS_ROOT}/tools/optimizer/optimizer.py +export OPTIMIER_FILE=./iluvatar-corex-ixrt/tools/optimizer/optimizer.py export PROJ_PATH=./ ``` @@ -85,6 +87,8 @@ mv csarron/ ./ByteMLPerf/byte_infer_perf/ # Run Acc scripts cd ./ByteMLPerf/byte_infer_perf/ +mkdir -p workloads +wget -O workloads/roberta-torch-fp32.json https://raw.githubusercontent.com/bytedance/ByteMLPerf/refs/heads/main/byte_infer_perf/general_perf/workloads/roberta-torch-fp32.json python3 general_perf/core/perf_engine.py --hardware_type ILUVATAR --task roberta-torch-fp32 ``` diff --git a/models/nlp/language_model/roberta/ixrt/ci/prepare.sh b/models/nlp/language_model/roberta/ixrt/ci/prepare.sh index 390aab7c..c4df075b 100644 --- a/models/nlp/language_model/roberta/ixrt/ci/prepare.sh +++ b/models/nlp/language_model/roberta/ixrt/ci/prepare.sh @@ -16,14 +16,7 @@ set -x -ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') -if [[ ${ID} == "ubuntu" ]]; then - apt install -y libgl1-mesa-glx -elif [[ ${ID} == "centos" ]]; then - yum install -y mesa-libGL -else - echo "Not Support Os" -fi +apt install -y libnuma-dev pip install -r requirements.txt @@ -53,4 +46,6 @@ cp /root/data/datasets/open_squad/* ./ByteMLPerf/byte_infer_perf/general_perf/da wget http://files.deepspark.org.cn:880/deepspark/csarron.tar tar xf csarron.tar rm -f csarron.tar -mv csarron/ ./ByteMLPerf/byte_infer_perf/ \ No newline at end of file +mv csarron/ ./ByteMLPerf/byte_infer_perf/ +cd ./ByteMLPerf/byte_infer_perf/general_perf +cp -r /root/data/3rd_party/workloads ./ \ No newline at end of file diff --git a/models/nlp/language_model/roberta/ixrt/requirements.txt b/models/nlp/language_model/roberta/ixrt/requirements.txt index b920daeb..de4e636f 100644 --- a/models/nlp/language_model/roberta/ixrt/requirements.txt +++ b/models/nlp/language_model/roberta/ixrt/requirements.txt @@ -2,4 +2,5 @@ onnxsim py-libnuma==1.2 bert pycuda -transformers==4.33.3 \ No newline at end of file +transformers==4.33.3 +typing_extensions==4.12.2 \ No newline at end of file diff --git a/models/nlp/language_model/roformer/ixrt/README.md b/models/nlp/language_model/roformer/ixrt/README.md index 26e76714..59750879 100644 --- a/models/nlp/language_model/roformer/ixrt/README.md +++ b/models/nlp/language_model/roformer/ixrt/README.md @@ -50,8 +50,10 @@ python3 deploy.py --model_path ./data/open_roformer/roformer-frozen.onnx --outpu ## Inference ```bash +git clone https://gitee.com/deep-spark/iluvatar-corex-ixrt.git --depth=1 + export ORIGIN_ONNX_NAME=./data/open_roformer/roformer-frozen -export OPTIMIER_FILE=${IXRT_OSS_ROOT}/tools/optimizer/optimizer.py +export OPTIMIER_FILE=./iluvatar-corex-ixrt/tools/optimizer/optimizer.py export PROJ_PATH=./ ``` @@ -87,6 +89,8 @@ rm -f open_cail2019.tar # Go to general_perf/ cd ./ByteMLPerf/byte_infer_perf/general_perf +mkdir -p workloads +wget -O workloads/roformer-tf-fp32.json https://raw.githubusercontent.com/bytedance/ByteMLPerf/refs/heads/main/byte_infer_perf/general_perf/workloads/roformer-tf-fp32.json # Modify model_zoo/roformer-tf-fp32.json sed -i 's/segment:0/segment0/g; s/token:0/token0/g' model_zoo/roformer-tf-fp32.json # Run Acc scripts diff --git a/models/nlp/language_model/roformer/ixrt/ci/prepare.sh b/models/nlp/language_model/roformer/ixrt/ci/prepare.sh index 2edc9c04..5b1e9dd6 100644 --- 
a/models/nlp/language_model/roformer/ixrt/ci/prepare.sh +++ b/models/nlp/language_model/roformer/ixrt/ci/prepare.sh @@ -16,14 +16,7 @@ set -x -ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') -if [[ ${ID} == "ubuntu" ]]; then - apt install -y libgl1-mesa-glx -elif [[ ${ID} == "centos" ]]; then - yum install -y mesa-libGL -else - echo "Not Support Os" -fi +apt install -y libnuma-dev pip install -r requirements.txt @@ -55,5 +48,6 @@ cp /root/data/datasets/open_cail2019/* ./ByteMLPerf/byte_infer_perf/general_perf # Go to general_perf/ cd ./ByteMLPerf/byte_infer_perf/general_perf +cp -r /root/data/3rd_party/workloads ./ # Modify model_zoo/roformer-tf-fp32.json sed -i 's/segment:0/segment0/g; s/token:0/token0/g' model_zoo/roformer-tf-fp32.json \ No newline at end of file diff --git a/models/nlp/language_model/roformer/ixrt/requirements.txt b/models/nlp/language_model/roformer/ixrt/requirements.txt index 60aeb062..a45ab311 100644 --- a/models/nlp/language_model/roformer/ixrt/requirements.txt +++ b/models/nlp/language_model/roformer/ixrt/requirements.txt @@ -1,4 +1,5 @@ tf2onnx pycuda onnxsim -py-libnuma==1.2 \ No newline at end of file +py-libnuma==1.2 +typing_extensions==4.12.2 \ No newline at end of file diff --git a/models/nlp/language_model/videobert/ixrt/README.md b/models/nlp/language_model/videobert/ixrt/README.md index c389eaa3..dfbe449b 100644 --- a/models/nlp/language_model/videobert/ixrt/README.md +++ b/models/nlp/language_model/videobert/ixrt/README.md @@ -32,8 +32,10 @@ bash ./scripts/prepare_model_and_dataset.sh ## Inference ```bash +git clone https://gitee.com/deep-spark/iluvatar-corex-ixrt.git --depth=1 + export ORIGIN_ONNX_NAME=./general_perf/model_zoo/popular/open_videobert/video-bert -export OPTIMIER_FILE=./ixrt-oss/tools/optimizer/optimizer.py +export OPTIMIER_FILE=./iluvatar-corex-ixrt/tools/optimizer/optimizer.py export PROJ_PATH=./ ``` @@ -64,6 +66,8 @@ cp ./general_perf/model_zoo/popular/open_videobert/video-bert.onnx ByteMLPerf/by # run acc scripts mv perf_engine.py ./ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py cd ./ByteMLPerf/byte_infer_perf/general_perf +mkdir -p workloads +wget -O workloads/videobert-onnx-fp32.json https://raw.githubusercontent.com/bytedance/ByteMLPerf/refs/heads/main/byte_infer_perf/general_perf/workloads/videobert-onnx-fp32.json python3 core/perf_engine.py --hardware_type ILUVATAR --task videobert-onnx-fp32 ``` diff --git a/models/nlp/language_model/videobert/ixrt/ci/prepare.sh b/models/nlp/language_model/videobert/ixrt/ci/prepare.sh index 46444091..5efc12a7 100644 --- a/models/nlp/language_model/videobert/ixrt/ci/prepare.sh +++ b/models/nlp/language_model/videobert/ixrt/ci/prepare.sh @@ -16,14 +16,7 @@ set -x -ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') -if [[ ${ID} == "ubuntu" ]]; then - apt install -y libgl1-mesa-glx -elif [[ ${ID} == "centos" ]]; then - yum install -y mesa-libGL -else - echo "Not Support Os" -fi +apt install -y libnuma-dev pip install -r requirements.txt @@ -40,3 +33,5 @@ mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cifar/ cp -r /root/data/datasets/open_cifar/cifar-100-python/ ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cifar/ mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/open_videobert/ cp /root/data/checkpoints/open_videobert/video-bert.onnx ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/open_videobert/ +cd ./ByteMLPerf/byte_infer_perf/general_perf +cp -r /root/data/3rd_party/workloads ./ diff --git 
a/models/nlp/language_model/videobert/ixrt/requirements.txt b/models/nlp/language_model/videobert/ixrt/requirements.txt index cac1ba9a..7e0fcb9e 100644 --- a/models/nlp/language_model/videobert/ixrt/requirements.txt +++ b/models/nlp/language_model/videobert/ixrt/requirements.txt @@ -5,7 +5,7 @@ tqdm pycuda onnx tabulate -cv2 pycocotools opencv-python==4.6.0.66 -transformers==4.33.3 \ No newline at end of file +transformers==4.33.3 +typing_extensions==4.12.2 \ No newline at end of file diff --git a/toolbox/ByteMLPerf/.gitignore b/toolbox/ByteMLPerf/.gitignore index 2e06b074..ad7dbe99 100644 --- a/toolbox/ByteMLPerf/.gitignore +++ b/toolbox/ByteMLPerf/.gitignore @@ -15,7 +15,7 @@ byte_infer_perf/general_perf/model_zoo/* byte_infer_perf/general_perf/download/*.* !byte_infer_perf/general_perf/download/README.md byte_infer_perf/general_perf/datasets/open_imagenet/preprocessed/ -byte_infer_perf/general_perf/datasets/* +# byte_infer_perf/general_perf/datasets/* !byte_infer_perf/general_perf/datasets/fake_dataset !*.py byte_infer_perf/general_perf/reports/* diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cail2019/data_loader.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cail2019/data_loader.py new file mode 100644 index 00000000..709d6f76 --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cail2019/data_loader.py @@ -0,0 +1,95 @@ +# Copyright 2023 ByteDance and/or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
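+
+# The DataLoader below consumes CAIL2019 sentence-pair features that were
+# exported offline as numpy arrays (token ids, segment ids, binary labels),
+# pads every sequence to a fixed length of 1024, and rebatches the samples
+# into feed dicts keyed by the graph inputs 'input_token:0' and
+# 'input_segment:0'.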
+ +import logging +import os +import numpy as np +from general_perf.datasets import data_loader +from tqdm import tqdm +import collections + +log = logging.getLogger("CAIL2019") + +maxlen = 1024 + + +class DataLoader(data_loader.Dataset): + def __init__(self, config): + super(DataLoader, self).__init__(config) + + log.info("Initial...") + self.config = config + self.cur_bs = 2 + + batch_token_ids = np.load( + "general_perf/datasets/{}/batch_token_ids.npy".format( + self.config['dataset_name']), + allow_pickle=True) + batch_segment_ids = np.load( + "general_perf/datasets/{}/batch_segment_ids.npy".format( + self.config['dataset_name']), + allow_pickle=True) + labels = np.load("general_perf/datasets/{}/label.npy".format( + self.config['dataset_name']), + allow_pickle=True) + self.feed_dict = collections.defaultdict(list) + self.feed_dict['batch_token_ids'] = batch_token_ids.tolist() + self.feed_dict['batch_segment_ids'] = batch_segment_ids.tolist() + self.feed_dict['label'] = labels.tolist() + + self.items = len(self.feed_dict['label']) + self.batch_num = int(self.items / self.cur_bs) + + for i in range(self.items): + batch_token_id = np.pad( + self.feed_dict['batch_token_ids'][i], + (0, 1024 - len(self.feed_dict['batch_token_ids'][i])), + 'constant').astype(np.float32) + batch_segment_id = np.pad( + self.feed_dict['batch_segment_ids'][i], + (0, 1024 - len(self.feed_dict['batch_segment_ids'][i])), + 'constant').astype(np.float32) + self.feed_dict['batch_token_ids'][i] = batch_token_id.tolist() + self.feed_dict['batch_segment_ids'][i] = batch_segment_id.tolist() + + def name(self): + return self.config['dataset_name'] + + def preprocess(self): + log.info("Preprocessing...") + + self.rebatch(self.cur_bs, skip=False) + + def rebatch(self, new_bs, skip=True): + log.info("Rebatching batch size to: {} ...".format(new_bs)) + + if self.cur_bs == new_bs and skip: + return + + self.cur_bs = new_bs + self.batch_num = int(self.items / self.cur_bs) + self.batched_data = [] + self.labels = [] + for i in tqdm(range(self.batch_num)): + split_data = { + 'input_segment:0': + self.feed_dict["batch_segment_ids"][i * self.cur_bs:(i + 1) * + self.cur_bs], + 'input_token:0': + self.feed_dict["batch_token_ids"][i * self.cur_bs:(i + 1) * + self.cur_bs], + } + self.labels.append( + self.feed_dict["label"][i * self.cur_bs:(i + 1) * self.cur_bs]) + self.batched_data.append(split_data) diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cail2019/pre_process_data.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cail2019/pre_process_data.py new file mode 100644 index 00000000..ce353805 --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cail2019/pre_process_data.py @@ -0,0 +1,56 @@ +# Copyright 2023 ByteDance and/or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
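+
+# Offline preprocessing for CAIL2019: every record yields two (query, candidate)
+# pairs, the matching candidate first (label 1) and the mismatching one second
+# (label 0). Pairs are encoded with a bert4keras Tokenizer that pre-tokenizes
+# with jieba, then saved to batch_token_ids.npy, batch_segment_ids.npy and
+# label.npy for the data loader to consume.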
+ +from tqdm import tqdm +import json +import collections +import numpy as np +from bert4keras.tokenizers import Tokenizer +import jieba +jieba.initialize() + +test_data = [] +with open("test.json", encoding='utf-8') as f: + for l in f: + l = json.loads(l) + assert l['label'] in 'BC' + if l['label'] == 'B': + test_data.append((l['A'], l['B'], l['C'])) + else: + test_data.append((l['A'], l['C'], l['B'])) + +tokenizer = Tokenizer("vocab.txt", + do_lower_case=True, + pre_tokenize=lambda s: jieba.cut(s, HMM=False)) + +feed_dict = collections.defaultdict(list) +maxlen = 1024 +for i in tqdm(range(len(test_data))): + (text1, text2, text3) = test_data[i] + token_ids, segment_ids = tokenizer.encode(text1, text2, maxlen=maxlen) + feed_dict["batch_token_ids"].append(token_ids) + feed_dict["batch_segment_ids"].append(segment_ids) + feed_dict["label"].append([1]) + token_ids, segment_ids = tokenizer.encode(text1, text3, maxlen=maxlen) + feed_dict["batch_token_ids"].append(token_ids) + feed_dict["batch_segment_ids"].append(segment_ids) + feed_dict["label"].append([0]) + +np.save("{}.npy".format('batch_token_ids'), + feed_dict["batch_token_ids"], + allow_pickle=True) +np.save("{}.npy".format('batch_segment_ids'), + feed_dict["batch_segment_ids"], + allow_pickle=True) +np.save("{}.npy".format('label'), feed_dict["label"], allow_pickle=True) diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cail2019/test_accuracy.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cail2019/test_accuracy.py new file mode 100644 index 00000000..5fd917f6 --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cail2019/test_accuracy.py @@ -0,0 +1,45 @@ +# Copyright 2023 ByteDance and/or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
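+
+# Pairwise ranking accuracy for CAIL2019: predictions keep the interleaved
+# order produced by the preprocessing script, so a sample counts as correct
+# when the matching pair's score (even index) is higher than the score of the
+# mismatching pair that follows it (odd index).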
+ +import logging +import numpy as np +from general_perf.datasets import test_accuracy +from tqdm import tqdm + +log = logging.getLogger("TestAccuracy") + + +class AccuracyChecker(test_accuracy.AccuracyChecker): + def calculate_acc(self, data_percent): + log.info("Start to calculate accuracy...") + num = int((data_percent / 100) * self.dataloader.get_batch_count() + ) if data_percent else self.dataloader.get_batch_count() + good, total = 0, 0 + diffs = [] + for i in tqdm(range(num)): + test_data, labels = self.dataloader.get_samples(i) + + results = self.runtime_backend.predict(test_data) + results = results[list(results)[0]] + diffs.append(results) + + total += len(results) // 2 + good += (results[::2] > results[1::2]).sum() + + accuracy = round((good / total), 5) + np.save(self.output_dir + "/{}.npy".format(self.dataloader.name()), + diffs) + log.info('Batch size is {}, Accuracy: {}'.format( + self.dataloader.cur_bs, accuracy)) + return {"Top-1": accuracy} diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cifar/data_loader.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cifar/data_loader.py new file mode 100644 index 00000000..4bef7f72 --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cifar/data_loader.py @@ -0,0 +1,155 @@ +# Copyright 2023 ByteDance and/or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
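+
+# CIFAR-100 loader for the image/text matching workload: it unpickles the test
+# batch, converts every image to RGB, applies a CLIP-style transform (bicubic
+# resize to 224, center crop, to-tensor, normalize), and batches the images
+# together with the text inputs loaded from text.npy so per-batch image/text
+# logits can be scored.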
+ +import collections +import logging + +import numpy as np +import os +import pickle +from tqdm import tqdm +from typing import Any +from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize +from PIL import Image +try: + from torchvision.transforms import InterpolationMode + BICUBIC = InterpolationMode.BICUBIC +except ImportError: + BICUBIC = Image.BICUBIC + +from general_perf.datasets import data_loader + +log = logging.getLogger("CIFAR100") + +INPUT_TYPE = { + "UINT8": np.uint8, + "FLOAT32": np.float32, + "LONG": np.long, + "INT32": np.int32, + "INT64": np.int64 +} + + +class DataLoader(data_loader.Dataset): + def __init__(self, config): + super(DataLoader, self).__init__(config) + log.info("Initial...") + + base_folder = "general_perf/datasets/{}/cifar-100-python".format( + self.config['dataset_name']) + test_list = [ + ['test', 'f0ef6b0ae62326f3e7ffdfab6717acfc'], + ] + meta = { + 'filename': 'meta', + 'key': 'fine_label_names', + 'md5': '7973b15100ade9c7d40fb424638fde48', + } + + self.data: Any = [] + self.targets = [] + + # now load the picked numpy arrays + for file_name, checksum in test_list: + file_path = os.path.join(base_folder, file_name) + with open(file_path, 'rb') as f: + entry = pickle.load(f, encoding='latin1') + self.data.append(entry['data']) + if 'labels' in entry: + self.targets.extend(entry['labels']) + else: + self.targets.extend(entry['fine_labels']) + + self.data = np.vstack(self.data).reshape(-1, 3, 32, 32) + self.data = self.data.transpose((0, 2, 3, 1)) # convert to HWC + + transformer = _transform() + path = os.path.join(base_folder, meta['filename']) + with open(path, 'rb') as infile: + data = pickle.load(infile, encoding='latin1') + self.classes = data[meta['key']] + self.class_to_idx = { + _class: i + for i, _class in enumerate(self.classes) + } + self.test_data = [] + for i in tqdm(range(len(self.data))): + img = self.data[i] + img = Image.fromarray(img) + img = transformer(img).detach().numpy() + self.test_data.append(img) + self.text_input = np.load(os.path.join(base_folder, 'text.npy')) + self.config = config + self.cur_bs = 1 + self.items = len(self.data) + self.batch_num = int(self.items / self.cur_bs) + + def name(self): + return self.config['dataset_name'] + + def preprocess(self): + log.info("Preprocessing...") + + self.rebatch(self.cur_bs, skip=False) + + def rebatch(self, new_bs, skip=True): + log.info("Rebatching batch size to: {} ...".format(new_bs)) + + if self.cur_bs == new_bs and skip: + return + + self.cur_bs = new_bs + self.batch_num = int(self.items / self.cur_bs) + self.batched_data = [] + self.labels = [] + for i in tqdm(range(self.batch_num)): + split_data = { + 'image': self.test_data[i * self.cur_bs:(i + 1) * self.cur_bs], + 'text': self.text_input, + } + self.labels.append(self.targets[i * self.cur_bs:(i + 1) * + self.cur_bs]) + self.batched_data.append(split_data) + + def get_fake_samples(self, batch_size, shape, input_type): + data = {} + if input_type: + i = 0 + for key, val in shape.items(): + if key == "image": + val = [val[0] * batch_size] + val[1:] + data[key] = np.random.random(size=val).astype( + INPUT_TYPE[input_type[i]]) + else: + data[key] = np.random.random(size=val).astype( + INPUT_TYPE[input_type[i]]) + i += 1 + return data + else: + raise ValueError("Please provide input type") + + +def _convert_image_to_rgb(image): + return image.convert("RGB") + + +def _transform(): + return Compose([ + Resize(224, interpolation=BICUBIC), + CenterCrop(224), + _convert_image_to_rgb, + ToTensor(), + 
Normalize((0.48145466, 0.4578275, 0.40821073), + (0.26862954, 0.26130258, 0.27577711)), + ]) diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cifar/test_accuracy.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cifar/test_accuracy.py new file mode 100644 index 00000000..cbe6a860 --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cifar/test_accuracy.py @@ -0,0 +1,49 @@ +# Copyright 2023 ByteDance and/or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import numpy as np +from general_perf.datasets import test_accuracy +from tqdm import tqdm + +log = logging.getLogger("TestAccuracy") + + +class AccuracyChecker(test_accuracy.AccuracyChecker): + def calculate_acc(self, data_percent): + log.info("Start to calculate accuracy...") + num = int((data_percent / 100) * self.dataloader.get_batch_count() + ) if data_percent else self.dataloader.get_batch_count() + good, total = 0, 0 + diffs = [] + for i in tqdm(range(num)): + test_data, labels = self.dataloader.get_samples(i) + logits_per_image, logits_per_text = self.runtime_backend.predict( + test_data) + diffs.append(logits_per_image) + + for j in range(len(logits_per_image)): + probs = logits_per_image[j] + + if np.argmax(probs) == labels[j]: + good += 1 + total += 1 + + accuracy = round((good / total), 5) + np.save(self.output_dir + "/{}.npy".format(self.dataloader.name()), + diffs, + allow_pickle=True) + log.info('Batch size is {}, Accuracy: {}'.format( + self.dataloader.cur_bs, accuracy)) + return {"Top-1": accuracy} diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_imagenet/data_loader.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_imagenet/data_loader.py new file mode 100644 index 00000000..488ae1ab --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_imagenet/data_loader.py @@ -0,0 +1,260 @@ +# Copyright 2023 ByteDance and/or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
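+
+# ImageNet validation loader: it walks val_map.txt, preprocesses each image
+# once (VGG-style resize/crop/mean-subtraction for Tensorflow models,
+# torchvision resize-256/center-crop-224 normalization for PyTorch resnet50,
+# a 384 center crop for ViT-style models), caches the result as .npy under
+# preprocessed/, and serves batches in a fixed shuffled order keyed by the
+# model's configured input name. Labels from val_map.txt are offset by +1.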
+ +import logging +import os +from os.path import split +import re +import time + +import cv2 +import numpy as np +import random +from tqdm import tqdm + +from general_perf.datasets import data_loader + +log = logging.getLogger("Imagenet") + +INPUT_TYPE = { + "UINT8": np.uint8, + "FLOAT32": np.float32, +} + + +class DataLoader(data_loader.Dataset): + def __init__(self, config): + super(DataLoader, self).__init__(config) + log.info("Initial...") + + self.config = config + self.cur_bs = 1 + self.image_size = [224, 224, 3] + + if self.config['framework'] == 'Tensorflow': + image_format = "NHWC" + pre_process = pre_process_vgg + else: + image_format = "NCHW" + if 'resnet50' in self.config['model']: + pre_process = pre_process_imagenet_pytorch + else: + pre_process = pre_process_imagenet_vit + + cache_dir = os.getcwd() + \ + "/general_perf/datasets/{}".format(self.config['dataset_name']) + self.input_name = self.config['inputs'] + self.image_list = [] + self.label_list = [] + self.count = None + self.use_cache = 0 + self.cache_dir = os.path.join(cache_dir, "preprocessed", + self.config['model']) + self.data_path = "general_perf/datasets/{}/ILSVRC2012_img_val".format( + self.config['dataset_name']) + self.pre_process = pre_process + self.items = 0 + # input images are in HWC + self.need_transpose = True if image_format == "NCHW" else False + not_found = 0 + os.makedirs(self.cache_dir, exist_ok=True) + + image_list = 'general_perf/datasets/{}/val_map.txt'.format( + self.config['dataset_name']) + + start = time.time() + with open(image_list, 'r') as f: + for s in tqdm(f): + image_name, label = re.split(r"\s+", s.strip()) + src = os.path.join(self.data_path, image_name) + if not os.path.exists(src): + # if the image does not exists ignore it + not_found += 1 + continue + os.makedirs(os.path.dirname( + os.path.join(self.cache_dir, image_name)), + exist_ok=True) + dst = os.path.join(self.cache_dir, image_name) + if not os.path.exists(dst + ".npy"): + img_org = cv2.imread(src) + processed = self.pre_process( + img_org, + need_transpose=self.need_transpose, + dims=self.image_size) + np.save(dst, processed) + + self.image_list.append(image_name) + self.label_list.append(int(label) + 1) + self.items = len(self.image_list) + + # limit the dataset if requested + if self.count and len(self.image_list) >= self.count: + break + + time_taken = time.time() - start + if not self.image_list: + log.error("no images in image list found") + raise ValueError("no images in image list found") + if not_found > 0: + log.info("reduced image list, %d images not found", not_found) + + log.info("loaded {} images, cache={}, took={:.1f}sec".format( + len(self.image_list), self.use_cache, time_taken)) + + self.label_list = np.array(self.label_list) + self.batch_num = int(self.items / self.cur_bs) + self.shuffle_index = [i for i in range(self.items)] + random.seed(7) + random.shuffle(self.shuffle_index) + + def name(self): + return self.config['dataset_name'] + + def preprocess(self): + log.info("Preprocessing...") + + self.rebatch(self.cur_bs, skip=False) + + def rebatch(self, new_bs, skip=True): + log.info("Rebatching batch size to: {} ...".format(new_bs)) + + if self.cur_bs == new_bs and skip: + return + + self.cur_bs = new_bs + self.batch_num = int(self.items / self.cur_bs) + self.batched_data = [] + self.labels = [] + for i in tqdm(range(self.batch_num)): + split_data, labels = [], [] + for j in range(i * self.cur_bs, (i + 1) * self.cur_bs): + output, label = self.get_item(self.shuffle_index[j]) + split_data.append(output) 
+ labels.append(label) + + self.labels.append(labels) + self.batched_data.append({self.input_name: np.array(split_data)}) + + def get_samples(self, sample_id): + if sample_id >= len(self.batched_data) or sample_id < 0: + raise ValueError("Your Input ID: {} is out of range: {}".format( + sample_id, len(self.batched_data))) + return self.batched_data[sample_id], self.labels[sample_id] + + def get_item(self, nr): + """Get image by number in the list.""" + dst = os.path.join(self.cache_dir, self.image_list[nr]) + img = np.load(dst + ".npy") + return img, self.label_list[nr] + + +# +# pre-processing +# +def center_crop(img, out_height, out_width): + height, width, _ = img.shape + left = int((width - out_width) / 2) + right = int((width + out_width) / 2) + top = int((height - out_height) / 2) + bottom = int((height + out_height) / 2) + img = img[top:bottom, left:right] + return img + + +def resize_with_aspectratio(img, + out_height, + out_width, + scale=87.5, + inter_pol=cv2.INTER_LINEAR): + height, width, _ = img.shape + new_height = int(100. * out_height / scale) + new_width = int(100. * out_width / scale) + if height > width: + w = new_width + h = int(new_height * height / width) + else: + h = new_height + w = int(new_width * width / height) + img = cv2.resize(img, (w, h), interpolation=inter_pol) + return img + + +def pre_process_vgg(img, dims=None, need_transpose=False): + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + + output_height, output_width, _ = dims + cv2_interpol = cv2.INTER_AREA + img = resize_with_aspectratio(img, + output_height, + output_width, + inter_pol=cv2_interpol) + img = center_crop(img, output_height, output_width) + img = np.asarray(img, dtype='float32') + + # normalize image + means = np.array([123.68, 116.78, 103.94], dtype=np.float32) + img -= means + + # transpose if needed + if need_transpose: + img = img.transpose([2, 0, 1]) + return img + + +def pre_process_imagenet_pytorch(img, dims=None, need_transpose=False): + from PIL import Image + import torchvision.transforms.functional as F + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + img = Image.fromarray(img) + img = F.resize(img, 256, Image.BILINEAR) + img = F.center_crop(img, 224) + img = F.to_tensor(img) + img = F.normalize(img, + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225], + inplace=False) + if not need_transpose: + img = img.permute(1, 2, 0) # NHWC + img = np.asarray(img, dtype='float32') + return img + +def pre_process_imagenet_vit(img, dims=None, need_transpose=False): + from PIL import Image + import torchvision.transforms.functional as F + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + img = Image.fromarray(img) + img = F.resize(img, 256, Image.BILINEAR) + img = F.center_crop(img, 384) + img = F.to_tensor(img) + img = F.normalize(img, + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225], + inplace=False) + if not need_transpose: + img = img.permute(1, 2, 0) # NHWC + img = np.asarray(img, dtype='float32') + return img + + +def maybe_resize(img, dims): + img = np.array(img, dtype=np.float32) + if len(img.shape) < 3 or img.shape[2] != 3: + # some images might be grayscale + img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB) + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + if dims != None: + im_height, im_width, _ = dims + img = cv2.resize(img, (im_width, im_height), + interpolation=cv2.INTER_LINEAR) + return img diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_imagenet/test_accuracy.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_imagenet/test_accuracy.py new 
file mode 100644 index 00000000..6275aaf2 --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_imagenet/test_accuracy.py @@ -0,0 +1,66 @@ +# Copyright 2023 ByteDance and/or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import numpy as np +from general_perf.datasets import test_accuracy +from tqdm import tqdm +import torch + +log = logging.getLogger("TestAccuracy") + + +class AccuracyChecker(test_accuracy.AccuracyChecker): + def calculate_acc(self, data_percent): + log.info("Start to calculate accuracy...") + num = int((data_percent / 100) * self.dataloader.get_batch_count() + ) if data_percent else self.dataloader.get_batch_count() + good, total = 0, 0 + diffs = [] + for i in tqdm(range(num)): + test_data, labels = self.dataloader.get_samples(i) + + results = self.runtime_backend.predict(test_data) + if "resnet50-tf-fp16" in self.configs["model"]: + if 'classes' in results: + del results['classes'] + results = self._post_processing(results, self.configs['framework']) + diffs.append(results) + for j in range(len(results)): + if np.argmax(results[j]) == labels[j]: + good += 1 + total += 1 + accuracy = round((good / total), 5) + log.info('Batch size is {}, Accuracy: {}'.format( + self.dataloader.cur_bs, accuracy)) + np.save(self.output_dir + "/{}.npy".format(self.dataloader.name()), + diffs) + return {"Top-1": accuracy} + + def _post_processing(self, inputs, framework): + if framework == "Onnx": + if isinstance(inputs, list): + inputs = list(inputs[0]) + elif isinstance(inputs, dict): + key = list(inputs.keys())[0] + inputs = list(inputs[key]) + else: + if isinstance(inputs, tuple): + inputs = inputs[0].float().cpu().numpy().astype(float) if inputs[0].dtype==torch.bfloat16 else inputs[0].cpu().numpy().astype(float) + else: + inputs = inputs[list(inputs)[0]] + if framework == "Pytorch" or framework == "Onnx": + inputs = np.array( + [np.insert(inputs[i], 0, 0) for i in range(len(inputs))]) + return inputs diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/bert/accuracy_squad.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/bert/accuracy_squad.py new file mode 100644 index 00000000..18c97dd4 --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/bert/accuracy_squad.py @@ -0,0 +1,322 @@ +# Copyright 2023 ByteDance and/or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
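+
+# Helpers for turning raw SQuAD start/end logits back into text answers: for
+# each example the n-best start/end index pairs are generated, invalid spans
+# are filtered out, WordPiece tokens are projected back onto the original text
+# via get_final_text, the n-best scores are softmaxed, and the top prediction
+# per question id is written to a JSON file for the evaluate script.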
+ +import argparse +import collections +import json +import math +import os +import sys + +sys.path.insert(0, os.path.dirname(__file__)) + +import numpy as np +import six +from bert import tokenization + +# To support feature cache. +import pickle + +max_seq_length = 384 +max_query_length = 64 +doc_stride = 128 + +RawResult = collections.namedtuple("RawResult", + ["unique_id", "start_logits", "end_logits"]) + +dtype_map = { + "int8": np.int8, + "int16": np.int16, + "int32": np.int32, + "int64": np.int64, + "float16": np.float16, + "float32": np.float32, + "float64": np.float64 +} + + +def get_final_text(pred_text, orig_text, do_lower_case): + """Project the tokenized prediction back to the original text.""" + + # When we created the data, we kept track of the alignment between original + # (whitespace tokenized) tokens and our WordPiece tokenized tokens. So + # now `orig_text` contains the span of our original text corresponding to the + # span that we predicted. + # + # However, `orig_text` may contain extra characters that we don't want in + # our prediction. + # + # For example, let's say: + # pred_text = steve smith + # orig_text = Steve Smith's + # + # We don't want to return `orig_text` because it contains the extra "'s". + # + # We don't want to return `pred_text` because it's already been normalized + # (the SQuAD eval script also does punctuation stripping/lower casing but + # our tokenizer does additional normalization like stripping accent + # characters). + # + # What we really want to return is "Steve Smith". + # + # Therefore, we have to apply a semi-complicated alignment heruistic between + # `pred_text` and `orig_text` to get a character-to-charcter alignment. This + # can fail in certain cases in which case we just return `orig_text`. + + def _strip_spaces(text): + ns_chars = [] + ns_to_s_map = collections.OrderedDict() + for (i, c) in enumerate(text): + if c == " ": + continue + ns_to_s_map[len(ns_chars)] = i + ns_chars.append(c) + ns_text = "".join(ns_chars) + return (ns_text, ns_to_s_map) + + # We first tokenize `orig_text`, strip whitespace from the result + # and `pred_text`, and check if they are the same length. If they are + # NOT the same length, the heuristic has failed. If they are the same + # length, we assume the characters are one-to-one aligned. + tokenizer = tokenization.BasicTokenizer(do_lower_case=do_lower_case) + + tok_text = " ".join(tokenizer.tokenize(orig_text)) + start_position = tok_text.find(pred_text) + if start_position == -1: + return orig_text + end_position = start_position + len(pred_text) - 1 + + (orig_ns_text, orig_ns_to_s_map) = _strip_spaces(orig_text) + (tok_ns_text, tok_ns_to_s_map) = _strip_spaces(tok_text) + + if len(orig_ns_text) != len(tok_ns_text): + return orig_text + + # We then project the characters in `pred_text` back to `orig_text` using + # the character-to-character alignment. 
+ tok_s_to_ns_map = {} + for (i, tok_index) in six.iteritems(tok_ns_to_s_map): + tok_s_to_ns_map[tok_index] = i + + orig_start_position = None + if start_position in tok_s_to_ns_map: + ns_start_position = tok_s_to_ns_map[start_position] + if ns_start_position in orig_ns_to_s_map: + orig_start_position = orig_ns_to_s_map[ns_start_position] + + if orig_start_position is None: + return orig_text + + orig_end_position = None + if end_position in tok_s_to_ns_map: + ns_end_position = tok_s_to_ns_map[end_position] + if ns_end_position in orig_ns_to_s_map: + orig_end_position = orig_ns_to_s_map[ns_end_position] + + if orig_end_position is None: + return orig_text + + output_text = orig_text[orig_start_position:(orig_end_position + 1)] + return output_text + + +def _get_best_indexes(logits, n_best_size): + """Get the n-best logits from a list.""" + index_and_score = sorted(enumerate(logits), + key=lambda x: x[1], + reverse=True) + + best_indexes = [] + for i in range(len(index_and_score)): + if i >= n_best_size: + break + best_indexes.append(index_and_score[i][0]) + return best_indexes + + +def _compute_softmax(scores): + """Compute softmax probability over raw logits.""" + if not scores: + return [] + + max_score = None + for score in scores: + if max_score is None or score > max_score: + max_score = score + + exp_scores = [] + total_sum = 0.0 + for score in scores: + x = math.exp(score - max_score) + exp_scores.append(x) + total_sum += x + + probs = [] + for score in exp_scores: + probs.append(score / total_sum) + return probs + + +def write_predictions(all_examples, + all_features, + all_results, + n_best_size, + max_answer_length, + do_lower_case, + output_prediction_file, + max_examples=None): + """Write final predictions to the json file and log-odds of null if needed.""" + print("Writing predictions to: %s" % (output_prediction_file)) + + example_index_to_features = collections.defaultdict(list) + for feature in all_features: + example_index_to_features[feature.example_index].append(feature) + + unique_id_to_result = {} + for result in all_results: + unique_id_to_result[result.unique_id] = result + + _PrelimPrediction = collections.namedtuple( # pylint: disable=invalid-name + "PrelimPrediction", [ + "feature_index", "start_index", "end_index", "start_logit", + "end_logit" + ]) + + all_predictions = collections.OrderedDict() + all_nbest_json = collections.OrderedDict() + scores_diff_json = collections.OrderedDict() + + for (example_index, example) in enumerate(all_examples): + if max_examples and example_index == max_examples: break + + features = example_index_to_features[example_index] + + prelim_predictions = [] + # keep track of the minimum score of null start+end of position 0 + score_null = 1000000 # large and positive + min_null_feature_index = 0 # the paragraph slice with min mull score + null_start_logit = 0 # the start logit at the slice with min null score + null_end_logit = 0 # the end logit at the slice with min null score + for (feature_index, feature) in enumerate(features): + # FIX: During compliance/audit runs, we only generate a small subset of + # all entries from the dataset. As a result, sometimes dict retrieval + # fails because a key is missing. 
+ # result = unique_id_to_result[feature.unique_id] + result = unique_id_to_result.get(feature.unique_id, None) + if result is None: + continue + start_indexes = _get_best_indexes(result.start_logits, n_best_size) + end_indexes = _get_best_indexes(result.end_logits, n_best_size) + # if we could have irrelevant answers, get the min score of irrelevant + for start_index in start_indexes: + for end_index in end_indexes: + # We could hypothetically create invalid predictions, e.g., predict + # that the start of the span is in the question. We throw out all + # invalid predictions. + if start_index >= len(feature.tokens): + continue + if end_index >= len(feature.tokens): + continue + if start_index not in feature.token_to_orig_map: + continue + if end_index not in feature.token_to_orig_map: + continue + if not feature.token_is_max_context.get( + start_index, False): + continue + if end_index < start_index: + continue + length = end_index - start_index + 1 + if length > max_answer_length: + continue + prelim_predictions.append( + _PrelimPrediction( + feature_index=feature_index, + start_index=start_index, + end_index=end_index, + start_logit=result.start_logits[start_index], + end_logit=result.end_logits[end_index])) + + prelim_predictions = sorted(prelim_predictions, + key=lambda x: + (x.start_logit + x.end_logit), + reverse=True) + + _NbestPrediction = collections.namedtuple( # pylint: disable=invalid-name + "NbestPrediction", ["text", "start_logit", "end_logit"]) + + seen_predictions = {} + nbest = [] + for pred in prelim_predictions: + if len(nbest) >= n_best_size: + break + feature = features[pred.feature_index] + tok_tokens = feature.tokens[pred.start_index:(pred.end_index + 1)] + orig_doc_start = feature.token_to_orig_map[pred.start_index] + orig_doc_end = feature.token_to_orig_map[pred.end_index] + orig_tokens = example.doc_tokens[orig_doc_start:(orig_doc_end + 1)] + tok_text = " ".join(tok_tokens) + + # De-tokenize WordPieces that have been split off. + tok_text = tok_text.replace(" ##", "") + tok_text = tok_text.replace("##", "") + + # Clean whitespace + tok_text = tok_text.strip() + tok_text = " ".join(tok_text.split()) + orig_text = " ".join(orig_tokens) + + final_text = get_final_text(tok_text, orig_text, do_lower_case) + if final_text in seen_predictions: + continue + + seen_predictions[final_text] = True + nbest.append( + _NbestPrediction(text=final_text, + start_logit=pred.start_logit, + end_logit=pred.end_logit)) + + # In very rare edge cases we could have no valid predictions. So we + # just create a nonce prediction in this case to avoid failure. 
+ if not nbest: + nbest.append( + _NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0)) + + assert len(nbest) >= 1 + + total_scores = [] + best_non_null_entry = None + for entry in nbest: + total_scores.append(entry.start_logit + entry.end_logit) + if not best_non_null_entry: + if entry.text: + best_non_null_entry = entry + + probs = _compute_softmax(total_scores) + + nbest_json = [] + for (i, entry) in enumerate(nbest): + output = collections.OrderedDict() + output["text"] = entry.text + output["probability"] = probs[i] + output["start_logit"] = entry.start_logit + output["end_logit"] = entry.end_logit + nbest_json.append(output) + + assert len(nbest_json) >= 1 + + all_predictions[example.qas_id] = nbest_json[0]["text"] + + with open(output_prediction_file, "w") as writer: + writer.write(json.dumps(all_predictions, indent=4) + "\n") diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/bert/evaluate.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/bert/evaluate.py new file mode 100644 index 00000000..177e136d --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/bert/evaluate.py @@ -0,0 +1,102 @@ +# Copyright 2023 ByteDance and/or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
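+
+# Standard SQuAD v1.1 scoring: answers are normalized (lower-cased, with
+# punctuation, articles and extra whitespace removed), Exact Match and
+# token-level F1 are computed against every ground-truth answer, the best
+# score per question is kept, and both metrics are averaged over the `num`
+# evaluated samples.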
+ +from __future__ import print_function +from collections import Counter +import string +import re +import argparse +import json +import sys + + +def normalize_answer(s): + """Lower text and remove punctuation, articles and extra whitespace.""" + def remove_articles(text): + return re.sub(r'\b(a|an|the)\b', ' ', text) + + def white_space_fix(text): + return ' '.join(text.split()) + + def remove_punc(text): + exclude = set(string.punctuation) + return ''.join(ch for ch in text if ch not in exclude) + + def lower(text): + return text.lower() + + return white_space_fix(remove_articles(remove_punc(lower(s)))) + + +def f1_score(prediction, ground_truth): + prediction_tokens = normalize_answer(prediction).split() + ground_truth_tokens = normalize_answer(ground_truth).split() + common = Counter(prediction_tokens) & Counter(ground_truth_tokens) + num_same = sum(common.values()) + if num_same == 0: + return 0 + precision = 1.0 * num_same / len(prediction_tokens) + recall = 1.0 * num_same / len(ground_truth_tokens) + f1 = (2 * precision * recall) / (precision + recall) + return f1 + + +def exact_match_score(prediction, ground_truth): + return (normalize_answer(prediction) == normalize_answer(ground_truth)) + + +def metric_max_over_ground_truths(metric_fn, prediction, ground_truths): + scores_for_ground_truths = [] + for ground_truth in ground_truths: + score = metric_fn(prediction, ground_truth) + scores_for_ground_truths.append(score) + return max(scores_for_ground_truths) + + +def evaluate(dataset, predictions, num): + f1 = exact_match = total = 0 + for article in dataset: + for paragraph in article['paragraphs']: + for qa in paragraph['qas']: + total += 1 + if qa['id'] not in predictions: + message = 'Unanswered question ' + qa['id'] + \ + ' will receive score 0.' + print(message, file=sys.stderr) + continue + ground_truths = list(map(lambda x: x['text'], qa['answers'])) + prediction = predictions[qa['id']] + exact_match += metric_max_over_ground_truths( + exact_match_score, prediction, ground_truths) + f1 += metric_max_over_ground_truths(f1_score, prediction, + ground_truths) + total = num + exact_match = round(100.0 * exact_match / total, 5) + f1 = round(100.0 * f1 / total, 5) + + return {'Exact Match': exact_match, 'F1 Score': f1} + + +def check_accuracy(dataset_file, prediction_file, num): + expected_version = '1.1' + with open(dataset_file) as dataset_file: + dataset_json = json.load(dataset_file) + if (dataset_json['version'] != expected_version): + print('Evaluation expects v-' + expected_version + + ', but got dataset with v-' + dataset_json['version'], + file=sys.stderr) + dataset = dataset_json['data'] + with open(prediction_file) as prediction_file: + predictions = json.load(prediction_file) + return evaluate(dataset, predictions, num) diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/create_squad_data.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/create_squad_data.py new file mode 100644 index 00000000..ff84c61e --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/create_squad_data.py @@ -0,0 +1,427 @@ +# Copyright 2023 ByteDance and/or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import json +import tokenization +import six + + +class SquadExample(object): + """A single training/test example for simple sequence classification. + For examples without an answer, the start and end position are -1. + """ + def __init__(self, + qas_id, + question_text, + doc_tokens, + orig_answer_text=None, + start_position=None, + end_position=None, + is_impossible=False): + self.qas_id = qas_id + self.question_text = question_text + self.doc_tokens = doc_tokens + self.orig_answer_text = orig_answer_text + self.start_position = start_position + self.end_position = end_position + self.is_impossible = is_impossible + + def __str__(self): + return self.__repr__() + + def __repr__(self): + s = "" + s += "qas_id: %s" % (tokenization.printable_text(self.qas_id)) + s += ", question_text: %s" % (tokenization.printable_text( + self.question_text)) + s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens)) + if self.start_position: + s += ", start_position: %d" % (self.start_position) + if self.start_position: + s += ", end_position: %d" % (self.end_position) + if self.start_position: + s += ", is_impossible: %r" % (self.is_impossible) + return s + + +class InputFeatures(object): + """A single set of features of data.""" + def __init__(self, + unique_id, + example_index, + doc_span_index, + tokens, + token_to_orig_map, + token_is_max_context, + input_ids, + input_mask, + segment_ids, + start_position=None, + end_position=None, + is_impossible=None): + self.unique_id = unique_id + self.example_index = example_index + self.doc_span_index = doc_span_index + self.tokens = tokens + self.token_to_orig_map = token_to_orig_map + self.token_is_max_context = token_is_max_context + self.input_ids = input_ids + self.input_mask = input_mask + self.segment_ids = segment_ids + self.start_position = start_position + self.end_position = end_position + self.is_impossible = is_impossible + + +def read_squad_examples(input_file, + is_training, + version_2_with_negative=False): + """Read a SQuAD json file into a list of SquadExample.""" + with open(input_file) as reader: + input_data = json.load(reader)["data"] + + def is_whitespace(c): + if c == " " or c == "\t" or c == "\r" or c == "\n" or ord(c) == 0x202F: + return True + return False + + examples = [] + for entry in input_data: + for paragraph in entry["paragraphs"]: + paragraph_text = paragraph["context"] + doc_tokens = [] + char_to_word_offset = [] + prev_is_whitespace = True + for c in paragraph_text: + if is_whitespace(c): + prev_is_whitespace = True + else: + if prev_is_whitespace: + doc_tokens.append(c) + else: + doc_tokens[-1] += c + prev_is_whitespace = False + char_to_word_offset.append(len(doc_tokens) - 1) + + for qa in paragraph["qas"]: + qas_id = qa["id"] + question_text = qa["question"] + start_position = None + end_position = None + orig_answer_text = None + is_impossible = False + if is_training: + + if version_2_with_negative: + is_impossible = qa["is_impossible"] + if (len(qa["answers"]) != 1) and (not 
is_impossible): + raise ValueError( + "For training, each question should have exactly 1 answer." + ) + if not is_impossible: + answer = qa["answers"][0] + orig_answer_text = answer["text"] + answer_offset = answer["answer_start"] + answer_length = len(orig_answer_text) + start_position = char_to_word_offset[answer_offset] + end_position = char_to_word_offset[answer_offset + + answer_length - 1] + # Only add answers where the text can be exactly recovered from the + # document. If this CAN'T happen it's likely due to weird Unicode + # stuff so we will just skip the example. + # + # Note that this means for training mode, every example is NOT + # guaranteed to be preserved. + actual_text = " ".join( + doc_tokens[start_position:(end_position + 1)]) + cleaned_answer_text = " ".join( + tokenization.whitespace_tokenize(orig_answer_text)) + if actual_text.find(cleaned_answer_text) == -1: + print("Could not find answer: '%s' vs. '%s'", + actual_text, cleaned_answer_text) + continue + else: + start_position = -1 + end_position = -1 + orig_answer_text = "" + + example = SquadExample(qas_id=qas_id, + question_text=question_text, + doc_tokens=doc_tokens, + orig_answer_text=orig_answer_text, + start_position=start_position, + end_position=end_position, + is_impossible=is_impossible) + examples.append(example) + + return examples + + +def _check_is_max_context(doc_spans, cur_span_index, position): + """Check if this is the 'max context' doc span for the token.""" + + # Because of the sliding window approach taken to scoring documents, a single + # token can appear in multiple documents. E.g. + # Doc: the man went to the store and bought a gallon of milk + # Span A: the man went to the + # Span B: to the store and bought + # Span C: and bought a gallon of + # ... + # + # Now the word 'bought' will have two scores from spans B and C. We only + # want to consider the score with "maximum context", which we define as + # the *minimum* of its left and right context (the *sum* of left and + # right context will always be the same, of course). + # + # In the example the maximum context for 'bought' would be span C since + # it has 1 left context and 3 right context, while span B has 4 left context + # and 0 right context. + best_score = None + best_span_index = None + for (span_index, doc_span) in enumerate(doc_spans): + end = doc_span.start + doc_span.length - 1 + if position < doc_span.start: + continue + if position > end: + continue + num_left_context = position - doc_span.start + num_right_context = end - position + score = min(num_left_context, + num_right_context) + 0.01 * doc_span.length + if best_score is None or score > best_score: + best_score = score + best_span_index = span_index + + return cur_span_index == best_span_index + + +def _improve_answer_span(doc_tokens, input_start, input_end, tokenizer, + orig_answer_text): + """Returns tokenized answer spans that better match the annotated answer.""" + + # The SQuAD annotations are character based. We first project them to + # whitespace-tokenized words. But then after WordPiece tokenization, we can + # often find a "better match". For example: + # + # Question: What year was John Smith born? + # Context: The leader was John Smith (1895-1943). + # Answer: 1895 + # + # The original whitespace-tokenized answer will be "(1895-1943).". However + # after tokenization, our tokens will be "( 1895 - 1943 ) .". So we can match + # the exact answer, 1895. + # + # However, this is not always possible. 
Consider the following: + # + # Question: What country is the top exporter of electornics? + # Context: The Japanese electronics industry is the lagest in the world. + # Answer: Japan + # + # In this case, the annotator chose "Japan" as a character sub-span of + # the word "Japanese". Since our WordPiece tokenizer does not split + # "Japanese", we just use "Japanese" as the annotation. This is fairly rare + # in SQuAD, but does happen. + tok_answer_text = " ".join(tokenizer.tokenize(orig_answer_text)) + + for new_start in range(input_start, input_end + 1): + for new_end in range(input_end, new_start - 1, -1): + text_span = " ".join(doc_tokens[new_start:(new_end + 1)]) + if text_span == tok_answer_text: + return (new_start, new_end) + + return (input_start, input_end) + + +def convert_examples_to_features(examples, + tokenizer, + max_seq_length, + doc_stride, + max_query_length, + is_training, + output_fn, + verbose_logging=False): + """Loads a data file into a list of `InputBatch`s.""" + + unique_id = 1000000000 + + for (example_index, example) in enumerate(examples): + query_tokens = tokenizer.tokenize(example.question_text) + + if len(query_tokens) > max_query_length: + query_tokens = query_tokens[0:max_query_length] + + tok_to_orig_index = [] + orig_to_tok_index = [] + all_doc_tokens = [] + for (i, token) in enumerate(example.doc_tokens): + orig_to_tok_index.append(len(all_doc_tokens)) + sub_tokens = tokenizer.tokenize(token) + for sub_token in sub_tokens: + tok_to_orig_index.append(i) + all_doc_tokens.append(sub_token) + + tok_start_position = None + tok_end_position = None + if is_training and example.is_impossible: + tok_start_position = -1 + tok_end_position = -1 + if is_training and not example.is_impossible: + tok_start_position = orig_to_tok_index[example.start_position] + if example.end_position < len(example.doc_tokens) - 1: + tok_end_position = orig_to_tok_index[example.end_position + + 1] - 1 + else: + tok_end_position = len(all_doc_tokens) - 1 + (tok_start_position, tok_end_position) = _improve_answer_span( + all_doc_tokens, tok_start_position, tok_end_position, + tokenizer, example.orig_answer_text) + + # The -3 accounts for [CLS], [SEP] and [SEP] + max_tokens_for_doc = max_seq_length - len(query_tokens) - 3 + + # We can have documents that are longer than the maximum sequence length. + # To deal with this we do a sliding window approach, where we take chunks + # of the up to our max length with a stride of `doc_stride`. 
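The sliding-window behaviour described in the comment above is easy to check in isolation. A small sketch with toy numbers (10 document tokens, `max_tokens_for_doc=6`, `doc_stride=3`), mirroring the loop that follows:

```python
# Overlapping doc spans produced by the sliding-window logic (toy sizes).
import collections

DocSpan = collections.namedtuple("DocSpan", ["start", "length"])

all_doc_tokens = ["tok%d" % i for i in range(10)]
max_tokens_for_doc = 6          # room left after [CLS], the query and the two [SEP]s
doc_stride = 3

doc_spans, start_offset = [], 0
while start_offset < len(all_doc_tokens):
    length = min(len(all_doc_tokens) - start_offset, max_tokens_for_doc)
    doc_spans.append(DocSpan(start=start_offset, length=length))
    if start_offset + length == len(all_doc_tokens):
        break
    start_offset += min(length, doc_stride)

print(doc_spans)  # [DocSpan(0, 6), DocSpan(3, 6), DocSpan(6, 4)]
```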
+ _DocSpan = collections.namedtuple( # pylint: disable=invalid-name + "DocSpan", ["start", "length"]) + doc_spans = [] + start_offset = 0 + while start_offset < len(all_doc_tokens): + length = len(all_doc_tokens) - start_offset + if length > max_tokens_for_doc: + length = max_tokens_for_doc + doc_spans.append(_DocSpan(start=start_offset, length=length)) + if start_offset + length == len(all_doc_tokens): + break + start_offset += min(length, doc_stride) + + for (doc_span_index, doc_span) in enumerate(doc_spans): + tokens = [] + token_to_orig_map = {} + token_is_max_context = {} + segment_ids = [] + tokens.append("[CLS]") + segment_ids.append(0) + for token in query_tokens: + tokens.append(token) + segment_ids.append(0) + tokens.append("[SEP]") + segment_ids.append(0) + + for i in range(doc_span.length): + split_token_index = doc_span.start + i + token_to_orig_map[len( + tokens)] = tok_to_orig_index[split_token_index] + + is_max_context = _check_is_max_context(doc_spans, + doc_span_index, + split_token_index) + token_is_max_context[len(tokens)] = is_max_context + tokens.append(all_doc_tokens[split_token_index]) + segment_ids.append(1) + tokens.append("[SEP]") + segment_ids.append(1) + + input_ids = tokenizer.convert_tokens_to_ids(tokens) + + # The mask has 1 for real tokens and 0 for padding tokens. Only real + # tokens are attended to. + input_mask = [1] * len(input_ids) + + # Zero-pad up to the sequence length. + while len(input_ids) < max_seq_length: + input_ids.append(0) + input_mask.append(0) + segment_ids.append(0) + + assert len(input_ids) == max_seq_length + assert len(input_mask) == max_seq_length + assert len(segment_ids) == max_seq_length + + start_position = None + end_position = None + if is_training and not example.is_impossible: + # For training, if our document chunk does not contain an annotation + # we throw it out, since there is nothing to predict. 
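To make the packing above concrete, here is a toy layout (hypothetical tokens, `max_seq_length=12`; `input_ids` would normally come from `tokenizer.convert_tokens_to_ids`) showing how `segment_ids` and `input_mask` line up with `[CLS] query [SEP] doc [SEP]`:

```python
# Toy illustration of the [CLS] query [SEP] doc [SEP] packing and zero-padding above.
query_tokens = ["what", "color"]
doc_tokens = ["the", "sky", "is", "blue"]
max_seq_length = 12

tokens = ["[CLS]"] + query_tokens + ["[SEP]"] + doc_tokens + ["[SEP]"]
segment_ids = [0] * (len(query_tokens) + 2) + [1] * (len(doc_tokens) + 1)
input_mask = [1] * len(tokens)
while len(input_mask) < max_seq_length:     # zero-pad, as in the loop above
    input_mask.append(0)
    segment_ids.append(0)

print(tokens)        # ['[CLS]', 'what', 'color', '[SEP]', 'the', 'sky', 'is', 'blue', '[SEP]']
print(segment_ids)   # [0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0]
print(input_mask)    # [1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0]
```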
+ doc_start = doc_span.start + doc_end = doc_span.start + doc_span.length - 1 + out_of_span = False + if not (tok_start_position >= doc_start + and tok_end_position <= doc_end): + out_of_span = True + if out_of_span: + start_position = 0 + end_position = 0 + else: + doc_offset = len(query_tokens) + 2 + start_position = tok_start_position - doc_start + doc_offset + end_position = tok_end_position - doc_start + doc_offset + + if is_training and example.is_impossible: + start_position = 0 + end_position = 0 + + if verbose_logging and example_index < 20: + print("*** Example ***") + print("unique_id: %s" % (unique_id)) + print("example_index: %s" % (example_index)) + print("doc_span_index: %s" % (doc_span_index)) + print( + "tokens: %s" % + " ".join([tokenization.printable_text(x) for x in tokens])) + print("token_to_orig_map: %s" % " ".join([ + "%d:%d" % (x, y) + for (x, y) in six.iteritems(token_to_orig_map) + ])) + print("token_is_max_context: %s" % " ".join([ + "%d:%s" % (x, y) + for (x, y) in six.iteritems(token_is_max_context) + ])) + print("input_ids: %s" % " ".join([str(x) for x in input_ids])) + print("input_mask: %s" % " ".join([str(x) + for x in input_mask])) + print("segment_ids: %s" % + " ".join([str(x) for x in segment_ids])) + if is_training and example.is_impossible: + print("impossible example") + if is_training and not example.is_impossible: + answer_text = " ".join( + tokens[start_position:(end_position + 1)]) + print("start_position: %d" % (start_position)) + print("end_position: %d" % (end_position)) + print("answer: %s" % + (tokenization.printable_text(answer_text))) + + feature = InputFeatures(unique_id=unique_id, + example_index=example_index, + doc_span_index=doc_span_index, + tokens=tokens, + token_to_orig_map=token_to_orig_map, + token_is_max_context=token_is_max_context, + input_ids=input_ids, + input_mask=input_mask, + segment_ids=segment_ids, + start_position=start_position, + end_position=end_position, + is_impossible=example.is_impossible) + + # Run callback + output_fn(feature) + + unique_id += 1 diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/data_loader.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/data_loader.py new file mode 100644 index 00000000..fc9ad6a4 --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/data_loader.py @@ -0,0 +1,199 @@ +# Copyright 2023 ByteDance and/or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +# To support feature cache. 
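One detail of the span labels built in `convert_examples_to_features` above: `doc_offset = len(query_tokens) + 2` accounts for `[CLS]`, the query tokens and the first `[SEP]`, so document-level answer positions are shifted into the packed sequence. A tiny worked example with made-up positions:

```python
# Answer-span remapping from document token space into the packed input (toy numbers).
query_tokens = ["who", "won", "it"]            # 3 query WordPieces
doc_start = 8                                  # this window starts at document token 8
tok_start_position, tok_end_position = 10, 12  # answer span in document tokens

doc_offset = len(query_tokens) + 2             # [CLS] + 3 query tokens + [SEP] -> 5
start_position = tok_start_position - doc_start + doc_offset  # 10 - 8 + 5 = 7
end_position = tok_end_position - doc_start + doc_offset      # 12 - 8 + 5 = 9
print(start_position, end_position)            # 7 9, indices into the packed sequence
```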
+import pickle +from transformers import BertTokenizer, AutoTokenizer +from general_perf.datasets.open_squad.create_squad_data import read_squad_examples, convert_examples_to_features +import collections +from general_perf.datasets import data_loader +import logging +from tqdm import tqdm +import numpy as np + +INPUT_TYPE = { + "UINT8": np.uint8, + "FLOAT32": np.float32, + "LONG": np.long, + "INT32": np.int32, + "INT64": np.int64 +} + +max_seq_length = 384 +max_query_length = 64 +doc_stride = 128 + +log = logging.getLogger("SQUAD") + + +class DataLoader(data_loader.Dataset): + def __init__(self, config): + super(DataLoader, self).__init__(config) + + log.info("Initial...") + self.config = config + model = self.config["model"] + total_count_override = None + perf_count_override = None + eval_features = [] + # Load features if cached, convert from examples otherwise. + input_file = "general_perf/datasets/open_squad/dev-v1.1.json" + cache_path = 'general_perf/datasets/open_squad/eval_features_' + self.config[ + 'model'] + '.pickle' + if os.path.exists(cache_path): + with open(cache_path, 'rb') as cache_file: + eval_features = pickle.load(cache_file) + eval_examples = read_squad_examples(input_file=input_file, + is_training=False, + version_2_with_negative=False) + else: + log.info("Start to generate data") + if "roberta" in self.config['model']: + tokenizer = AutoTokenizer.from_pretrained( + "csarron/roberta-base-squad-v1") + elif "albert" in self.config['model']: + tokenizer = AutoTokenizer.from_pretrained( + "madlag/albert-base-v2-squad") + elif "deberta" in self.config['model']: + tokenizer = AutoTokenizer.from_pretrained( + "Palak/microsoft_deberta-base_squad") + else: + tokenizer = BertTokenizer( + "general_perf/datasets/open_squad/vocab.txt") + eval_examples = read_squad_examples(input_file=input_file, + is_training=False, + version_2_with_negative=False) + + def append_feature(feature): + eval_features.append(feature) + + convert_examples_to_features(examples=eval_examples, + tokenizer=tokenizer, + max_seq_length=max_seq_length, + doc_stride=doc_stride, + max_query_length=max_query_length, + is_training=False, + output_fn=append_feature, + verbose_logging=False) + + with open(cache_path, 'wb') as cache_file: + pickle.dump(eval_features, cache_file) + + self.eval_features = eval_features + self.eval_examples = eval_examples + self.count = total_count_override or len(self.eval_features) + self.items = len(self.eval_features) + self.perf_count = perf_count_override or self.count + self.model = model + self.cur_bs = 1 + self.batch_num = int(self.items / self.cur_bs) + + # save mask name to help setting the the results at unmasked positions to zero + if "roberta" in self.model or "torch" in self.model: + self.mask_name = "attention_mask.1" + else: + self.mask_name = "input_mask:0" + + def name(self): + return self.config['dataset_name'] + + def preprocess(self): + log.info("Preprocessing...") + + self.rebatch(self.batch_num, skip=False) + + def rebatch(self, new_bs, skip=True): + log.info("Rebatching batch size to: {} ...".format(new_bs)) + + if self.cur_bs == new_bs and skip: + return + + self.cur_bs = new_bs + self.batch_num = int(self.items / self.cur_bs) + self.batched_data = [] + for i in tqdm(range(self.batch_num)): + features = collections.defaultdict(list) + for j in range(i * self.cur_bs, (i + 1) * self.cur_bs): + if "torch" in self.model: + features['input_ids.1'].append( + self.eval_features[j].input_ids) + features['attention_mask.1'].append( + self.eval_features[j].input_mask) 
+ if "roberta" in self.model: + features['token_type_ids.1'].append( + np.zeros((384,))) + elif "deberta" in self.model: + features['token_type_ids'].append( + self.eval_features[j].segment_ids) + else: + features['token_type_ids.1'].append( + self.eval_features[j].segment_ids) + else: + features['input_ids:0'].append( + self.eval_features[j].input_ids) + features['input_mask:0'].append( + self.eval_features[j].input_mask) + features['segment_ids:0'].append( + self.eval_features[j].segment_ids) + self.batched_data.append(features) + + def get_samples(self, sample_id): + if sample_id >= len(self.batched_data) or sample_id < 0: + raise ValueError("Your Input ID is out of range") + return self.batched_data[sample_id], [] + + def get_id(self, sample_id): + if sample_id >= len(self.batched_data) or sample_id < 0: + raise ValueError("Your Input ID is out of range") + return [ + self.eval_features[i].unique_id + for i in range(sample_id * self.cur_bs, (sample_id + 1) * + self.cur_bs) + ] + + def get_fake_samples(self, batch_size, shape, input_type): + data = {} + + avg_seq_len = 192 + max_seq_len = 384 + + if input_type: + i = 0 + for key, val in shape.items(): + val = [val[0] * batch_size] + val[1:] + if i == 0: + # fake input id and mask + input_ids = np.zeros(val).astype(INPUT_TYPE[input_type[i]]) + data[key] = input_ids + elif i == 1: + # fake input array length + input_len = np.random.randint(low=2 * avg_seq_len - + max_seq_len, + high=max_seq_len + 1, + size=(batch_size), + dtype=np.int32) + + input_mask = np.zeros(val).astype( + INPUT_TYPE[input_type[i]]) + + for b_idx, s_len in enumerate(input_len): + input_mask[b_idx][:s_len] = 1 + data[key] = input_mask + else: + data[key] = np.zeros(val).astype(INPUT_TYPE[input_type[i]]) + i += 1 + return data + else: + raise ValueError("Please provide input type") diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/test_accuracy.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/test_accuracy.py new file mode 100644 index 00000000..5edd352c --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/test_accuracy.py @@ -0,0 +1,134 @@ +# Copyright 2023 ByteDance and/or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
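The fake batches returned by `get_fake_samples` above carry variable-length attention masks so that performance runs see realistic padding. A minimal sketch with toy sizes (batch of 3, sequence length 8 instead of 384):

```python
# Variable-length attention masks as built in get_fake_samples above (toy sizes).
import numpy as np

batch_size, max_seq_len = 3, 8
input_len = np.random.randint(low=1, high=max_seq_len + 1, size=(batch_size,))
input_mask = np.zeros((batch_size, max_seq_len), dtype=np.int64)
for b_idx, s_len in enumerate(input_len):
    input_mask[b_idx][:s_len] = 1              # 1 = real token, 0 = padding

print(input_len)     # e.g. [5 2 7]
print(input_mask)    # each row has that many leading ones
```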
+ +import logging +import os +import collections +import numpy as np +import tensorflow as tf +import torch +from tqdm import tqdm + +from general_perf.datasets.open_squad.bert.accuracy_squad import write_predictions +from general_perf.datasets.open_squad.bert.evaluate import check_accuracy +from general_perf.datasets import test_accuracy + + +RawResult = collections.namedtuple("RawResult", + ["unique_id", "start_logits", "end_logits"]) + +log = logging.getLogger("TestAccuracy") + + +class AccuracyChecker(test_accuracy.AccuracyChecker): + def calculate_acc(self, data_percent): + log.info("Start to calculate accuracy...") + results, diffs = [], [] + num = int((data_percent / 100) * self.dataloader.get_batch_count() + ) if data_percent else self.dataloader.get_batch_count() + + for i in tqdm(range(num)): + test_data, _ = self.dataloader.get_samples(i) + unique_ids = self.dataloader.get_id(i) + result = self.runtime_backend.predict(test_data) + start_logits, end_logits = self._post_processing( + result, self.configs['framework']) + + # set results at unmasked positions to zero since the vendor's result may have different value at those meaningless positions + def set_unmask_to_zero(res, mask): + arr = np.array(res) + arr[mask == 0] = 0.0 + return list(arr) + + for i, mask in enumerate(np.array(test_data[self.dataloader.mask_name])): + for i, sl in enumerate(start_logits): + start_logits[i] = set_unmask_to_zero(sl, mask) + + for i, el in enumerate(end_logits): + end_logits[i] = set_unmask_to_zero(el, mask) + + for i, u_id in enumerate(unique_ids): + results.append( + RawResult(unique_id=u_id, + start_logits=start_logits[i], + end_logits=end_logits[i])) + + diffs.append(start_logits + end_logits) + + np.save(self.output_dir + "/{}.npy".format(self.dataloader.name()), + diffs) + data_file = os.path.abspath('.') + "/general_perf/datasets/open_squad/dev-v1.1.json" + predict_file = self.output_dir[:self.output_dir. 
+ rindex('/')] + "/predictions.json" + write_predictions(self.dataloader.eval_examples, + self.dataloader.eval_features, results, 20, 30, True, + predict_file) + result = check_accuracy(data_file, predict_file, + num * self.dataloader.cur_bs) + log.info('Batch size is {}, F1: {}, Exact Match:{}'.format( + self.dataloader.cur_bs, result['F1 Score'], result['Exact Match'])) + return result + + def _post_processing(self, inputs, framework): + start_results, end_results = [], [] + + if framework == "Tensorflow": + if 'distill' in self.configs['model']: + (start_logits, end_logits) = (inputs["output_0"], + inputs["output_1"]) + for i in range(self.dataloader.cur_bs): + start_logit = [float(x) for x in start_logits[i].flat] + end_logit = [float(x) for x in end_logits[i].flat] + start_results.append(start_logit) + end_results.append(end_logit) + else: + tensor_name = list(inputs)[0] + for i in range(len(inputs[tensor_name])): + logits = tf.transpose(np.array([inputs[tensor_name][i]]), + [2, 0, 1]) + unstacked_logits = tf.unstack(logits, axis=0) + if tf.executing_eagerly(): + (start_logit, + end_logit) = (unstacked_logits[0].numpy(), + unstacked_logits[1].numpy()) + else: + with tf.compat.v1.Session(): + (start_logit, + end_logit) = (unstacked_logits[0].eval(), + unstacked_logits[1].eval()) + start_logit = [float(x) for x in start_logit.flat] + end_logit = [float(x) for x in end_logit.flat] + start_results.append(start_logit) + end_results.append(end_logit) + else: + if isinstance(inputs, dict): + (start_logits, end_logits) = ( + inputs["start_logits"], + inputs["end_logits"], + ) + elif isinstance(inputs[0], torch.Tensor): + (start_logits, end_logits) = ( + inputs[0].float().cpu().detach().numpy() if inputs[0].dtype==torch.bfloat16 else inputs[0].cpu().detach().numpy(), + inputs[1].float().cpu().detach().numpy() if inputs[1].dtype==torch.bfloat16 else inputs[1].cpu().detach().numpy(), + ) + else: + (start_logits, end_logits) = (inputs[0], inputs[1]) + + for i in range(self.dataloader.cur_bs): + start_logit = [float(x) for x in start_logits[i].flat] + end_logit = [float(x) for x in end_logits[i].flat] + start_results.append(start_logit) + end_results.append(end_logit) + + return start_results, end_results -- Gitee From 62573ff08644a9ecbf4ddce8df3a039098dfbc53 Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Tue, 14 Jan 2025 14:00:02 +0800 Subject: [PATCH 20/35] update bytemlperf model --- models/nlp/language_model/deberta/ixrt/ci/prepare.sh | 7 ++++--- models/nlp/language_model/roberta/ixrt/README.md | 6 +++--- models/nlp/language_model/roberta/ixrt/ci/prepare.sh | 2 +- models/nlp/language_model/videobert/ixrt/README.md | 1 - tests/run_ixrt.py | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/models/nlp/language_model/deberta/ixrt/ci/prepare.sh b/models/nlp/language_model/deberta/ixrt/ci/prepare.sh index 755d4f19..4b3452fe 100644 --- a/models/nlp/language_model/deberta/ixrt/ci/prepare.sh +++ b/models/nlp/language_model/deberta/ixrt/ci/prepare.sh @@ -27,7 +27,7 @@ onnxsim deberta-torch-fp32.onnx deberta-torch-fp32-sim.onnx python3 remove_clip_and_cast.py mkdir -p data/open_deberta -mv ./deberta-sim-drop-clip-drop-invaild-cast.onnx data/open_deberta/deberta.onnx +cp ./deberta-sim-drop-clip-drop-invaild-cast.onnx data/open_deberta/deberta.onnx ln -s ../../../../../toolbox/ByteMLPerf ./ @@ -37,8 +37,9 @@ pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/requ # setup cp /root/data/datasets/open_squad/* 
./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/ -cp ./deberta-sim-drop-clip-drop-invaild-cast.onnx /root/data/checkpoints/open_deberta/ -cp -r /root/data/checkpoints/open_deberta ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/ +mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular +cp -r /root/data/checkpoints/open_deberta ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/ +cp ./deberta-sim-drop-clip-drop-invaild-cast.onnx ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/open_deberta/ cd ./ByteMLPerf/byte_infer_perf/general_perf cp -r /root/data/3rd_party/workloads ./ diff --git a/models/nlp/language_model/roberta/ixrt/README.md b/models/nlp/language_model/roberta/ixrt/README.md index 55478c31..f37d5f3c 100644 --- a/models/nlp/language_model/roberta/ixrt/README.md +++ b/models/nlp/language_model/roberta/ixrt/README.md @@ -83,13 +83,13 @@ rm -f open_squad.tar wget http://files.deepspark.org.cn:880/deepspark/csarron.tar tar xf csarron.tar rm -f csarron.tar -mv csarron/ ./ByteMLPerf/byte_infer_perf/ +mv csarron/ ./ByteMLPerf/byte_infer_perf/general_perf/ # Run Acc scripts -cd ./ByteMLPerf/byte_infer_perf/ +cd ./ByteMLPerf/byte_infer_perf/general_perf/ mkdir -p workloads wget -O workloads/roberta-torch-fp32.json https://raw.githubusercontent.com/bytedance/ByteMLPerf/refs/heads/main/byte_infer_perf/general_perf/workloads/roberta-torch-fp32.json -python3 general_perf/core/perf_engine.py --hardware_type ILUVATAR --task roberta-torch-fp32 +python3 core/perf_engine.py --hardware_type ILUVATAR --task roberta-torch-fp32 ``` ## Results diff --git a/models/nlp/language_model/roberta/ixrt/ci/prepare.sh b/models/nlp/language_model/roberta/ixrt/ci/prepare.sh index c4df075b..b32ce0e4 100644 --- a/models/nlp/language_model/roberta/ixrt/ci/prepare.sh +++ b/models/nlp/language_model/roberta/ixrt/ci/prepare.sh @@ -46,6 +46,6 @@ cp /root/data/datasets/open_squad/* ./ByteMLPerf/byte_infer_perf/general_perf/da wget http://files.deepspark.org.cn:880/deepspark/csarron.tar tar xf csarron.tar rm -f csarron.tar -mv csarron/ ./ByteMLPerf/byte_infer_perf/ +mv csarron/ ./ByteMLPerf/byte_infer_perf/general_perf/ cd ./ByteMLPerf/byte_infer_perf/general_perf cp -r /root/data/3rd_party/workloads ./ \ No newline at end of file diff --git a/models/nlp/language_model/videobert/ixrt/README.md b/models/nlp/language_model/videobert/ixrt/README.md index dfbe449b..7bfb8c92 100644 --- a/models/nlp/language_model/videobert/ixrt/README.md +++ b/models/nlp/language_model/videobert/ixrt/README.md @@ -64,7 +64,6 @@ mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/open_videob cp ./general_perf/model_zoo/popular/open_videobert/video-bert.onnx ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/open_videobert/ # run acc scripts -mv perf_engine.py ./ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py cd ./ByteMLPerf/byte_infer_perf/general_perf mkdir -p workloads wget -O workloads/videobert-onnx-fp32.json https://raw.githubusercontent.com/bytedance/ByteMLPerf/refs/heads/main/byte_infer_perf/general_perf/workloads/videobert-onnx-fp32.json diff --git a/tests/run_ixrt.py b/tests/run_ixrt.py index de933af5..4a2b347c 100644 --- a/tests/run_ixrt.py +++ b/tests/run_ixrt.py @@ -361,7 +361,7 @@ def run_nlp_testcase(model): set -x cd ../{model['relative_path']} export ORIGIN_ONNX_NAME=./data/open_{model_name}/{model_name} - export OPTIMIER_FILE=./optimizer.py + export OPTIMIER_FILE=/root/data/3rd_party/iluvatar-corex-ixrt/tools/optimizer/optimizer.py export 
PROJ_PATH=./ bash scripts/infer_{model_name}_{prec}_performance.sh cd ./ByteMLPerf/byte_infer_perf/general_perf -- Gitee From 91a3a18d6b8dbed69b2aed30c74b3437a486a429 Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Tue, 14 Jan 2025 14:11:09 +0800 Subject: [PATCH 21/35] update model --- .../ctr-prediction/widedeep/ixrt/README.md | 2 + .../widedeep/ixrt/ci/prepare.sh | 9 +- .../conformer/ixrt/README.md | 4 +- ...rt.sh => infer_conformer_fp16_accuracy.sh} | 0 ...sh => infer_conformer_fp16_performance.sh} | 0 .../transformer_asr/ixrt/ci/prepare.sh | 2 +- .../open_criteo_kaggle/data_loader.py | 102 ++++++++++ .../open_criteo_kaggle/preprocess_dataset.py | 174 ++++++++++++++++++ .../open_criteo_kaggle/test_accuracy.py | 47 +++++ 9 files changed, 329 insertions(+), 11 deletions(-) rename models/speech/speech_recognition/conformer/ixrt/scripts/{infer_conformer_fp16_accuracy_ixrt.sh => infer_conformer_fp16_accuracy.sh} (100%) rename models/speech/speech_recognition/conformer/ixrt/scripts/{infer_conformer_fp16_performance_ixrt.sh => infer_conformer_fp16_performance.sh} (100%) create mode 100644 toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_criteo_kaggle/data_loader.py create mode 100644 toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_criteo_kaggle/preprocess_dataset.py create mode 100644 toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_criteo_kaggle/test_accuracy.py diff --git a/models/recommendation/ctr-prediction/widedeep/ixrt/README.md b/models/recommendation/ctr-prediction/widedeep/ixrt/README.md index 835d70a7..51481c7f 100644 --- a/models/recommendation/ctr-prediction/widedeep/ixrt/README.md +++ b/models/recommendation/ctr-prediction/widedeep/ixrt/README.md @@ -9,6 +9,8 @@ Generalized linear models with nonlinear feature transformations are widely used ### Install ```bash +apt install -y libnuma-dev + pip3 install -r requirements.txt ``` diff --git a/models/recommendation/ctr-prediction/widedeep/ixrt/ci/prepare.sh b/models/recommendation/ctr-prediction/widedeep/ixrt/ci/prepare.sh index 3b31a219..56b71315 100644 --- a/models/recommendation/ctr-prediction/widedeep/ixrt/ci/prepare.sh +++ b/models/recommendation/ctr-prediction/widedeep/ixrt/ci/prepare.sh @@ -16,14 +16,7 @@ set -x -ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') -if [[ ${ID} == "ubuntu" ]]; then - apt install -y libgl1-mesa-glx -elif [[ ${ID} == "centos" ]]; then - yum install -y mesa-libGL -else - echo "Not Support Os" -fi +apt install -y libnuma-dev pip install -r requirements.txt cp -r /root/data/checkpoints/open_wide_deep_saved_model ./ diff --git a/models/speech/speech_recognition/conformer/ixrt/README.md b/models/speech/speech_recognition/conformer/ixrt/README.md index 593cda36..809cbf08 100644 --- a/models/speech/speech_recognition/conformer/ixrt/README.md +++ b/models/speech/speech_recognition/conformer/ixrt/README.md @@ -45,9 +45,9 @@ bash scripts/aishell_data_prepare.sh ${DATA_DIR} ${TOOL_DIR} ```bash # Accuracy -bash scripts/infer_conformer_fp16_accuracy_ixrt.sh +bash scripts/infer_conformer_fp16_accuracy.sh # Performance -bash scripts/infer_conformer_fp16_performance_ixrt.sh +bash scripts/infer_conformer_fp16_performance.sh ``` ## Results diff --git a/models/speech/speech_recognition/conformer/ixrt/scripts/infer_conformer_fp16_accuracy_ixrt.sh b/models/speech/speech_recognition/conformer/ixrt/scripts/infer_conformer_fp16_accuracy.sh similarity index 100% rename from models/speech/speech_recognition/conformer/ixrt/scripts/infer_conformer_fp16_accuracy_ixrt.sh 
rename to models/speech/speech_recognition/conformer/ixrt/scripts/infer_conformer_fp16_accuracy.sh diff --git a/models/speech/speech_recognition/conformer/ixrt/scripts/infer_conformer_fp16_performance_ixrt.sh b/models/speech/speech_recognition/conformer/ixrt/scripts/infer_conformer_fp16_performance.sh similarity index 100% rename from models/speech/speech_recognition/conformer/ixrt/scripts/infer_conformer_fp16_performance_ixrt.sh rename to models/speech/speech_recognition/conformer/ixrt/scripts/infer_conformer_fp16_performance.sh diff --git a/models/speech/speech_recognition/transformer_asr/ixrt/ci/prepare.sh b/models/speech/speech_recognition/transformer_asr/ixrt/ci/prepare.sh index 9408f73a..b2e8ce9e 100644 --- a/models/speech/speech_recognition/transformer_asr/ixrt/ci/prepare.sh +++ b/models/speech/speech_recognition/transformer_asr/ixrt/ci/prepare.sh @@ -25,8 +25,8 @@ else echo "Not Support Os" fi +cp -r /root/data/checkpoints/8886 results/transformer/ mkdir -p results/transformer/8886/save -cp -r /root/data/checkpoints/8886 results/transformer/8886 mkdir -p /home/data/speechbrain/aishell/csv_data cp -r /root/data/datasets/AISHELL/data_aishell /home/data/speechbrain/aishell cp results/transformer/8886/*.csv /home/data/speechbrain/aishell/csv_data diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_criteo_kaggle/data_loader.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_criteo_kaggle/data_loader.py new file mode 100644 index 00000000..a224eaf5 --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_criteo_kaggle/data_loader.py @@ -0,0 +1,102 @@ +# Copyright 2023 ByteDance and/or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import logging +import os +from re import T +import numpy as np +from general_perf.datasets import data_loader +from tqdm import tqdm + +log = logging.getLogger("CriteoKaggle") + + +class DataLoader(data_loader.Dataset): + def __init__(self, config): + super(DataLoader, self).__init__(config) + + log.info("Initial...") + self.config = config + self.cur_bs = 1 + if not os.path.exists("general_perf/datasets/{}/numeric.npy".format( + self.config['dataset_name'])): + from general_perf.datasets.open_criteo_kaggle.preprocess_dataset import csv_to_numpy + csv_to_numpy( + "general_perf/datasets/{}/eval.csv".format( + self.config['dataset_name']), + "general_perf/datasets/{}/".format(self.config['dataset_name'])) + + num = np.load("general_perf/datasets/{}/numeric.npy".format( + self.config['dataset_name'])) + cat = np.load("general_perf/datasets/{}/categorical.npy".format( + self.config['dataset_name'])) + label = np.load("general_perf/datasets/{}/label.npy".format( + self.config['dataset_name'])) + self.items = len(num) + self.batch_num = int(self.items / self.cur_bs) + self.feed_dict = {} + for i in tqdm(range(cat.shape[0])): + if i == 0: + self.feed_dict["new_categorical_placeholder:0"] = list( + cat[i].reshape(-1, 2)) + self.feed_dict["new_numeric_placeholder:0"] = list( + num[i].reshape(1, -1)) + self.feed_dict["label"] = list(label[i]) + else: + self.feed_dict["new_categorical_placeholder:0"].extend( + cat[i].reshape(-1, 2)) + self.feed_dict["new_numeric_placeholder:0"].extend( + num[i].reshape(1, -1)) + self.feed_dict["label"].extend(label[i]) + self.feed_dict['new_categorical_placeholder:0'] = np.array( + self.feed_dict['new_categorical_placeholder:0'], dtype=np.int64) + self.feed_dict['new_numeric_placeholder:0'] = np.array( + self.feed_dict['new_numeric_placeholder:0'], dtype=np.float32) + self.feed_dict['label'] = np.array(self.feed_dict['label'], + dtype=np.int64) + + def name(self): + return self.config['dataset_name'] + + def preprocess(self): + log.info("Preprocessing...") + + self.rebatch(self.cur_bs, skip=False) + + def rebatch(self, new_bs, skip=True): + log.info("Rebatching batch size to: {} ...".format(new_bs)) + + if self.cur_bs == new_bs and skip: + return + + self.cur_bs = new_bs + self.batch_num = int(self.items / self.cur_bs) + self.batched_data = [] + self.labels = [] + for i in tqdm(range(self.batch_num)): + split_data = { + 'new_categorical_placeholder:0': + self.feed_dict["new_categorical_placeholder:0"][i * + self.cur_bs * + 26:(i + 1) * + self.cur_bs * + 26, ], + 'new_numeric_placeholder:0': + self.feed_dict["new_numeric_placeholder:0"][ + i * self.cur_bs:(i + 1) * self.cur_bs, ], + } + self.labels.append( + self.feed_dict["label"][i * self.cur_bs:(i + 1) * + self.cur_bs, ]) + self.batched_data.append(split_data) diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_criteo_kaggle/preprocess_dataset.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_criteo_kaggle/preprocess_dataset.py new file mode 100644 index 00000000..b38adf83 --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_criteo_kaggle/preprocess_dataset.py @@ -0,0 +1,174 @@ +# Copyright 2023 ByteDance and/or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
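In the `rebatch` slicing above, every Criteo sample contributes 26 `(feature_index, hash_bucket)` rows to `new_categorical_placeholder:0`, so a batch of `cur_bs` samples spans `cur_bs * 26` rows. A small sketch with made-up data:

```python
# 26-rows-per-sample slicing as used in rebatch above (toy data).
import numpy as np

num_samples, num_cat = 4, 26
one_sample = np.stack([np.arange(num_cat),
                       np.random.randint(0, 1000, size=num_cat)], axis=1)
all_rows = np.tile(one_sample, (num_samples, 1))   # shape (4 * 26, 2)

cur_bs = 2
for i in range(num_samples // cur_bs):
    batch = all_rows[i * cur_bs * num_cat:(i + 1) * cur_bs * num_cat]
    print(i, batch.shape)                          # each batch holds (52, 2) rows
```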
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import pandas +import argparse +import numpy as np +import tensorflow as tf + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument('--input', + type=str, + default="eval.csv", + help='full path of data file e.g. eval.csv', + dest='evaldatafile_path', + required=True) + + args = parser.parse_args() + return args + + +def version_is_less_than(a, b): + a_parts = a.split('.') + b_parts = b.split('.') + + for i in range(len(a_parts)): + if int(a_parts[i]) < int(b_parts[i]): + print('{} < {}, version_is_less_than() returning False'.format( + a_parts[i], b_parts[i])) + return True + return False + + +def csv_to_numpy(eval_csv_file, output): + print("TensorFlow version {}".format(tf.__version__)) + required_tf_version = '2.0.0' + + if version_is_less_than(tf.__version__, required_tf_version): + tf.compat.v1.enable_eager_execution() + + # args = parse_args() + # eval_csv_file = args.evaldatafile_path + + csv = pandas.read_csv(eval_csv_file, header=None) + if len(csv.columns) == 39: + dataset_type = 'test' + else: + dataset_type = 'eval' + + fill_na_dict = {} + if dataset_type == 'test': + for i in range(0, 13): + fill_na_dict[i] = 0.0 + for i in range(13, 39): + fill_na_dict[i] = "" + else: + for i in range(1, 14): + fill_na_dict[i] = 0.0 + for i in range(14, 40): + fill_na_dict[i] = "" + + csv = csv.fillna(value=fill_na_dict).values + + LABEL_COLUMN = ["clicked"] + CATEGORICAL_COLUMNS1 = ["C" + str(i) + "_embedding" for i in range(1, 27)] + NUMERIC_COLUMNS1 = ["I" + str(i) for i in range(1, 14)] + CATEGORICAL_COLUMNS2 = ["C" + str(i) + "_embedding" for i in range(1, 27)] + NUMERIC_COLUMNS2 = ["I" + str(i) for i in range(1, 14)] + + DATA_COLUMNS = LABEL_COLUMN + NUMERIC_COLUMNS1 + CATEGORICAL_COLUMNS1 + + CATEGORICAL_COLUMNS1.sort() + NUMERIC_COLUMNS1.sort() + + with open(eval_csv_file, 'r') as f: + nums = [line.strip('\n\r').split(',') for line in f.readlines()] + numpy_arr = np.array(nums) + numpy_arr[numpy_arr == ''] = '0' + min_list, max_list, range_list = [], [], [] + + for i in range(len(DATA_COLUMNS)): + if DATA_COLUMNS[i] in NUMERIC_COLUMNS1: + col_min = numpy_arr[:, i].astype(np.float32).min() + col_max = numpy_arr[:, i].astype(np.float32).max() + min_list.append(col_min) + max_list.append(col_max) + range_list.append(col_max - col_min) + + print('min list', min_list) + print('max list', max_list) + print('range list', range_list) + + all_data = [] + no_of_rows = 0 + for row in csv: + no_of_rows = no_of_rows + 1 + unnormalized_vals = np.array(row[1:14]) + normalized_vals = (unnormalized_vals - min_list) / range_list + new_categorical_dict = dict(zip(CATEGORICAL_COLUMNS2, row[14:40])) + + new_categorical_list = [] + for i in CATEGORICAL_COLUMNS1: + if pandas.isnull(new_categorical_dict[i]): + new_categorical_list.append("") + else: + new_categorical_list.append(new_categorical_dict[i]) + + if tf.executing_eagerly(): + hash_values = tf.strings.to_hash_bucket_fast( + new_categorical_list, 1000).numpy() + else: + hash_tensor = 
tf.strings.to_hash_bucket_fast( + new_categorical_list, 1000) + with tf.compat.v1.Session() as sess: + hash_values = hash_tensor.eval() + + new_numerical_dict = dict(zip(NUMERIC_COLUMNS2, normalized_vals)) + + item_data = { + "new_numeric_placeholder": [], + "new_categorical_placeholder": [], + "label": [] + } + + for i in NUMERIC_COLUMNS1: + item_data["new_numeric_placeholder"].extend( + [new_numerical_dict[i]]) + + for i in range(0, 26): + item_data["new_categorical_placeholder"].extend([i]) + item_data["new_categorical_placeholder"].extend([hash_values[i]]) + + item_data["label"].append(row[0]) + + all_data.append(item_data) + + wnd_num = [] + wnd_cate = [] + wnd_lable = [] + + for data in all_data: + wnd_num.append(data["new_numeric_placeholder"]) + wnd_cate.append(data["new_categorical_placeholder"]) + wnd_lable.append(data["label"]) + + np.save(os.path.join(output, "numeric.npy"), np.array(wnd_num)) + np.save(os.path.join(output, "categorical.npy"), np.array(wnd_cate)) + np.save(os.path.join(output, "label.npy"), np.array(wnd_lable)) + + print('Total number of rows ', no_of_rows) + print( + 'Generated output file name : wnd_num.npy, wnd_cate.npy, wnd_label.npy' + ) + + +if __name__ == "__main__": + csv_to_numpy() diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_criteo_kaggle/test_accuracy.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_criteo_kaggle/test_accuracy.py new file mode 100644 index 00000000..145e9cb3 --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_criteo_kaggle/test_accuracy.py @@ -0,0 +1,47 @@ +# Copyright 2023 ByteDance and/or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
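The per-row preprocessing in `csv_to_numpy` above boils down to min-max scaling the 13 numeric columns and hashing the 26 categorical strings into 1000 buckets, stored as flat `(feature_index, bucket)` pairs. A sketch with toy values; the `hash(...) % 1000` below is only an illustrative stand-in, the real script uses `tf.strings.to_hash_bucket_fast`:

```python
# Toy version of the numeric scaling and categorical hashing in csv_to_numpy above.
import numpy as np

numeric = np.array([3.0, 10.0, 0.0])            # toy numeric features
col_min = np.array([0.0, 5.0, 0.0])
col_range = np.array([10.0, 10.0, 1.0])
normalized = (numeric - col_min) / col_range    # [0.3, 0.5, 0.0]

categorical = ["ad_1f0b", "", "cat_77"]         # toy categorical strings
pairs = []
for i, value in enumerate(categorical):
    bucket = hash(value) % 1000                 # stand-in for tf.strings.to_hash_bucket_fast
    pairs.extend([i, bucket])                   # flat [index, bucket, index, bucket, ...]

print(normalized)
print(np.array(pairs).reshape(-1, 2))           # one (index, bucket) row per feature
```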
+ +import logging +import numpy as np +from general_perf.datasets import test_accuracy +from tqdm import tqdm + +log = logging.getLogger("TestAccuracy") + + +class AccuracyChecker(test_accuracy.AccuracyChecker): + def calculate_acc(self, data_percent): + log.info("Start to calculate accuracy...") + num = int((data_percent / 100) * self.dataloader.get_batch_count() + ) if data_percent else self.dataloader.get_batch_count() + good, total = 0, 0 + diffs = [] + for i in tqdm(range(num)): + test_data, labels = self.dataloader.get_samples(i) + + results = self.runtime_backend.predict(test_data) + results = results[list(results)[0]] + diffs.append(results) + + for j in range(len(results)): + if np.argmax(results[j].round()) == labels[j].round(): + good += 1 + total += 1 + + accuracy = round((good / total), 5) + np.save(self.output_dir + "/{}.npy".format(self.dataloader.name()), + diffs) + log.info('Batch size is {}, Accuracy: {}'.format( + self.dataloader.cur_bs, accuracy)) + return {"Top-1": accuracy} -- Gitee From 21ab83276ee870a4586e46a168f66a8ebff3d794 Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Tue, 14 Jan 2025 14:52:26 +0800 Subject: [PATCH 22/35] update byteMLperf model --- .../swin_transformer_large/ixrt/ci/prepare.sh | 2 +- models/nlp/language_model/albert/ixrt/ci/prepare.sh | 2 ++ models/nlp/language_model/albert/ixrt/requirements.txt | 2 +- models/nlp/language_model/deberta/ixrt/requirements.txt | 2 +- models/nlp/language_model/roberta/ixrt/ci/prepare.sh | 6 +++--- models/nlp/language_model/roberta/ixrt/requirements.txt | 2 +- models/nlp/language_model/roformer/ixrt/requirements.txt | 2 +- models/nlp/language_model/videobert/ixrt/ci/prepare.sh | 4 ++-- models/nlp/language_model/videobert/ixrt/requirements.txt | 2 +- .../recommendation/ctr-prediction/widedeep/ixrt/README.md | 2 ++ .../ctr-prediction/widedeep/ixrt/ci/prepare.sh | 4 +++- 11 files changed, 18 insertions(+), 12 deletions(-) diff --git a/models/cv/classification/swin_transformer_large/ixrt/ci/prepare.sh b/models/cv/classification/swin_transformer_large/ixrt/ci/prepare.sh index 0581f0d8..4adb3218 100644 --- a/models/cv/classification/swin_transformer_large/ixrt/ci/prepare.sh +++ b/models/cv/classification/swin_transformer_large/ixrt/ci/prepare.sh @@ -45,4 +45,4 @@ cp -r /root/data/datasets/open_imagenet/* ByteMLPerf/byte_infer_perf/general_per mkdir -p ./ByteMLPerf/general_perf/model_zoo/popular/swin-large cp general_perf/model_zoo/popular/swin-large/* ./ByteMLPerf/general_perf/model_zoo/popular/swin-large -cp -r /root/data/3rd_party/workloads ./ByteMLPerf/byte_infer_perf/general_perf/workloads \ No newline at end of file +cp -r /root/data/3rd_party/workloads ./ByteMLPerf/byte_infer_perf/general_perf/ \ No newline at end of file diff --git a/models/nlp/language_model/albert/ixrt/ci/prepare.sh b/models/nlp/language_model/albert/ixrt/ci/prepare.sh index 83592230..9559705d 100644 --- a/models/nlp/language_model/albert/ixrt/ci/prepare.sh +++ b/models/nlp/language_model/albert/ixrt/ci/prepare.sh @@ -18,6 +18,8 @@ set -x apt install -y libnuma-dev +pip3 install -r requirements.txt + cp /root/data/3rd_party/albert-torch-fp32.json ./ cp /root/data/3rd_party/iluvatar-corex-ixrt/tools/optimizer/optimizer.py ./ diff --git a/models/nlp/language_model/albert/ixrt/requirements.txt b/models/nlp/language_model/albert/ixrt/requirements.txt index 7e0fcb9e..5b400783 100644 --- a/models/nlp/language_model/albert/ixrt/requirements.txt +++ b/models/nlp/language_model/albert/ixrt/requirements.txt @@ -8,4 +8,4 @@ tabulate pycocotools 
opencv-python==4.6.0.66 transformers==4.33.3 -typing_extensions==4.12.2 \ No newline at end of file +typing-extensions==4.12.2 \ No newline at end of file diff --git a/models/nlp/language_model/deberta/ixrt/requirements.txt b/models/nlp/language_model/deberta/ixrt/requirements.txt index 9dfdb081..f3f7ae70 100644 --- a/models/nlp/language_model/deberta/ixrt/requirements.txt +++ b/models/nlp/language_model/deberta/ixrt/requirements.txt @@ -9,4 +9,4 @@ pycocotools opencv-python==4.6.0.66 tf2onnx transformers==4.33.3 -typing_extensions==4.12.2 \ No newline at end of file +typing-extensions==4.12.2 \ No newline at end of file diff --git a/models/nlp/language_model/roberta/ixrt/ci/prepare.sh b/models/nlp/language_model/roberta/ixrt/ci/prepare.sh index b32ce0e4..89734e52 100644 --- a/models/nlp/language_model/roberta/ixrt/ci/prepare.sh +++ b/models/nlp/language_model/roberta/ixrt/ci/prepare.sh @@ -25,10 +25,10 @@ cp -r /root/data/checkpoints/open_roberta data/ cp /root/data/3rd_party/roberta-torch-fp32.json ./ cp /root/data/3rd_party/iluvatar-corex-ixrt/tools/optimizer/optimizer.py ./ # export onnx -python3 export_onnx.py --model_path open_roberta/roberta-base-squad.pt --output_path open_roberta/roberta-torch-fp32.onnx +python3 export_onnx.py --model_path data/open_roberta/roberta-base-squad.pt --output_path data/open_roberta/roberta-torch-fp32.onnx # Simplify onnx model -onnxsim open_roberta/roberta-torch-fp32.onnx open_roberta/roberta.onnx +onnxsim data/open_roberta/roberta-torch-fp32.onnx data/open_roberta/roberta.onnx # Link and install requirements ln -s ../../../../../toolbox/ByteMLPerf ./ @@ -37,7 +37,7 @@ pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/requ # Move open_roberta mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/ -mv open_roberta ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/ +mv data/open_roberta ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/ # Get open_squad cp /root/data/datasets/open_squad/* ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad diff --git a/models/nlp/language_model/roberta/ixrt/requirements.txt b/models/nlp/language_model/roberta/ixrt/requirements.txt index de4e636f..4a715333 100644 --- a/models/nlp/language_model/roberta/ixrt/requirements.txt +++ b/models/nlp/language_model/roberta/ixrt/requirements.txt @@ -3,4 +3,4 @@ py-libnuma==1.2 bert pycuda transformers==4.33.3 -typing_extensions==4.12.2 \ No newline at end of file +typing-extensions==4.12.2 \ No newline at end of file diff --git a/models/nlp/language_model/roformer/ixrt/requirements.txt b/models/nlp/language_model/roformer/ixrt/requirements.txt index a45ab311..19b9f1aa 100644 --- a/models/nlp/language_model/roformer/ixrt/requirements.txt +++ b/models/nlp/language_model/roformer/ixrt/requirements.txt @@ -2,4 +2,4 @@ tf2onnx pycuda onnxsim py-libnuma==1.2 -typing_extensions==4.12.2 \ No newline at end of file +typing-extensions==4.12.2 \ No newline at end of file diff --git a/models/nlp/language_model/videobert/ixrt/ci/prepare.sh b/models/nlp/language_model/videobert/ixrt/ci/prepare.sh index 5efc12a7..0d46c6c0 100644 --- a/models/nlp/language_model/videobert/ixrt/ci/prepare.sh +++ b/models/nlp/language_model/videobert/ixrt/ci/prepare.sh @@ -22,7 +22,7 @@ pip install -r requirements.txt mkdir -p data cp -r /root/data/checkpoints/open_videobert data/ -cp /root/data/3rd_party/iluvatar-corex-ixrt/tools/optimizer/optimizer.py ./ + # link and install requirements ln -s ../../../../../toolbox/ByteMLPerf ./ pip3 install -r 
./ByteMLPerf/byte_infer_perf/general_perf/requirements.txt @@ -32,6 +32,6 @@ pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/requ mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cifar/ cp -r /root/data/datasets/open_cifar/cifar-100-python/ ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cifar/ mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/open_videobert/ -cp /root/data/checkpoints/open_videobert/video-bert.onnx ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/open_videobert/ +cp /root/data/checkpoints/open_videobert/videobert.onnx ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/open_videobert/video-bert.onnx cd ./ByteMLPerf/byte_infer_perf/general_perf cp -r /root/data/3rd_party/workloads ./ diff --git a/models/nlp/language_model/videobert/ixrt/requirements.txt b/models/nlp/language_model/videobert/ixrt/requirements.txt index 7e0fcb9e..5b400783 100644 --- a/models/nlp/language_model/videobert/ixrt/requirements.txt +++ b/models/nlp/language_model/videobert/ixrt/requirements.txt @@ -8,4 +8,4 @@ tabulate pycocotools opencv-python==4.6.0.66 transformers==4.33.3 -typing_extensions==4.12.2 \ No newline at end of file +typing-extensions==4.12.2 \ No newline at end of file diff --git a/models/recommendation/ctr-prediction/widedeep/ixrt/README.md b/models/recommendation/ctr-prediction/widedeep/ixrt/README.md index 51481c7f..e7f6246a 100644 --- a/models/recommendation/ctr-prediction/widedeep/ixrt/README.md +++ b/models/recommendation/ctr-prediction/widedeep/ixrt/README.md @@ -74,6 +74,8 @@ mv widedeep_dynamicshape_new.onnx ./ByteMLPerf/byte_infer_perf/general_perf/mode # Run Acc scripts cd ./ByteMLPerf/byte_infer_perf/general_perf +mkdir -p workloads +wget -O workloads/widedeep-tf-fp32.json https://raw.githubusercontent.com/bytedance/ByteMLPerf/refs/heads/main/byte_infer_perf/general_perf/workloads/widedeep-tf-fp32.json python3 core/perf_engine.py --hardware_type ILUVATAR --task widedeep-tf-fp32 ``` diff --git a/models/recommendation/ctr-prediction/widedeep/ixrt/ci/prepare.sh b/models/recommendation/ctr-prediction/widedeep/ixrt/ci/prepare.sh index 56b71315..f42aed23 100644 --- a/models/recommendation/ctr-prediction/widedeep/ixrt/ci/prepare.sh +++ b/models/recommendation/ctr-prediction/widedeep/ixrt/ci/prepare.sh @@ -42,4 +42,6 @@ mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_criteo_kaggle/ cp /root/data/datasets/eval.csv ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_criteo_kaggle/ wget http://files.deepspark.org.cn:880/deepspark/widedeep_dynamicshape_new.onnx -mv widedeep_dynamicshape_new.onnx ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/regular/open_wide_deep_saved_model/ \ No newline at end of file +mv widedeep_dynamicshape_new.onnx ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/regular/open_wide_deep_saved_model/ + +cp -r /root/data/3rd_party/workloads ./ByteMLPerf/byte_infer_perf/general_perf/ \ No newline at end of file -- Gitee From 0214f8f3b1927fc94e1e714f341c2d8f1737fe71 Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Wed, 15 Jan 2025 13:54:45 +0800 Subject: [PATCH 23/35] update model --- .../efficientnet_b0/ixrt/quant copy.py | 147 ------------------ .../efficientnet_b0/ixrt/quant.py | 136 +++++++++++++--- models/cv/detection/yolov5s/ixrt/README.md | 2 +- .../cv/detection/yolov6/ixrt/requirements.txt | 3 +- .../conformer/igie/ci/prepare.sh | 2 +- .../conformer/ixrt/README.md | 2 +- .../conformer/ixrt/ci/prepare.sh | 5 +- 
.../conformer/ixrt/ixrt_inference_accuracy.py | 4 +- .../ixrt/ixrt_inference_performance.py | 4 +- .../transformer_asr/ixrt/ci/prepare.sh | 5 +- tests/models_ixrt.yaml | 1 - 11 files changed, 127 insertions(+), 184 deletions(-) delete mode 100644 models/cv/classification/efficientnet_b0/ixrt/quant copy.py diff --git a/models/cv/classification/efficientnet_b0/ixrt/quant copy.py b/models/cv/classification/efficientnet_b0/ixrt/quant copy.py deleted file mode 100644 index 72f85638..00000000 --- a/models/cv/classification/efficientnet_b0/ixrt/quant copy.py +++ /dev/null @@ -1,147 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -from ppq import * -from ppq.api import * -import os -from calibration_dataset import getdataloader -import argparse -import random -import numpy as np -import torch - -random.seed(42) -np.random.seed(42) -torch.manual_seed(42) - - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model_name", type=str) - parser.add_argument("--model", type=str) - parser.add_argument("--dataset_dir", type=str, default="imagenet_val") - parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], - default="hist_percentile") - parser.add_argument("--disable_quant_names", nargs='*', type=str) - parser.add_argument("--save_dir", type=str, help="save path", default=None) - parser.add_argument("--bsz", type=int, default=32) - parser.add_argument("--step", type=int, default=32) - parser.add_argument("--seed", type=int, default=42) - parser.add_argument("--imgsz", type=int, default=224) - args = parser.parse_args() - print("Quant config:", args) - print(args.disable_quant_names) - return args - - -config = parse_args() - -# modify configuration below: -WORKING_DIRECTORY = 'checkpoints' # choose your working directory -TARGET_PLATFORM = TargetPlatform.TRT_INT8 # choose your target platform -MODEL_TYPE = NetworkFramework.ONNX # or NetworkFramework.CAFFE -INPUT_LAYOUT = 'chw' # input data layout, chw or hwc -NETWORK_INPUTSHAPE = [config.bsz, 3, 224, 224] # input shape of your network -EXECUTING_DEVICE = 'cuda' # 'cuda' or 'cpu'. -REQUIRE_ANALYSE = False -TRAINING_YOUR_NETWORK = True # 是否需要 Finetuning 一下你的网络 -# ------------------------------------------------------------------- -# 加载你的模型文件,PPQ 将会把 onnx 或者 caffe 模型文件解析成自己的格式 -# 如果你正使用 pytorch, tensorflow 等框架,你可以先将模型导出成 onnx -# 使用 torch.onnx.export 即可,如果你在导出 torch 模型时发生错误,欢迎与我们联系。 -# ------------------------------------------------------------------- -graph = None -if MODEL_TYPE == NetworkFramework.ONNX: - graph = load_onnx_graph(onnx_import_file=config.model) -if MODEL_TYPE == NetworkFramework.CAFFE: - graph = load_caffe_graph( - caffemodel_path=os.path.join(WORKING_DIRECTORY, 'model.caffemodel'), - prototxt_path=os.path.join(WORKING_DIRECTORY, 'model.prototxt')) -assert graph is not None, 'Graph Loading Error, Check your input again.' 
- -# ------------------------------------------------------------------- -# SETTING 对象用于控制 PPQ 的量化逻辑,主要描述了图融合逻辑、调度方案、量化细节策略等 -# 当你的网络量化误差过高时,你需要修改 SETTING 对象中的属性来进行特定的优化 -# ------------------------------------------------------------------- -QS = QuantizationSettingFactory.default_setting() - -# ------------------------------------------------------------------- -# 下面向你展示了如何使用 finetuning 过程提升量化精度 -# 在 PPQ 中我们提供了十余种算法用来帮助你恢复精度 -# 开启他们的方式都是 QS.xxxx = True -# 按需使用,不要全部打开,容易起飞 -# ------------------------------------------------------------------- -if TRAINING_YOUR_NETWORK: - QS.lsq_optimization = True # 启动网络再训练过程,降低量化误差 - QS.lsq_optimization_setting.steps = 800 # 再训练步数,影响训练时间,500 步大概几分钟 - QS.lsq_optimization_setting.collecting_device = 'cpu' # 缓存数据放在那,cuda 就是放在gpu,如果显存超了你就换成 'cpu' - - -dataloader = getdataloader(config.dataset_dir, config.step, batch_size=config.bsz, img_sz=config.imgsz) -# ENABLE CUDA KERNEL 会加速量化效率 3x ~ 10x,但是你如果没有装相应编译环境的话是编译不了的 -# 你可以尝试安装编译环境,或者在不启动 CUDA KERNEL 的情况下完成量化:移除 with ENABLE_CUDA_KERNEL(): 即可 -with ENABLE_CUDA_KERNEL(): - print('网络正量化中,根据你的量化配置,这将需要一段时间:') - quantized = quantize_native_model( - setting=QS, # setting 对象用来控制标准量化逻辑 - model=graph, - calib_dataloader=dataloader, - calib_steps=config.step, - input_shape=NETWORK_INPUTSHAPE, # 如果你的网络只有一个输入,使用这个参数传参 - inputs=None, - # 如果你的网络有多个输入,使用这个参数传参,就是 input_shape=None, inputs=[torch.zeros(1,3,224,224), torch.zeros(1,3,224,224)] - collate_fn=lambda x: x[0].to(EXECUTING_DEVICE), # collate_fn 跟 torch dataloader 的 collate fn 是一样的,用于数据预处理, - # 你当然也可以用 torch dataloader 的那个,然后设置这个为 None - platform=TARGET_PLATFORM, - device=EXECUTING_DEVICE, - do_quantize=True) - - # ------------------------------------------------------------------- - # 如果你需要执行量化后的神经网络并得到结果,则需要创建一个 executor - # 这个 executor 的行为和 torch.Module 是类似的,你可以利用这个东西来获取执行结果 - # 请注意,必须在 export 之前执行此操作。 - # ------------------------------------------------------------------- - executor = TorchExecutor(graph=quantized, device=EXECUTING_DEVICE) - # output = executor.forward(input) - - # ------------------------------------------------------------------- - # PPQ 计算量化误差时,使用信噪比的倒数作为指标,即噪声能量 / 信号能量 - # 量化误差 0.1 表示在整体信号中,量化噪声的能量约为 10% - # 你应当注意,在 graphwise_error_analyse 分析中,我们衡量的是累计误差 - # 网络的最后一层往往都具有较大的累计误差,这些误差是其前面的所有层所共同造成的 - # 你需要使用 layerwise_error_analyse 逐层分析误差的来源 - # ------------------------------------------------------------------- - print('正计算网络量化误差(SNR),最后一层的误差应小于 0.1 以保证量化精度:') - reports = graphwise_error_analyse( - graph=quantized, running_device=EXECUTING_DEVICE, steps=32, - dataloader=dataloader, collate_fn=lambda x: x[0].to(EXECUTING_DEVICE)) - for op, snr in reports.items(): - if snr > 0.1: ppq_warning(f'层 {op} 的累计量化误差显著,请考虑进行优化') - - if REQUIRE_ANALYSE: - print('正计算逐层量化误差(SNR),每一层的独立量化误差应小于 0.1 以保证量化精度:') - layerwise_error_analyse(graph=quantized, running_device=EXECUTING_DEVICE, - interested_outputs=None, - dataloader=dataloader, collate_fn=lambda x: x.to(EXECUTING_DEVICE)) - - # ------------------------------------------------------------------- - # 使用 export_ppq_graph 函数来导出量化后的模型 - # PPQ 会根据你所选择的导出平台来修改模型格式 - # ------------------------------------------------------------------- - print('网络量化结束,正在生成目标文件:') - export_ppq_graph( - graph=quantized, platform=TARGET_PLATFORM, - graph_save_to=os.path.join(config.save_dir, f"quantized_{config.model_name}.onnx"), - config_save_to=os.path.join(config.save_dir, 'quant_cfg.json')) \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b0/ixrt/quant.py 
b/models/cv/classification/efficientnet_b0/ixrt/quant.py index 78aee19e..72f85638 100644 --- a/models/cv/classification/efficientnet_b0/ixrt/quant.py +++ b/models/cv/classification/efficientnet_b0/ixrt/quant.py @@ -13,33 +13,31 @@ # License for the specific language governing permissions and limitations # under the License. +from ppq import * +from ppq.api import * import os -import cv2 -import random +from calibration_dataset import getdataloader import argparse +import random import numpy as np -from random import shuffle -from tensorrt.deploy import static_quantize - import torch -import torchvision.datasets -from calibration_dataset import getdataloader -def setseed(seed=42): - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) +random.seed(42) +np.random.seed(42) +torch.manual_seed(42) + def parse_args(): parser = argparse.ArgumentParser() parser.add_argument("--model_name", type=str) parser.add_argument("--model", type=str) parser.add_argument("--dataset_dir", type=str, default="imagenet_val") - parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], default="hist_percentile") + parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], + default="hist_percentile") parser.add_argument("--disable_quant_names", nargs='*', type=str) - parser.add_argument("--save_dir", type=str, help="save path", default=None) + parser.add_argument("--save_dir", type=str, help="save path", default=None) parser.add_argument("--bsz", type=int, default=32) - parser.add_argument("--step", type=int, default=20) + parser.add_argument("--step", type=int, default=32) parser.add_argument("--seed", type=int, default=42) parser.add_argument("--imgsz", type=int, default=224) args = parser.parse_args() @@ -47,13 +45,103 @@ def parse_args(): print(args.disable_quant_names) return args -args = parse_args() -setseed(args.seed) -calibration_dataloader = getdataloader(args.dataset_dir, args.step, args.bsz, img_sz=args.imgsz) -static_quantize(args.model, - calibration_dataloader=calibration_dataloader, - save_quant_onnx_path=os.path.join(args.save_dir, f"quantized_{args.model_name}.onnx"), - observer=args.observer, - data_preprocess=lambda x: x[0].to("cuda"), - quant_format="ppq", - disable_quant_names=args.disable_quant_names) \ No newline at end of file + +config = parse_args() + +# modify configuration below: +WORKING_DIRECTORY = 'checkpoints' # choose your working directory +TARGET_PLATFORM = TargetPlatform.TRT_INT8 # choose your target platform +MODEL_TYPE = NetworkFramework.ONNX # or NetworkFramework.CAFFE +INPUT_LAYOUT = 'chw' # input data layout, chw or hwc +NETWORK_INPUTSHAPE = [config.bsz, 3, 224, 224] # input shape of your network +EXECUTING_DEVICE = 'cuda' # 'cuda' or 'cpu'. 
+REQUIRE_ANALYSE = False +TRAINING_YOUR_NETWORK = True # 是否需要 Finetuning 一下你的网络 +# ------------------------------------------------------------------- +# 加载你的模型文件,PPQ 将会把 onnx 或者 caffe 模型文件解析成自己的格式 +# 如果你正使用 pytorch, tensorflow 等框架,你可以先将模型导出成 onnx +# 使用 torch.onnx.export 即可,如果你在导出 torch 模型时发生错误,欢迎与我们联系。 +# ------------------------------------------------------------------- +graph = None +if MODEL_TYPE == NetworkFramework.ONNX: + graph = load_onnx_graph(onnx_import_file=config.model) +if MODEL_TYPE == NetworkFramework.CAFFE: + graph = load_caffe_graph( + caffemodel_path=os.path.join(WORKING_DIRECTORY, 'model.caffemodel'), + prototxt_path=os.path.join(WORKING_DIRECTORY, 'model.prototxt')) +assert graph is not None, 'Graph Loading Error, Check your input again.' + +# ------------------------------------------------------------------- +# SETTING 对象用于控制 PPQ 的量化逻辑,主要描述了图融合逻辑、调度方案、量化细节策略等 +# 当你的网络量化误差过高时,你需要修改 SETTING 对象中的属性来进行特定的优化 +# ------------------------------------------------------------------- +QS = QuantizationSettingFactory.default_setting() + +# ------------------------------------------------------------------- +# 下面向你展示了如何使用 finetuning 过程提升量化精度 +# 在 PPQ 中我们提供了十余种算法用来帮助你恢复精度 +# 开启他们的方式都是 QS.xxxx = True +# 按需使用,不要全部打开,容易起飞 +# ------------------------------------------------------------------- +if TRAINING_YOUR_NETWORK: + QS.lsq_optimization = True # 启动网络再训练过程,降低量化误差 + QS.lsq_optimization_setting.steps = 800 # 再训练步数,影响训练时间,500 步大概几分钟 + QS.lsq_optimization_setting.collecting_device = 'cpu' # 缓存数据放在那,cuda 就是放在gpu,如果显存超了你就换成 'cpu' + + +dataloader = getdataloader(config.dataset_dir, config.step, batch_size=config.bsz, img_sz=config.imgsz) +# ENABLE CUDA KERNEL 会加速量化效率 3x ~ 10x,但是你如果没有装相应编译环境的话是编译不了的 +# 你可以尝试安装编译环境,或者在不启动 CUDA KERNEL 的情况下完成量化:移除 with ENABLE_CUDA_KERNEL(): 即可 +with ENABLE_CUDA_KERNEL(): + print('网络正量化中,根据你的量化配置,这将需要一段时间:') + quantized = quantize_native_model( + setting=QS, # setting 对象用来控制标准量化逻辑 + model=graph, + calib_dataloader=dataloader, + calib_steps=config.step, + input_shape=NETWORK_INPUTSHAPE, # 如果你的网络只有一个输入,使用这个参数传参 + inputs=None, + # 如果你的网络有多个输入,使用这个参数传参,就是 input_shape=None, inputs=[torch.zeros(1,3,224,224), torch.zeros(1,3,224,224)] + collate_fn=lambda x: x[0].to(EXECUTING_DEVICE), # collate_fn 跟 torch dataloader 的 collate fn 是一样的,用于数据预处理, + # 你当然也可以用 torch dataloader 的那个,然后设置这个为 None + platform=TARGET_PLATFORM, + device=EXECUTING_DEVICE, + do_quantize=True) + + # ------------------------------------------------------------------- + # 如果你需要执行量化后的神经网络并得到结果,则需要创建一个 executor + # 这个 executor 的行为和 torch.Module 是类似的,你可以利用这个东西来获取执行结果 + # 请注意,必须在 export 之前执行此操作。 + # ------------------------------------------------------------------- + executor = TorchExecutor(graph=quantized, device=EXECUTING_DEVICE) + # output = executor.forward(input) + + # ------------------------------------------------------------------- + # PPQ 计算量化误差时,使用信噪比的倒数作为指标,即噪声能量 / 信号能量 + # 量化误差 0.1 表示在整体信号中,量化噪声的能量约为 10% + # 你应当注意,在 graphwise_error_analyse 分析中,我们衡量的是累计误差 + # 网络的最后一层往往都具有较大的累计误差,这些误差是其前面的所有层所共同造成的 + # 你需要使用 layerwise_error_analyse 逐层分析误差的来源 + # ------------------------------------------------------------------- + print('正计算网络量化误差(SNR),最后一层的误差应小于 0.1 以保证量化精度:') + reports = graphwise_error_analyse( + graph=quantized, running_device=EXECUTING_DEVICE, steps=32, + dataloader=dataloader, collate_fn=lambda x: x[0].to(EXECUTING_DEVICE)) + for op, snr in reports.items(): + if snr > 0.1: ppq_warning(f'层 {op} 的累计量化误差显著,请考虑进行优化') + + if REQUIRE_ANALYSE: + print('正计算逐层量化误差(SNR),每一层的独立量化误差应小于 0.1 
以保证量化精度:') + layerwise_error_analyse(graph=quantized, running_device=EXECUTING_DEVICE, + interested_outputs=None, + dataloader=dataloader, collate_fn=lambda x: x.to(EXECUTING_DEVICE)) + + # ------------------------------------------------------------------- + # 使用 export_ppq_graph 函数来导出量化后的模型 + # PPQ 会根据你所选择的导出平台来修改模型格式 + # ------------------------------------------------------------------- + print('网络量化结束,正在生成目标文件:') + export_ppq_graph( + graph=quantized, platform=TARGET_PLATFORM, + graph_save_to=os.path.join(config.save_dir, f"quantized_{config.model_name}.onnx"), + config_save_to=os.path.join(config.save_dir, 'quant_cfg.json')) \ No newline at end of file diff --git a/models/cv/detection/yolov5s/ixrt/README.md b/models/cv/detection/yolov5s/ixrt/README.md index 689ed69b..3847db5a 100755 --- a/models/cv/detection/yolov5s/ixrt/README.md +++ b/models/cv/detection/yolov5s/ixrt/README.md @@ -20,7 +20,7 @@ pip3 install -r requirements.txt ### Download -Pretrained model: +Pretrained model: Dataset: to download the validation dataset. diff --git a/models/cv/detection/yolov6/ixrt/requirements.txt b/models/cv/detection/yolov6/ixrt/requirements.txt index c5ff461d..2c833a42 100644 --- a/models/cv/detection/yolov6/ixrt/requirements.txt +++ b/models/cv/detection/yolov6/ixrt/requirements.txt @@ -2,4 +2,5 @@ tqdm onnx onnxsim pycocotools -pycuda \ No newline at end of file +pycuda +numpy==1.24.0 \ No newline at end of file diff --git a/models/speech/speech_recognition/conformer/igie/ci/prepare.sh b/models/speech/speech_recognition/conformer/igie/ci/prepare.sh index f05e4ce9..49f448a6 100644 --- a/models/speech/speech_recognition/conformer/igie/ci/prepare.sh +++ b/models/speech/speech_recognition/conformer/igie/ci/prepare.sh @@ -20,7 +20,7 @@ pip3 install -r requirements.txt cd ctc_decoder/swig && bash setup.sh cd ../../ -tar -zxvf 20211025_conformer_exp.tar.gz +# tar -zxvf 20211025_conformer_exp.tar.gz # Get Onnx Model cd wenet diff --git a/models/speech/speech_recognition/conformer/ixrt/README.md b/models/speech/speech_recognition/conformer/ixrt/README.md index 809cbf08..ed858421 100644 --- a/models/speech/speech_recognition/conformer/ixrt/README.md +++ b/models/speech/speech_recognition/conformer/ixrt/README.md @@ -34,7 +34,7 @@ ln -s /home/deepspark/datasets/INFER/conformer/20210601_u2++_conformer_exp_aishe ```bash # Accuracy -DATA_DIR=/PATH/to/data_aishell +DATA_DIR=/PATH/to/aishell_test_data TOOL_DIR="$(pwd)/tools" bash scripts/aishell_data_prepare.sh ${DATA_DIR} ${TOOL_DIR} ``` diff --git a/models/speech/speech_recognition/conformer/ixrt/ci/prepare.sh b/models/speech/speech_recognition/conformer/ixrt/ci/prepare.sh index ec09d6ac..a02a84bb 100644 --- a/models/speech/speech_recognition/conformer/ixrt/ci/prepare.sh +++ b/models/speech/speech_recognition/conformer/ixrt/ci/prepare.sh @@ -25,8 +25,7 @@ else echo "Not Support Os" fi -mkdir -p conformer_checkpoints ln -s /root/data/checkpoints/20210601_u2++_conformer_exp_aishell ./conformer_checkpoints -cp -r /root/data/datasets/AISHELL/data_aishell ./ -bash scripts/aishell_data_prepare.sh data_aishell tools \ No newline at end of file +cp -r /root/data/datasets/AISHELL/data_aishell ./aishell_test_data +bash scripts/aishell_data_prepare.sh aishell_test_data tools \ No newline at end of file diff --git a/models/speech/speech_recognition/conformer/ixrt/ixrt_inference_accuracy.py b/models/speech/speech_recognition/conformer/ixrt/ixrt_inference_accuracy.py index 65245b2b..45a5a1ab 100644 --- 
a/models/speech/speech_recognition/conformer/ixrt/ixrt_inference_accuracy.py +++ b/models/speech/speech_recognition/conformer/ixrt/ixrt_inference_accuracy.py @@ -144,7 +144,7 @@ def main(): args = get_args() # 读取配置文件 - config_fn = os.path.join(args.model_dir, "config.yaml") + config_fn = os.path.join(args.model_dir, "train.yaml") with open(config_fn, "r") as fin: configs = yaml.load(fin, Loader=yaml.FullLoader) @@ -164,7 +164,7 @@ def main(): dataset_conf["batch_conf"]["batch_size"] = args.batch_size # Load dict - dict_fn = os.path.join(args.model_dir, "words.txt") + dict_fn = os.path.join(args.model_dir, "units.txt") char_dict = {} with open(dict_fn, "r", encoding="utf8") as fin: for line in fin: diff --git a/models/speech/speech_recognition/conformer/ixrt/ixrt_inference_performance.py b/models/speech/speech_recognition/conformer/ixrt/ixrt_inference_performance.py index 3ab7e405..fcfcb559 100644 --- a/models/speech/speech_recognition/conformer/ixrt/ixrt_inference_performance.py +++ b/models/speech/speech_recognition/conformer/ixrt/ixrt_inference_performance.py @@ -145,7 +145,7 @@ def main(): args = get_args() # 读取配置文件 - config_fn = os.path.join(args.model_dir, "config.yaml") + config_fn = os.path.join(args.model_dir, "train.yaml") with open(config_fn, "r") as fin: configs = yaml.load(fin, Loader=yaml.FullLoader) @@ -165,7 +165,7 @@ def main(): dataset_conf["batch_conf"]["batch_size"] = args.batch_size # Load dict - dict_fn = os.path.join(args.model_dir, "words.txt") + dict_fn = os.path.join(args.model_dir, "units.txt") char_dict = {} with open(dict_fn, "r", encoding="utf8") as fin: for line in fin: diff --git a/models/speech/speech_recognition/transformer_asr/ixrt/ci/prepare.sh b/models/speech/speech_recognition/transformer_asr/ixrt/ci/prepare.sh index b2e8ce9e..5a1f9668 100644 --- a/models/speech/speech_recognition/transformer_asr/ixrt/ci/prepare.sh +++ b/models/speech/speech_recognition/transformer_asr/ixrt/ci/prepare.sh @@ -25,10 +25,13 @@ else echo "Not Support Os" fi +pip3 install -r requirements.txt + +mkdir -p results/transformer cp -r /root/data/checkpoints/8886 results/transformer/ mkdir -p results/transformer/8886/save mkdir -p /home/data/speechbrain/aishell/csv_data -cp -r /root/data/datasets/AISHELL/data_aishell /home/data/speechbrain/aishell +ln -s /root/data/datasets/AISHELL/data_aishell /home/data/speechbrain/aishell/ cp results/transformer/8886/*.csv /home/data/speechbrain/aishell/csv_data bash build.sh diff --git a/tests/models_ixrt.yaml b/tests/models_ixrt.yaml index 051d0017..c4bc981d 100644 --- a/tests/models_ixrt.yaml +++ b/tests/models_ixrt.yaml @@ -490,7 +490,6 @@ need_third_part: true precisions: - fp16 - - int8 relative_path: models/speech/speech_recognition/conformer/ixrt task_type: speech/speech_recognition - datasets: https://www.openslr.org/33/aishell.tar.gz -- Gitee From 8dc81e249c3db5882d57ceb61eb095f06e34780f Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Wed, 15 Jan 2025 15:39:50 +0800 Subject: [PATCH 24/35] fix efficientnet b0 int8 --- models/cv/classification/efficientnet_b0/ixrt/README.md | 3 +-- models/cv/classification/efficientnet_b0/ixrt/ci/prepare.sh | 3 +-- .../ixrt/scripts/infer_efficientnet_b0_fp16_accuracy.sh | 2 +- .../ixrt/scripts/infer_efficientnet_b0_fp16_performance.sh | 2 +- .../ixrt/scripts/infer_efficientnet_b0_int8_accuracy.sh | 2 +- .../ixrt/scripts/infer_efficientnet_b0_int8_performance.sh | 2 +- 6 files changed, 6 insertions(+), 8 deletions(-) diff --git a/models/cv/classification/efficientnet_b0/ixrt/README.md
b/models/cv/classification/efficientnet_b0/ixrt/README.md index eb34b2db..84065777 100644 --- a/models/cv/classification/efficientnet_b0/ixrt/README.md +++ b/models/cv/classification/efficientnet_b0/ixrt/README.md @@ -27,8 +27,7 @@ Dataset: to download the validation dat ### Model Conversion ```bash -mkdir -p checkpoints -python3 export_onnx.py --origin_model /path/to/efficientnet_b0_rwightman-3dd342df.pth --output_model checkpoints/efficientnet_b0.onnx +python3 export_onnx.py --origin_model /path/to/efficientnet_b0_rwightman-3dd342df.pth --output_model efficientnet_b0.onnx ``` ## Inference diff --git a/models/cv/classification/efficientnet_b0/ixrt/ci/prepare.sh b/models/cv/classification/efficientnet_b0/ixrt/ci/prepare.sh index 75a8391b..ec7e8404 100644 --- a/models/cv/classification/efficientnet_b0/ixrt/ci/prepare.sh +++ b/models/cv/classification/efficientnet_b0/ixrt/ci/prepare.sh @@ -26,5 +26,4 @@ else fi pip install -r requirements.txt -mkdir checkpoints -python3 export_onnx.py --origin_model /root/data/checkpoints/efficientnet_b0_rwightman-3dd342df.pth --output_model checkpoints/efficientnet_b0.onnx \ No newline at end of file +python3 export_onnx.py --origin_model /root/data/checkpoints/efficientnet_b0_rwightman-3dd342df.pth --output_model efficientnet_b0.onnx \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_fp16_accuracy.sh b/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_fp16_accuracy.sh index ea8c545f..dfc034b9 100644 --- a/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_fp16_accuracy.sh +++ b/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_fp16_accuracy.sh @@ -14,7 +14,7 @@ # License for the specific language governing permissions and limitations # under the License. batchsize=32 -model_path="checkpoints/efficientnet_b0" +model_path="efficientnet_b0" # model_path="resnet18" datasets_path=${DATASETS_DIR} diff --git a/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_fp16_performance.sh b/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_fp16_performance.sh index f8844802..818c066d 100644 --- a/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_fp16_performance.sh +++ b/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_fp16_performance.sh @@ -14,7 +14,7 @@ # License for the specific language governing permissions and limitations # under the License. batchsize=32 -model_path="checkpoints/efficientnet_b0" +model_path="efficientnet_b0" # model_path="resnet18" datasets_path=${DATASETS_DIR} diff --git a/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_int8_accuracy.sh b/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_int8_accuracy.sh index 6f3584c9..97892512 100644 --- a/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_int8_accuracy.sh +++ b/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_int8_accuracy.sh @@ -14,7 +14,7 @@ # License for the specific language governing permissions and limitations # under the License. 
batchsize=32 -model_path="checkpoints/efficientnet_b0" +model_path="efficientnet_b0" # model_path="resnet18" datasets_path=${DATASETS_DIR} diff --git a/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_int8_performance.sh b/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_int8_performance.sh index 8989c2db..e016fd1a 100644 --- a/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_int8_performance.sh +++ b/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_int8_performance.sh @@ -14,7 +14,7 @@ # License for the specific language governing permissions and limitations # under the License. batchsize=32 -model_path="checkpoints/efficientnet_b0" +model_path="efficientnet_b0" # model_path="resnet18" datasets_path=${DATASETS_DIR} -- Gitee From 84caa3b0c6d5f3693e634f189dc70fe70c8a511b Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Thu, 16 Jan 2025 10:35:19 +0800 Subject: [PATCH 25/35] update --- models/cv/classification/hrnet_w18/ixrt/requirements.txt | 3 ++- models/cv/classification/resnetv1d50/ixrt/README.md | 8 ++++---- .../cv/classification/resnetv1d50/ixrt/requirements.txt | 3 ++- .../swin_transformer_large/ixrt/ci/prepare.sh | 1 - .../swin_transformer_large/ixrt/requirements.txt | 3 ++- .../cv/classification/wide_resnet50/ixrt/requirements.txt | 3 ++- tests/run_ixrt.py | 2 +- 7 files changed, 13 insertions(+), 10 deletions(-) diff --git a/models/cv/classification/hrnet_w18/ixrt/requirements.txt b/models/cv/classification/hrnet_w18/ixrt/requirements.txt index 96208002..7d0f090a 100644 --- a/models/cv/classification/hrnet_w18/ixrt/requirements.txt +++ b/models/cv/classification/hrnet_w18/ixrt/requirements.txt @@ -5,4 +5,5 @@ tabulate ppq mmpretrain mmcv-lite -pycuda \ No newline at end of file +pycuda +transformers==4.37.1 \ No newline at end of file diff --git a/models/cv/classification/resnetv1d50/ixrt/README.md b/models/cv/classification/resnetv1d50/ixrt/README.md index d4485ebe..3b49e01e 100644 --- a/models/cv/classification/resnetv1d50/ixrt/README.md +++ b/models/cv/classification/resnetv1d50/ixrt/README.md @@ -42,18 +42,18 @@ export CONFIG_DIR=config/RESNETV1D50_CONFIG ```bash # Accuracy -bash scripts/infer_resnet_v1_d50_fp16_accuracy.sh +bash scripts/infer_resnetv1d50_fp16_accuracy.sh # Performance -bash scripts/infer_resnet_v1_d50_fp16_performance.sh +bash scripts/infer_resnetv1d50_fp16_performance.sh ``` ### INT8 ```bash # Accuracy -bash scripts/infer_resnet_v1_d50_int8_accuracy.sh +bash scripts/infer_resnetv1d50_int8_accuracy.sh # Performance -bash scripts/infer_resnet_v1_d50_int8_performance.sh +bash scripts/infer_resnetv1d50_int8_performance.sh ``` ## Results diff --git a/models/cv/classification/resnetv1d50/ixrt/requirements.txt b/models/cv/classification/resnetv1d50/ixrt/requirements.txt index 96208002..7d0f090a 100644 --- a/models/cv/classification/resnetv1d50/ixrt/requirements.txt +++ b/models/cv/classification/resnetv1d50/ixrt/requirements.txt @@ -5,4 +5,5 @@ tabulate ppq mmpretrain mmcv-lite -pycuda \ No newline at end of file +pycuda +transformers==4.37.1 \ No newline at end of file diff --git a/models/cv/classification/swin_transformer_large/ixrt/ci/prepare.sh b/models/cv/classification/swin_transformer_large/ixrt/ci/prepare.sh index 4adb3218..572c069a 100644 --- a/models/cv/classification/swin_transformer_large/ixrt/ci/prepare.sh +++ b/models/cv/classification/swin_transformer_large/ixrt/ci/prepare.sh @@ -31,7 +31,6 @@ mkdir -p general_perf/model_zoo/popular mkdir -p 
general_perf/model_zoo/sota cp /root/data/3rd_party/swin-large-torch-fp32.json ./ -cp /root/data/3rd_party/iluvatar-corex-ixrt/tools/optimizer/optimizer.py ./ cp -r /root/data/checkpoints/swin-large ./general_perf/model_zoo/popular/ python3 torch2onnx.py --model_path ./general_perf/model_zoo/popular/swin-large/swin-transformer-large.pt --output_path swin-large-torch-fp32.onnx diff --git a/models/cv/classification/swin_transformer_large/ixrt/requirements.txt b/models/cv/classification/swin_transformer_large/ixrt/requirements.txt index 6e905779..c46a0dcf 100644 --- a/models/cv/classification/swin_transformer_large/ixrt/requirements.txt +++ b/models/cv/classification/swin_transformer_large/ixrt/requirements.txt @@ -6,4 +6,5 @@ pycuda onnx tabulate pycocotools -opencv-python==4.6.0.66 \ No newline at end of file +opencv-python==4.6.0.66 +typing-extensions==4.12.2 \ No newline at end of file diff --git a/models/cv/classification/wide_resnet50/ixrt/requirements.txt b/models/cv/classification/wide_resnet50/ixrt/requirements.txt index 4b82bed8..424e6007 100644 --- a/models/cv/classification/wide_resnet50/ixrt/requirements.txt +++ b/models/cv/classification/wide_resnet50/ixrt/requirements.txt @@ -1,3 +1,4 @@ onnx tqdm -pycuda \ No newline at end of file +pycuda +ppq==0.6.6 \ No newline at end of file diff --git a/tests/run_ixrt.py b/tests/run_ixrt.py index 4a2b347c..2481edb6 100644 --- a/tests/run_ixrt.py +++ b/tests/run_ixrt.py @@ -171,7 +171,7 @@ def run_clf_testcase(model): script = f""" cd ../{model['relative_path']} export ORIGIN_ONNX_NAME=./swin-large-torch-fp32 - export OPTIMIER_FILE=/Path/ixrt/oss/tools/optimizer/optimizer.py + export OPTIMIER_FILE=/root/data/3rd_party/iluvatar-corex-ixrt/tools/optimizer/optimizer.py export PROJ_PATH=./ bash scripts/infer_swinl_fp16_performance.sh cd ./ByteMLPerf/byte_infer_perf/general_perf -- Gitee From 8c0ca1bb1bfe33324cd74fc0ac3e77e9adde5b00 Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Thu, 16 Jan 2025 13:58:06 +0800 Subject: [PATCH 26/35] fix model --- models/cv/classification/repvgg/ixrt/export_onnx.py | 2 +- models/cv/detection/yolov5s/ixrt/requirements.txt | 3 ++- tests/run_ixrt.py | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/models/cv/classification/repvgg/ixrt/export_onnx.py b/models/cv/classification/repvgg/ixrt/export_onnx.py index 254f3a0e..9ba381cc 100644 --- a/models/cv/classification/repvgg/ixrt/export_onnx.py +++ b/models/cv/classification/repvgg/ixrt/export_onnx.py @@ -42,7 +42,7 @@ args = parse_args() config_file = args.config_file checkpoint_file = args.checkpoint_file model = Model().eval() -x = torch.zeros(1, 3, 224, 224).to(device) +x = torch.zeros(32, 3, 224, 224).to(device) with torch.no_grad(): output = model(x) diff --git a/models/cv/detection/yolov5s/ixrt/requirements.txt b/models/cv/detection/yolov5s/ixrt/requirements.txt index 4f51a92e..a6188db8 100644 --- a/models/cv/detection/yolov5s/ixrt/requirements.txt +++ b/models/cv/detection/yolov5s/ixrt/requirements.txt @@ -2,4 +2,5 @@ tqdm onnx onnxsim ultralytics -pycocotools \ No newline at end of file +pycocotools +pycuda \ No newline at end of file diff --git a/tests/run_ixrt.py b/tests/run_ixrt.py index 2481edb6..a11ea1d5 100644 --- a/tests/run_ixrt.py +++ b/tests/run_ixrt.py @@ -266,10 +266,10 @@ def run_detec_testcase(model): for m in matchs: result["result"].setdefault(prec, {}) try: - result["result"][prec] = result["result"][prec] | {m[0]: float(m[1])} + result["result"][prec] = result["result"][prec][m[0]] = float(m[1]) except ValueError: 
print("The string cannot be converted to a float.") - result["result"][prec] = result["result"][prec] | {m[0]: m[1]} + result["result"][prec] = result["result"][prec][m[0]] = m[1] if matchs and len(matchs) == 2: result["result"][prec]["status"] = "PASS" else: -- Gitee From 5831138c4afb516804fc4233aa97daeacbc2f31f Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Thu, 16 Jan 2025 14:12:27 +0800 Subject: [PATCH 27/35] install ixrt run --- models/cv/detection/centernet/ixrt/requirements.txt | 3 ++- models/cv/detection/yolox/ixrt/ci/prepare.sh | 2 ++ models/cv/segmentation/mask_rcnn/ixrt/ci/prepare.sh | 3 +++ models/nlp/language_model/bert_base_squad/ixrt/ci/prepare.sh | 3 +++ models/nlp/language_model/bert_large_squad/ixrt/ci/prepare.sh | 3 +++ tests/run_ixrt.py | 4 ++-- 6 files changed, 15 insertions(+), 3 deletions(-) diff --git a/models/cv/detection/centernet/ixrt/requirements.txt b/models/cv/detection/centernet/ixrt/requirements.txt index 91b58790..9178d0b6 100644 --- a/models/cv/detection/centernet/ixrt/requirements.txt +++ b/models/cv/detection/centernet/ixrt/requirements.txt @@ -2,4 +2,5 @@ onnx tqdm mmdet mmdeploy -mmengine \ No newline at end of file +mmengine +transformers==4.37.1 \ No newline at end of file diff --git a/models/cv/detection/yolox/ixrt/ci/prepare.sh b/models/cv/detection/yolox/ixrt/ci/prepare.sh index 26371b34..cb7a30d7 100644 --- a/models/cv/detection/yolox/ixrt/ci/prepare.sh +++ b/models/cv/detection/yolox/ixrt/ci/prepare.sh @@ -28,6 +28,8 @@ fi pip install -r requirements.txt unzip /root/data/repos/yolox-f00a798c8bf59f43ab557a2f3d566afa831c8887.zip -d ./ ln -s /root/data/checkpoints/yolox_m.pth ./YOLOX/ +# install ixrt run +bash /root/data/3rd_party/ixrt-0.10.0+corex.4.2.0.20250115-linux_x86_64.run cd YOLOX && python3 setup.py develop && python3 tools/export_onnx.py --output-name ../yolox.onnx -n yolox-m -c yolox_m.pth --batch-size 32 if [ "$1" = "nvidia" ]; then cd ../plugin && mkdir -p build && cd build && cmake .. -DUSE_TRT=1 && make -j12 diff --git a/models/cv/segmentation/mask_rcnn/ixrt/ci/prepare.sh b/models/cv/segmentation/mask_rcnn/ixrt/ci/prepare.sh index 66a85756..022d4d31 100644 --- a/models/cv/segmentation/mask_rcnn/ixrt/ci/prepare.sh +++ b/models/cv/segmentation/mask_rcnn/ixrt/ci/prepare.sh @@ -18,6 +18,9 @@ set -x ln -s /root/data/checkpoints/maskrcnn.wts ./python/ ln -s /root/data/datasets/coco ./coco +# install ixrt run +bash /root/data/3rd_party/ixrt-0.10.0+corex.4.2.0.20250115-linux_x86_64.run + if [ "$1" = "nvidia" ]; then cd scripts && bash init_nv.sh else diff --git a/models/nlp/language_model/bert_base_squad/ixrt/ci/prepare.sh b/models/nlp/language_model/bert_base_squad/ixrt/ci/prepare.sh index ddb6742b..e1d8b7f3 100644 --- a/models/nlp/language_model/bert_base_squad/ixrt/ci/prepare.sh +++ b/models/nlp/language_model/bert_base_squad/ixrt/ci/prepare.sh @@ -27,6 +27,9 @@ fi pip install -r requirements.txt +# install ixrt run +bash /root/data/3rd_party/ixrt-0.10.0+corex.4.2.0.20250115-linux_x86_64.run + if [ "$1" = "nvidia" ]; then cmake -S . 
-B build -DUSE_TENSORRT=true cmake --build build -j16 diff --git a/models/nlp/language_model/bert_large_squad/ixrt/ci/prepare.sh b/models/nlp/language_model/bert_large_squad/ixrt/ci/prepare.sh index e9c50170..979f9050 100644 --- a/models/nlp/language_model/bert_large_squad/ixrt/ci/prepare.sh +++ b/models/nlp/language_model/bert_large_squad/ixrt/ci/prepare.sh @@ -25,6 +25,9 @@ else echo "Not Support Os" fi +# install ixrt run +bash /root/data/3rd_party/ixrt-0.10.0+corex.4.2.0.20250115-linux_x86_64.run + if [ "$1" = "nvidia" ]; then cmake -S . -B build -DUSE_TENSORRT=true cmake --build build -j16 diff --git a/tests/run_ixrt.py b/tests/run_ixrt.py index a11ea1d5..c1324d15 100644 --- a/tests/run_ixrt.py +++ b/tests/run_ixrt.py @@ -266,10 +266,10 @@ def run_detec_testcase(model): for m in matchs: result["result"].setdefault(prec, {}) try: - result["result"][prec] = result["result"][prec][m[0]] = float(m[1]) + result["result"][prec][m[0]] = float(m[1]) except ValueError: print("The string cannot be converted to a float.") - result["result"][prec] = result["result"][prec][m[0]] = m[1] + result["result"][prec][m[0]] = m[1] if matchs and len(matchs) == 2: result["result"][prec]["status"] = "PASS" else: -- Gitee From cb651965f699397c3f16b98cc6f3ddd2f6bb99b0 Mon Sep 17 00:00:00 2001 From: "xinchi.tian" Date: Thu, 16 Jan 2025 19:02:35 +0800 Subject: [PATCH 28/35] Fix compile error --- .../ixrt/src/qkv_to_context/qkvToContextInt8Plugin.cu | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/models/nlp/language_model/bert_base_squad/ixrt/src/qkv_to_context/qkvToContextInt8Plugin.cu b/models/nlp/language_model/bert_base_squad/ixrt/src/qkv_to_context/qkvToContextInt8Plugin.cu index 5c4d5c53..2330debf 100644 --- a/models/nlp/language_model/bert_base_squad/ixrt/src/qkv_to_context/qkvToContextInt8Plugin.cu +++ b/models/nlp/language_model/bert_base_squad/ixrt/src/qkv_to_context/qkvToContextInt8Plugin.cu @@ -284,7 +284,7 @@ cudaError_t fused_multihead_attetion_int8(int8_t* qkv_buffer, int8_t* mask, int8 case 64: case 128: case 192: - case 256: + case 256: { cuinferFlashAttnConfigInfo flashAttnInfo; flashAttnInfo.scaling = sqrt(1.f / (head_dim * 1.0)); flashAttnInfo.quantParam.q_amax = arrange_qkv_amax; @@ -318,7 +318,8 @@ cudaError_t fused_multihead_attetion_int8(int8_t* qkv_buffer, int8_t* mask, int8 CUINFER_CHECK(cuinferFMHAForwardEx(cuinfer_handle, flashAttnInfo, qDesc, q_buffer, kDesc, k_buffer, vDesc, v_buffer, maskDesc, mask, oDesc, qk_buffer)); break; - default: + } + default: { cuinfer_i8_gemm(k_buffer, q_buffer, nullptr, qkv_buffer, batch_size * head_num, batch_seq_len, batch_seq_len, head_dim, batch_seq_len * head_dim, batch_seq_len * head_dim, batch_seq_len * batch_seq_len, scaleBmm1, 0.0, 0, cuinfer_handle, stream); @@ -330,6 +331,7 @@ cudaError_t fused_multihead_attetion_int8(int8_t* qkv_buffer, int8_t* mask, int8 batch_seq_len, batch_seq_len * head_dim, batch_seq_len * batch_seq_len, batch_seq_len * head_dim, scaleBmm2, cuinfer_handle, stream); break; + } } IxinferArrangeAttenOutputI8II8O(batch_token_num, hidden_size, stream, qk_buffer, qkv_out, batch_seq_len, head_dim, -- Gitee From 0f1fdce4866507f7607245e128a4e4afdf8c97a8 Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Fri, 17 Jan 2025 10:31:40 +0800 Subject: [PATCH 29/35] update bert --- .../bert_base_squad/ixrt/python/builder.py | 398 ----------------- .../ixrt/python/builder_int8.py | 415 ----------------- .../ixrt/python/builder_utils.py | 219 --------- .../ixrt/python/builder_utils_int8.py | 209 --------- 
.../ixrt/python/evaluate-v1.1.py | 130 ------ .../bert_base_squad/ixrt/python/inference.py | 417 ------------------ .../ixrt/python/ixrt/evaluate-v1.1.py | 5 +- .../ixrt/python/ixrt/inference.py | 5 + .../ixrt/python/{ => ixrt}/perf.py | 0 .../ixrt/python/load_ixrt_plugin.py | 44 -- .../ixrt/python/evaluate-v1.1.py | 4 + .../qkv_to_context/qkvToContextInt8Plugin.cu | 6 +- .../conformer/ixrt/ci/prepare.sh | 2 + .../conformer/ixrt/ixrt_inference_accuracy.py | 1 + tests/run_ixrt.py | 11 +- 15 files changed, 26 insertions(+), 1840 deletions(-) delete mode 100644 models/nlp/language_model/bert_base_squad/ixrt/python/builder.py delete mode 100644 models/nlp/language_model/bert_base_squad/ixrt/python/builder_int8.py delete mode 100644 models/nlp/language_model/bert_base_squad/ixrt/python/builder_utils.py delete mode 100644 models/nlp/language_model/bert_base_squad/ixrt/python/builder_utils_int8.py delete mode 100644 models/nlp/language_model/bert_base_squad/ixrt/python/evaluate-v1.1.py delete mode 100644 models/nlp/language_model/bert_base_squad/ixrt/python/inference.py rename models/nlp/language_model/bert_base_squad/ixrt/python/{ => ixrt}/perf.py (100%) delete mode 100644 models/nlp/language_model/bert_base_squad/ixrt/python/load_ixrt_plugin.py diff --git a/models/nlp/language_model/bert_base_squad/ixrt/python/builder.py b/models/nlp/language_model/bert_base_squad/ixrt/python/builder.py deleted file mode 100644 index bd932d48..00000000 --- a/models/nlp/language_model/bert_base_squad/ixrt/python/builder.py +++ /dev/null @@ -1,398 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -import os -import argparse -import json -import tensorrt as trt -import time -import sys -import ctypes -import os -import numpy as np -from builder_utils import load_onnx_weights_and_quant, load_pytorch_weights_and_quant -from builder_utils import WQKV, BQKV # Attention Keys -from builder_utils import W_AOUT, B_AOUT, W_MID, B_MID, W_LOUT, B_LOUT # Transformer Keys -from builder_utils import SQD_W, SQD_B # SQuAD Output Keys - -trt_version = [int(n) for n in trt.__version__.split('.')] -plugin_lib_name = "libnvinfer_plugin.so" if os.getenv('USE_TRT') == 'True' else "libixrt_plugin.so" -print(plugin_lib_name) - -TRT_LOGGER = trt.Logger(trt.Logger.WARNING) -from load_ixrt_plugin import load_ixrt_plugin, is_nvidia_platform -load_ixrt_plugin(TRT_LOGGER) - -plg_registry = trt.get_plugin_registry() -registry_list = plg_registry.plugin_creator_list -print("registry_list: ", [registry.name + '/' + registry.plugin_version for registry in registry_list]) -emln_plg_creator = plg_registry.get_plugin_creator("CustomEmbLayerNormPluginDynamic_IxRT", "1", "") -qkv2_plg_creator = plg_registry.get_plugin_creator("CustomQKVToContextPluginDynamic_IxRT", "1", "") -skln_plg_creator = plg_registry.get_plugin_creator("CustomSkipLayerNormPluginDynamic_IxRT", "1", "") -ffn_plg_creator = plg_registry.get_plugin_creator("CustomFFNPluginDynamic_IxRT", "1", "") -gelu_plg_creator = plg_registry.get_plugin_creator("CustomGeluPluginDynamic_IxRT", "1", "") -fc_plg_creator = plg_registry.get_plugin_creator("CustomFCPluginDynamic_IxRT", "1", "") - -class BertConfig: - def __init__(self, bert_config_path, use_fp16, use_trt): - with open(bert_config_path, "r") as f: - data = json.load(f) - self.num_attention_heads = data["num_attention_heads"] - self.hidden_size = data["hidden_size"] - self.intermediate_size = data["intermediate_size"] - self.num_hidden_layers = data["num_hidden_layers"] - self.head_size = self.hidden_size // self.num_attention_heads - self.use_fp16 = use_fp16 - self.use_trt = use_trt - -def set_tensor_name(tensor, prefix, name): - tensor.name = prefix + name - -def set_output_name(layer, prefix, name, out_idx = 0): - set_tensor_name(layer.get_output(out_idx), prefix, name) - -def set_output_range(layer, maxval, out_idx = 0): - layer.get_output(out_idx).set_dynamic_range(-maxval, maxval) - -def get_mha_dtype(config): - dtype = trt.float32 - if config.use_fp16: - dtype = trt.float16 - return int(dtype) - -def custom_fc(network, input_tensor, out_dims, W, B): - pf_out_dims = trt.PluginField("out_dims", np.array(out_dims, dtype=np.int32), trt.PluginFieldType.INT32) - pf_type = trt.PluginField("type_id", np.array(int(trt.float16), dtype=np.int32), trt.PluginFieldType.INT32) - pf_W = trt.PluginField("W", W, trt.PluginFieldType.FLOAT32) - fields = [pf_out_dims, pf_type, pf_W] - if B is not None: - pf_B = trt.PluginField("B", B, trt.PluginFieldType.FLOAT32) - fields.append(pf_B) - - pfc = trt.PluginFieldCollection(fields) - fc_plugin = fc_plg_creator.create_plugin("fcplugin", pfc) - plug_inputs = [input_tensor] - out_dense = network.add_plugin_v2(plug_inputs, fc_plugin) - return out_dense - -def attention_layer_opt(prefix, config, init_dict, network, input_tensor, imask): - """ - Add the attention layer - """ - assert(len(input_tensor.shape) == 5) - B, S, hidden_size, _, _ = input_tensor.shape - num_heads = config.num_attention_heads - head_size = int(hidden_size / num_heads) - - Wall = init_dict[prefix + WQKV] - Ball = init_dict[prefix + BQKV] - - # FC_attention - if config.use_trt: - mult_all = 
network.add_fully_connected(input_tensor, 3 * hidden_size, Wall, Ball) - else: - mult_all = custom_fc(network, input_tensor, 3 * hidden_size, Wall, Ball) - - has_mask = imask is not None - # QKV2CTX - pf_type = trt.PluginField("type_id", np.array([get_mha_dtype(config)], np.int32), trt.PluginFieldType.INT32) - pf_hidden_size = trt.PluginField("hidden_size", np.array([hidden_size], np.int32), trt.PluginFieldType.INT32) - pf_num_heads = trt.PluginField("num_heads", np.array([num_heads], np.int32), trt.PluginFieldType.INT32) - pf_has_mask = trt.PluginField("has_mask", np.array([has_mask], np.int32), trt.PluginFieldType.INT32) - pfc = trt.PluginFieldCollection([pf_hidden_size, pf_num_heads, pf_has_mask, pf_type]) - qkv2ctx_plug = qkv2_plg_creator.create_plugin("qkv2ctx", pfc) - - qkv_in = [mult_all.get_output(0)] - if has_mask: - qkv_in.append(imask) - qkv2ctx = network.add_plugin_v2(qkv_in, qkv2ctx_plug) - return qkv2ctx - - -def skipln(prefix, config, init_dict, network, input_tensor, skip, bias=None): - """ - Add the skip layer - """ - idims = input_tensor.shape - assert len(idims) == 5 - hidden_size = idims[2] - - dtype = trt.float32 - if config.use_fp16: - dtype = trt.float16 - - pf_ld = trt.PluginField("ld", np.array([hidden_size], np.int32), trt.PluginFieldType.INT32) - wbeta = init_dict[prefix + "beta"] - pf_beta = trt.PluginField("beta", wbeta, trt.PluginFieldType.FLOAT32) - wgamma = init_dict[prefix + "gamma"] - pf_gamma = trt.PluginField("gamma", wgamma, trt.PluginFieldType.FLOAT32) - pf_type = trt.PluginField("type_id", np.array([int(dtype)], np.int32), trt.PluginFieldType.INT32) - - fields = [pf_ld, pf_beta, pf_gamma, pf_type ] - - if bias is not None: - pf_bias = trt.PluginField("bias", bias, trt.PluginFieldType.FLOAT32) - fields.append(pf_bias) - - pfc = trt.PluginFieldCollection(fields) - skipln_plug = skln_plg_creator.create_plugin("skipln", pfc) - - skipln_inputs = [input_tensor, skip] - layer = network.add_plugin_v2(skipln_inputs, skipln_plug) - return layer - -def ffn_trt(prefix, config, init_dict, network, input_tensor): - # FC1 + GELU - B_mid = init_dict[prefix + B_MID] - W_mid = init_dict[prefix + W_MID] - mid_dense = network.add_fully_connected(input_tensor, config.intermediate_size, W_mid, B_mid) - - dtype = trt.float32 - if config.use_fp16: - dtype = trt.float16 - pf_type = trt.PluginField("type_id", np.array([int(dtype)], np.int32), trt.PluginFieldType.INT32) - pf_ld = trt.PluginField("ld", np.array([config.hidden_size], np.int32), trt.PluginFieldType.INT32) - - pfc = trt.PluginFieldCollection([pf_type, pf_ld]) - gelu_plug = gelu_plg_creator.create_plugin("gelu", pfc) - - gelu_inputs = [mid_dense.get_output(0)] - gelu_layer = network.add_plugin_v2(gelu_inputs, gelu_plug) - - intermediate_act = gelu_layer.get_output(0) - - # FC2 - # Dense to hidden size - B_lout = init_dict[prefix + B_LOUT] - W_lout = init_dict[prefix + W_LOUT] - out_dense = network.add_fully_connected(intermediate_act, config.hidden_size, W_lout, B_lout) - B_lout = None - - out_layer = skipln(prefix + "output_layernorm_", config, init_dict, network, out_dense.get_output(0), input_tensor, B_lout) - return out_layer - -def ffn(prefix, config, init_dict, network, input_tensor): - # FC1 + GELU - B_mid = init_dict[prefix + B_MID] - W_mid = init_dict[prefix + W_MID] - B_lout = init_dict[prefix + B_LOUT] - W_lout = init_dict[prefix + W_LOUT] - pf_out_dim = trt.PluginField("out_dims", np.array(config.hidden_size, np.int32), trt.PluginFieldType.INT32) - pf_type = trt.PluginField("type_id", 
np.array(int(trt.float16), np.int32), trt.PluginFieldType.INT32) - pf_W1 = trt.PluginField("W1", W_mid, trt.PluginFieldType.FLOAT32) - pf_W2 = trt.PluginField("W2", W_lout, trt.PluginFieldType.FLOAT32) - pf_B1 = trt.PluginField("B1", B_mid, trt.PluginFieldType.FLOAT32) - pf_act_type = trt.PluginField("act_type", np.array(int(3), np.int32), trt.PluginFieldType.INT32) - pfc = trt.PluginFieldCollection([pf_out_dim, pf_type, pf_W1, pf_W2, pf_B1, pf_act_type]) - ffn_plug = ffn_plg_creator.create_plugin("ffn", pfc) - - ffn_inputs = [input_tensor] - ffn_layer = network.add_plugin_v2(ffn_inputs, ffn_plug) - - out_layer = skipln(prefix + "output_layernorm_", config, init_dict, network, ffn_layer.get_output(0), input_tensor, B_lout) - return out_layer - -def transformer_layer_opt(prefix, config, init_dict, network, input_tensor, imask): - """ - Add the transformer layer - """ - idims = input_tensor.shape - assert len(idims) == 5 - hidden_size = idims[2] - - context_transposed = attention_layer_opt(prefix + "attention_", config, init_dict, network, input_tensor, imask) - attention_heads = context_transposed.get_output(0) - - # FC0 - B_aout = init_dict[prefix + B_AOUT] - W_aout = init_dict[prefix + W_AOUT] - if config.use_trt: - attention_out_fc = network.add_fully_connected(attention_heads, hidden_size, W_aout, B_aout) - else: - attention_out_fc = custom_fc(network, attention_heads, hidden_size, W_aout, B_aout) - B_aout = None - - skiplayer = skipln(prefix + "attention_output_layernorm_",config, init_dict, network, attention_out_fc.get_output(0), input_tensor, B_aout) - attention_ln = skiplayer.get_output(0) - - if config.use_trt: - ffn_layer = ffn_trt(prefix, config, init_dict, network, attention_ln) - else: - ffn_layer = ffn(prefix, config, init_dict, network, attention_ln) - return ffn_layer - -def bert_model(config, init_dict, network, input_tensor, input_mask): - """ - Create the bert model - """ - prev_input = input_tensor - for layer in range(0, config.num_hidden_layers): - ss = "l{}_".format(layer) - out_layer = transformer_layer_opt(ss, config, init_dict, network, prev_input, input_mask) - prev_input = out_layer.get_output(0) - return prev_input - -def squad_output(prefix, config, init_dict, network, input_tensor): - """ - Create the squad output - """ - - idims = input_tensor.shape - assert len(idims) == 5 - B, S, hidden_size, _, _ = idims - - W_out = init_dict[prefix + SQD_W] - B_out = init_dict[prefix + SQD_B] - - if config.use_trt: - dense = network.add_fully_connected(input_tensor, 2, W_out, B_out) - else: - dense = custom_fc(network, input_tensor, 2, W_out, B_out) - - return dense - -def emb_layernorm(builder, network, config, weights_dict, builder_config, sequence_lengths, batch_sizes): - input_ids = network.add_input(name="input_ids", dtype=trt.int32, shape=(-1 if len(batch_sizes) > 1 else batch_sizes[0], -1 if len(sequence_lengths) > 1 else sequence_lengths[0])) - segment_ids = network.add_input(name="segment_ids", dtype=trt.int32, shape=(-1 if len(batch_sizes) > 1 else batch_sizes[0], -1 if len(sequence_lengths) > 1 else sequence_lengths[0])) - input_mask = network.add_input(name="input_mask", dtype=trt.int32, shape=(-1 if len(batch_sizes) > 1 else batch_sizes[0], -1 if len(sequence_lengths) > 1 else sequence_lengths[0])) - - if len(sequence_lengths) > 1: - profile = builder.create_optimization_profile() - min_shape = (batch_sizes[0], sequence_lengths[0]) - opt_shape = (batch_sizes[1], sequence_lengths[1]) - max_shape = (batch_sizes[2], sequence_lengths[2]) - 
assert(sequence_lengths[0] <= sequence_lengths[1] and sequence_lengths[1] <= sequence_lengths[2]) - - print('set dynamic shape -> ', min_shape, opt_shape, max_shape) - profile.set_shape("input_ids", min_shape, opt_shape, max_shape) - profile.set_shape("segment_ids", min_shape, opt_shape, max_shape) - profile.set_shape("input_mask", min_shape, opt_shape, max_shape) - builder_config.add_optimization_profile(profile) - - wbeta = trt.PluginField("bert_embeddings_layernorm_beta", weights_dict["bert_embeddings_layernorm_beta"], trt.PluginFieldType.FLOAT32) - wgamma = trt.PluginField("bert_embeddings_layernorm_gamma", weights_dict["bert_embeddings_layernorm_gamma"], trt.PluginFieldType.FLOAT32) - wwordemb = trt.PluginField("bert_embeddings_word_embeddings", weights_dict["bert_embeddings_word_embeddings"], trt.PluginFieldType.FLOAT32) - wtokemb = trt.PluginField("bert_embeddings_token_type_embeddings", weights_dict["bert_embeddings_token_type_embeddings"], trt.PluginFieldType.FLOAT32) - wposemb = trt.PluginField("bert_embeddings_position_embeddings", weights_dict["bert_embeddings_position_embeddings"], trt.PluginFieldType.FLOAT32) - - output_fp16 = trt.PluginField("output_fp16", np.array([1 if config.use_fp16 else 0]).astype(np.int32), trt.PluginFieldType.INT32) - mha_type = trt.PluginField("mha_type_id", np.array([get_mha_dtype(config)], np.int32), trt.PluginFieldType.INT32) - - pfc = trt.PluginFieldCollection([wbeta, wgamma, wwordemb, wtokemb, wposemb, output_fp16, mha_type]) - fn = emln_plg_creator.create_plugin("embeddings", pfc) - - inputs = [input_ids, segment_ids, input_mask] - emb_layer = network.add_plugin_v2(inputs, fn) - return emb_layer - -def build_engine(batch_sizes, sequence_lengths, config, weights_dict): - explicit_batch_flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - - builder = trt.Builder(TRT_LOGGER) - with builder.create_network(explicit_batch_flag) as network, builder.create_builder_config() as builder_config: - if config.use_fp16: - builder_config.set_flag(trt.BuilderFlag.FP16) - - # Create the network - emb_layer = emb_layernorm(builder, network, config, weights_dict, builder_config, sequence_lengths, batch_sizes) - embeddings = emb_layer.get_output(0) - mask_idx = emb_layer.get_output(1) - - bert_out = bert_model(config, weights_dict, network, embeddings, mask_idx) - - squad_logits = squad_output("cls_", config, weights_dict, network, bert_out) - squad_logits_out = squad_logits.get_output(0) - - network.mark_output(squad_logits_out) - - build_start_time = time.time() - engine = builder.build_engine(network, builder_config) - build_time_elapsed = (time.time() - build_start_time) - TRT_LOGGER.log(TRT_LOGGER.INFO, "build engine in {:.3f} Sec".format(build_time_elapsed)) - return engine - -def str2bool(v): - return v.lower() in ('yes', 'true') - -def main(): - parser = argparse.ArgumentParser(description="TensorRT BERT Sample", formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument("-z", "--use_trt", type=str2bool, default=False, help = "Whether to use tensorRT or IxRT") - parser.add_argument("-x", "--onnx", required=False, help="The ONNX model file path.") - parser.add_argument("-pt", "--pytorch", required=False, help="The PyTorch checkpoint file path.") - parser.add_argument("-o", "--output", required=True, default="bert_base_384.engine", help="The bert engine file, ex bert.engine") - parser.add_argument("-b", "--batch-size", nargs='+', help="Batch size(s) to optimize for. 
The engine will be usable with any batch size below this, but may not be optimal for smaller sizes. Can be specified multiple times to optimize for more than one batch size.", type=int) - parser.add_argument("-s", "--sequence-length", nargs='+', help="Sequence length of the BERT model", type=int) - parser.add_argument("-c", "--config-dir", required=True, - help="The folder containing the bert_config.json, which can be downloaded e.g. from https://github.com/google-research/bert#pre-trained-models or by running download_models.py in dle/TensorFlow/LanguageModeling/BERT/data/pretrained_models_google") - parser.add_argument("-f", "--fp16", action="store_true", help="Indicates that inference should be run in FP16 precision", required=False) - parser.add_argument("-j", "--squad-json", default="squad/dev-v1.1.json", help="squad json dataset used for int8 calibration", required=False) - parser.add_argument("-v", "--vocab-file", default="./pre-trained_model/uncased_L-24_H-1024_A-16/vocab.txt", help="Path to file containing entire understandable vocab", required=False) - parser.add_argument("--verbose", action="store_true", help="Turn on verbose logger and set profiling verbosity to DETAILED", required=False) - - args, _ = parser.parse_known_args() - args.batch_size = args.batch_size or [1] - args.sequence_length = args.sequence_length or [128] - args.use_trt = is_nvidia_platform() - - if len(args.sequence_length) not in [1, 3]: - print("Error: You must provide either one or three integers.") - sys.exit(1) - - if len(args.batch_size) not in [1, 3]: - print("Error: You must provide either one or three integers.") - sys.exit(1) - - if args.verbose: - TRT_LOGGER.min_severity = TRT_LOGGER.VERBOSE - - bert_config_path = args.config_dir - TRT_LOGGER.log(TRT_LOGGER.INFO, "Using configuration file: {:}".format(bert_config_path)) - - config = BertConfig(bert_config_path, args.fp16, args.use_trt) - - if args.onnx != None: - weights_dict = load_onnx_weights_and_quant(args.onnx, config) - elif args.pytorch != None: - weights_dict = load_pytorch_weights_and_quant(args.pytorch, config) - else: - raise RuntimeError("You need either specify TF checkpoint using option --ckpt or ONNX using option --onnx to build TRT BERT model.") - - with build_engine(args.batch_size, args.sequence_length, config, weights_dict) as engine: - TRT_LOGGER.log(TRT_LOGGER.VERBOSE, "Serializing Engine...") - serialized_engine = engine.serialize() - TRT_LOGGER.log(TRT_LOGGER.INFO, "Saving Engine to {:}".format(args.output)) - with open(args.output, "wb") as fout: - fout.write(serialized_engine) - TRT_LOGGER.log(TRT_LOGGER.INFO, "Done.") - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/models/nlp/language_model/bert_base_squad/ixrt/python/builder_int8.py b/models/nlp/language_model/bert_base_squad/ixrt/python/builder_int8.py deleted file mode 100644 index e51d7c40..00000000 --- a/models/nlp/language_model/bert_base_squad/ixrt/python/builder_int8.py +++ /dev/null @@ -1,415 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import os -import argparse -import json -import tensorrt as trt -import time -import sys -import ctypes -import os -import numpy as np -from builder_utils_int8 import load_pytorch_weights_and_quant -from builder_utils_int8 import WQKV, BQKV # Attention Keys -from builder_utils_int8 import W_AOUT, B_AOUT, W_MID, B_MID, W_LOUT, B_LOUT # Transformer Keys -from builder_utils_int8 import SQD_W, SQD_B # SQuAD Output Keys - -trt_version = [int(n) for n in trt.__version__.split('.')] - -TRT_LOGGER = trt.Logger(trt.Logger.ERROR) -from load_ixrt_plugin import load_ixrt_plugin -load_ixrt_plugin(TRT_LOGGER) - -plg_registry = trt.get_plugin_registry() -registry_list = plg_registry.plugin_creator_list -print("registry_list: ", [registry.name + '/' + registry.plugin_version for registry in registry_list]) -emln_plg_creator = plg_registry.get_plugin_creator("CustomEmbLayerNormPluginDynamic_IxRT", "2", "") -qkv2_plg_creator = plg_registry.get_plugin_creator("CustomQKVToContextPluginDynamic_IxRT", "3", "") -skln_plg_creator = plg_registry.get_plugin_creator("CustomSkipLayerNormPluginDynamic_IxRT", "3", "") -gelu_plg_creator = plg_registry.get_plugin_creator("CustomGeluPluginDynamic_IxRT", "1", "") -fc_plg_creator = plg_registry.get_plugin_creator("CustomFCPluginDynamic_IxRT", "2", "") - -# -class BertConfig: - def __init__(self, bert_config_path, use_int8): - with open(bert_config_path, "r") as f: - data = json.load(f) - self.num_attention_heads = data["num_attention_heads"] - self.hidden_size = data["hidden_size"] - self.intermediate_size = data["intermediate_size"] - self.num_hidden_layers = data["num_hidden_layers"] - self.head_size = self.hidden_size // self.num_attention_heads - self.use_int8 = use_int8 - -def set_tensor_name(tensor, prefix, name): - tensor.name = prefix + name - -def set_output_name(layer, prefix, name, out_idx = 0): - set_tensor_name(layer.get_output(out_idx), prefix, name) - -def set_output_range(layer, maxval, out_idx = 0): - layer.get_output(out_idx).set_dynamic_range(-maxval, maxval) - -def get_mha_dtype(config): - dtype = trt.float32 - if config.use_int8: - dtype = trt.int8 - return int(dtype) - -def custom_fc(prefix, config, init_dict, network, input_tensor, out_dims, W, B): - pf_out_dims = trt.PluginField("out_dims", np.array([out_dims], dtype=np.int32), trt.PluginFieldType.INT32) - pf_W = trt.PluginField("W", W, trt.PluginFieldType.FLOAT32) - - fields = [pf_out_dims, pf_W] - 
- if config.use_int8: - amax_vec = [init_dict[prefix + "wei_amax"]] - if B is not None: - pf_B = trt.PluginField("Bias", B, trt.PluginFieldType.FLOAT32) - amax_vec.append(init_dict[prefix + "out_amax"]) - pf_amax = trt.PluginField("fc_amax", np.array(amax_vec, np.float32), trt.PluginFieldType.FLOAT32) - fields.append(pf_B) - fields.append(pf_amax) - else: - pf_amax = trt.PluginField("fc_amax", np.array(amax_vec, np.float32), trt.PluginFieldType.FLOAT32) - fields.append(pf_amax) - - pfc = trt.PluginFieldCollection(fields) - fc_plugin = fc_plg_creator.create_plugin("fcplugin", pfc) - plug_inputs = [input_tensor] - out_dense = network.add_plugin_v2(plug_inputs, fc_plugin) - return out_dense - -def attention_layer_opt(prefix, config, init_dict, network, input_tensor, imask): - """ - Add the attention layer - """ - assert(len(input_tensor.shape) == 5) - B, S, hidden_size, _, _ = input_tensor.shape - num_heads = config.num_attention_heads - head_size = int(hidden_size / num_heads) - - Wall = init_dict[prefix + WQKV] - Ball = init_dict[prefix + BQKV] - - # FC_attention - mult_all = custom_fc(prefix + "self_qkv_", config, init_dict, network, input_tensor, 3*hidden_size, Wall, Ball) - set_output_range(mult_all, init_dict[prefix + "self_qkv_out_amax"]) - - has_mask = imask is not None - - # QKV2CTX - pf_hidden_size = trt.PluginField("hidden_size", np.array([hidden_size], np.int32), trt.PluginFieldType.INT32) - pf_num_heads = trt.PluginField("num_heads", np.array([num_heads], np.int32), trt.PluginFieldType.INT32) - fields = [pf_hidden_size, pf_num_heads] - dq_probs = [ - init_dict[prefix + "arrange_qkv_amax"], - init_dict[prefix + "softmax_in_amax"], - init_dict[prefix + "softmax_out_amax"] - ] - pf_dq = trt.PluginField("dq_probs", np.array(dq_probs, np.float32), trt.PluginFieldType.FLOAT32) - fields.append(pf_dq) - - pfc = trt.PluginFieldCollection(fields) - qkv2ctx_plug = qkv2_plg_creator.create_plugin("qkv2ctx", pfc) - - qkv_in = [mult_all.get_output(0)] - if has_mask: - qkv_in.append(imask) - qkv2ctx = network.add_plugin_v2(qkv_in, qkv2ctx_plug) - if config.use_int8: - set_output_range(qkv2ctx, init_dict[prefix + "output_dense_in_amax"]) - return qkv2ctx - - -def skipln(prefix, config, init_dict, network, input_tensor, skip, residual, is_last_layer, bias=None): - """ - Add the skip layer - """ - idims = input_tensor.shape - assert len(idims) == 5 - hidden_size = idims[2] - - dtype = trt.float32 - if config.use_int8: - dtype = trt.int8 - - wbeta = init_dict[prefix + "beta"] - wgamma = init_dict[prefix + "gamma"] - - pf_ld = trt.PluginField("ld", np.array([hidden_size], np.int32), trt.PluginFieldType.INT32) - pf_beta = trt.PluginField("beta", wbeta, trt.PluginFieldType.FLOAT32) - pf_gamma = trt.PluginField("gamma", wgamma, trt.PluginFieldType.FLOAT32) - pf_type = trt.PluginField("type_id", np.array([int(dtype)], np.int32), trt.PluginFieldType.INT32) - - fields = [pf_ld, pf_beta, pf_gamma, pf_type ] - if bias is not None: - pf_bias = trt.PluginField("bias", bias, trt.PluginFieldType.FLOAT32) - fields.append(pf_bias) - if is_last_layer: - pf_fp32 = trt.PluginField("output_fp32", np.array([1], np.int32), trt.PluginFieldType.INT32) - fields.append(pf_fp32) - - pfc = trt.PluginFieldCollection(fields) - skipln_plug = skln_plg_creator.create_plugin("skipln", pfc) - - skipln_inputs = [input_tensor, skip] - if config.use_int8: - skipln_inputs.append(residual) - layer = network.add_plugin_v2(skipln_inputs, skipln_plug) - return layer - -def ffn(prefix, config, init_dict, network, input_tensor, residual, 
is_last_layer): - # FC1 + GELU - B_mid = init_dict[prefix + B_MID] - W_mid = init_dict[prefix + W_MID] - - mid_dense = custom_fc(prefix + "intermediate_dense_", config, init_dict, network, input_tensor, config.intermediate_size, W_mid, None) - set_output_range(mid_dense, init_dict[prefix + "intermediate_dense_out_amax"]) - - dtype = trt.float32 - - if config.use_int8: - dtype = trt.int8 - - pf_type = trt.PluginField("type_id", np.array([int(dtype)], np.int32), trt.PluginFieldType.INT32) - pf_ld = trt.PluginField("ld", np.array([int(config.intermediate_size)], np.int32), trt.PluginFieldType.INT32) - fields = [pf_type, pf_ld] - if config.use_int8: - pf_bias = trt.PluginField("bias", B_mid, trt.PluginFieldType.FLOAT32) - fields.append(pf_bias) - - pfc = trt.PluginFieldCollection(fields) - gelu_plug = gelu_plg_creator.create_plugin("gelu", pfc) - - gelu_inputs = [mid_dense.get_output(0)] - gelu_layer = network.add_plugin_v2(gelu_inputs, gelu_plug) - - if config.use_int8: - set_output_range(gelu_layer, init_dict[prefix + "output_dense_in_amax"]) - - intermediate_act = gelu_layer.get_output(0) - # set_tensor_name(intermediate_act, prefix, "gelu") - - # FC2 - # Dense to hidden size - B_lout = init_dict[prefix + B_LOUT] - W_lout = init_dict[prefix + W_LOUT] - out_dense = custom_fc(prefix + "output_dense_", config, init_dict, network, intermediate_act, config.hidden_size, W_lout, None) - set_output_range(out_dense, init_dict[prefix + "output_dense_out_amax"]) - - out_layer = skipln(prefix + "output_layernorm_", config, init_dict, network, out_dense.get_output(0), input_tensor, residual, is_last_layer, B_lout) - return out_layer - -def transformer_layer_opt(prefix, config, init_dict, network, input_tensor, imask, residual, is_last_layer): - """ - Add the transformer layer - """ - idims = input_tensor.shape - assert len(idims) == 5 - hidden_size = idims[2] - - context_transposed = attention_layer_opt(prefix + "attention_", config, init_dict, network, input_tensor, imask) - attention_heads = context_transposed.get_output(0) - - # FC0 - B_aout = init_dict[prefix + B_AOUT] - W_aout = init_dict[prefix + W_AOUT] - attention_out_fc = custom_fc(prefix + "attention_output_dense_", config, init_dict, network, attention_heads, hidden_size, W_aout, None) - set_output_range(attention_out_fc, init_dict[prefix + "attention_output_dense_out_amax"]) - - skiplayer = skipln(prefix + "attention_output_layernorm_", config, init_dict, network, attention_out_fc.get_output(0), input_tensor, residual, False, B_aout) - if config.use_int8: - set_output_range(skiplayer, init_dict[prefix + "intermediate_dense_in_amax"]) - - ffn_layer = ffn(prefix, config, init_dict, network, skiplayer.get_output(0), skiplayer.get_output(1), is_last_layer) - return ffn_layer - -def bert_model(config, init_dict, network, input_tensor, input_mask, residual): - """ - Create the bert model - """ - prev_input = input_tensor - for layer in range(0, config.num_hidden_layers): - ss = "l{}_".format(layer) - out_layer = transformer_layer_opt(ss, config, init_dict, network, prev_input, input_mask, residual, - True if config.use_int8 and layer == config.num_hidden_layers - 1 else False) - prev_input = out_layer.get_output(0) - residual = None - if config.use_int8: - residual = out_layer.get_output(1) - if layer < config.num_hidden_layers - 1: - set_output_range(out_layer, init_dict["l{}_".format(layer+1) + "attention_self_qkv_in_amax"]) - else: - set_output_range(out_layer, 1) - - return prev_input - -def squad_output(prefix, config, init_dict, network, 
input_tensor): - """ - Create the squad output - """ - - idims = input_tensor.shape - assert len(idims) == 5 - B, S, hidden_size, _, _ = idims - - W_out = init_dict[prefix + SQD_W] - B_out = init_dict[prefix + SQD_B] - - dense = network.add_fully_connected(input_tensor, 2, W_out, B_out) - return dense - -def emb_layernorm(builder, network, config, weights_dict, builder_config, sequence_lengths, batch_sizes): - input_ids = network.add_input(name="input_ids", dtype=trt.int32, shape=(-1 if len(batch_sizes) > 1 else batch_sizes[0], -1 if len(sequence_lengths) > 1 else sequence_lengths[0])) - segment_ids = network.add_input(name="segment_ids", dtype=trt.int32, shape=(-1 if len(batch_sizes) > 1 else batch_sizes[0], -1 if len(sequence_lengths) > 1 else sequence_lengths[0])) - input_mask = network.add_input(name="input_mask", dtype=trt.int32, shape=(-1 if len(batch_sizes) > 1 else batch_sizes[0], -1 if len(sequence_lengths) > 1 else sequence_lengths[0])) - - if len(sequence_lengths) > 1: - profile = builder.create_optimization_profile() - min_shape = (batch_sizes[0], sequence_lengths[0]) - opt_shape = (batch_sizes[1], sequence_lengths[1]) - max_shape = (batch_sizes[2], sequence_lengths[2]) - assert(sequence_lengths[0] <= sequence_lengths[1] and sequence_lengths[1] <= sequence_lengths[2]) - - print('set dynamic shape -> ', min_shape, opt_shape, max_shape) - profile.set_shape("input_ids", min_shape, opt_shape, max_shape) - profile.set_shape("segment_ids", min_shape, opt_shape, max_shape) - profile.set_shape("input_mask", min_shape, opt_shape, max_shape) - builder_config.add_optimization_profile(profile) - - wbeta = trt.PluginField("bert_embeddings_layernorm_beta", weights_dict["bert_embeddings_layernorm_beta"], trt.PluginFieldType.FLOAT32) - wgamma = trt.PluginField("bert_embeddings_layernorm_gamma", weights_dict["bert_embeddings_layernorm_gamma"], trt.PluginFieldType.FLOAT32) - wwordemb = trt.PluginField("bert_embeddings_word_embeddings", weights_dict["bert_embeddings_word_embeddings"], trt.PluginFieldType.FLOAT32) - wtokemb = trt.PluginField("bert_embeddings_token_type_embeddings", weights_dict["bert_embeddings_token_type_embeddings"], trt.PluginFieldType.FLOAT32) - wposemb = trt.PluginField("bert_embeddings_position_embeddings", weights_dict["bert_embeddings_position_embeddings"], trt.PluginFieldType.FLOAT32) - - output_fp16 = trt.PluginField("output_fp16", np.array([0]).astype(np.int32), trt.PluginFieldType.INT32) - mha_type = trt.PluginField("mha_type_id", np.array([get_mha_dtype(config)], np.int32), trt.PluginFieldType.INT32) - - pfc = trt.PluginFieldCollection([wbeta, wgamma, wwordemb, wtokemb, wposemb, output_fp16, mha_type]) - fn = emln_plg_creator.create_plugin("embeddings", pfc) - - inputs = [input_ids, segment_ids, input_mask] - emb_layer = network.add_plugin_v2(inputs, fn) - - if config.use_int8: - set_output_range(emb_layer, weights_dict["l0_attention_self_qkv_in_amax"]) - set_output_range(emb_layer, 1.0, 1) - return emb_layer - -def build_engine(batch_sizes, sequence_lengths, config, weights_dict): - explicit_batch_flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - - builder = trt.Builder(TRT_LOGGER) - with builder.create_network(explicit_batch_flag) as network, builder.create_builder_config() as builder_config: - network = builder.create_network(explicit_batch_flag) - builder_config = builder.create_builder_config() - builder_config.set_flag(trt.BuilderFlag.INT8) - - # Create the network - emb_layer = emb_layernorm(builder, network, config, weights_dict, 
builder_config, sequence_lengths, batch_sizes) - embeddings = emb_layer.get_output(0) - mask_idx = emb_layer.get_output(1) - - residual_buffer = None - if config.use_int8: - residual_buffer = emb_layer.get_output(2) - - bert_out = bert_model(config, weights_dict, network, embeddings, mask_idx, residual_buffer) - - squad_logits = squad_output("cls_", config, weights_dict, network, bert_out) - squad_logits_out = squad_logits.get_output(0) - - network.mark_output(squad_logits_out) - - build_start_time = time.time() - engine = builder.build_engine(network, builder_config) - build_time_elapsed = (time.time() - build_start_time) - TRT_LOGGER.log(TRT_LOGGER.INFO, "build engine in {:.3f} Sec".format(build_time_elapsed)) - return engine - -def main(): - parser = argparse.ArgumentParser(description="TensorRT BERT Sample", formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument("-x", "--onnx", required=False, help="The ONNX model file path.") - parser.add_argument("-pt", "--pytorch", required=False, help="The PyTorch checkpoint file path.") - parser.add_argument("-o", "--output", required=True, default="bert_base_384.engine", help="The bert engine file, ex bert.engine") - parser.add_argument("-b", "--batch-size", nargs='+', help="Batch size(s) to optimize for. The engine will be usable with any batch size below this, but may not be optimal for smaller sizes. Can be specified multiple times to optimize for more than one batch size.", type=int) - parser.add_argument("-s", "--sequence-length", nargs='+', help="Sequence length of the BERT model", type=int) - parser.add_argument("-c", "--config-dir", required=True, - help="The folder containing the bert_config.json, which can be downloaded e.g. from https://github.com/google-research/bert#pre-trained-models or by running download_models.py in dle/TensorFlow/LanguageModeling/BERT/data/pretrained_models_google") - parser.add_argument("-f", "--fp16", action="store_true", help="Indicates that inference should be run in FP16 precision", required=False) - parser.add_argument("-i", "--int8", action="store_true", help="Indicates that inference should be run in INT8 precision", required=False) - parser.add_argument("-j", "--squad-json", default="squad/dev-v1.1.json", help="squad json dataset used for int8 calibration", required=False) - parser.add_argument("-v", "--vocab-file", default="./pre-trained_model/uncased_L-24_H-1024_A-16/vocab.txt", help="Path to file containing entire understandable vocab", required=False) - parser.add_argument("--verbose", action="store_true", help="Turn on verbose logger and set profiling verbosity to DETAILED", required=False) - - args, _ = parser.parse_known_args() - args.batch_size = args.batch_size or [1] - args.sequence_length = args.sequence_length or [128] - - if len(args.sequence_length) not in [1, 3]: - print("Error: You must provide either one or three integers.") - sys.exit(1) - - if len(args.batch_size) not in [1, 3]: - print("Error: You must provide either one or three integers.") - sys.exit(1) - - if args.verbose: - TRT_LOGGER.min_severity = TRT_LOGGER.VERBOSE - - bert_config_path = args.config_dir - TRT_LOGGER.log(TRT_LOGGER.INFO, "Using configuration file: {:}".format(bert_config_path)) - - config = BertConfig(bert_config_path, args.int8) - - if args.onnx != None: - if args.int8: - raise RuntimeError("int8 onnx not supported now!!!") - elif args.pytorch != None: - weights_dict = load_pytorch_weights_and_quant(args.pytorch, config) - else: - raise RuntimeError("You need either specify TF checkpoint 
using option --ckpt or ONNX using option --onnx to build TRT BERT model.") - - # engine = build_engine(args.batch_size, args.workspace_size, args.sequence_length, config, weights_dict, args.squad_json, args.vocab_file, None, args.calib_num, args.verbose) - with build_engine(args.batch_size, args.sequence_length, config, weights_dict) as engine: - TRT_LOGGER.log(TRT_LOGGER.VERBOSE, "Serializing Engine...") - serialized_engine = engine.serialize() - TRT_LOGGER.log(TRT_LOGGER.INFO, "Saving Engine to {:}".format(args.output)) - with open(args.output, "wb") as fout: - fout.write(serialized_engine) - TRT_LOGGER.log(TRT_LOGGER.INFO, "Done.") - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/models/nlp/language_model/bert_base_squad/ixrt/python/builder_utils.py b/models/nlp/language_model/bert_base_squad/ixrt/python/builder_utils.py deleted file mode 100644 index 76737977..00000000 --- a/models/nlp/language_model/bert_base_squad/ixrt/python/builder_utils.py +++ /dev/null @@ -1,219 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -import onnx -import numpy as np -import tensorrt as trt -import json -import struct -import torch - -TRT_LOGGER = trt.Logger(trt.Logger.INFO) - -""" -Attentions Keys -""" -WQ = "self_query_kernel" -BQ = "self_query_bias" -WK = "self_key_kernel" -BK = "self_key_bias" -WV = "self_value_kernel" -BV = "self_value_bias" -WQKV = "self_qkv_kernel" -BQKV = "self_qkv_bias" - -""" -Transformer Keys -""" -W_AOUT = "attention_output_dense_kernel" -B_AOUT = "attention_output_dense_bias" -AOUT_LN_BETA = "attention_output_layernorm_beta" -AOUT_LN_GAMMA = "attention_output_layernorm_gamma" -W_MID = "intermediate_dense_kernel" -B_MID = "intermediate_dense_bias" -W_LOUT = "output_dense_kernel" -B_LOUT = "output_dense_bias" -LOUT_LN_BETA = "output_layernorm_beta" -LOUT_LN_GAMMA = "output_layernorm_gamma" - -""" -Squad Output Keys -""" -SQD_W = "squad_output_weights" -SQD_B = "squad_output_bias" - - -def get_onnx_weight_dict(tensor_dict, config): - N = config.num_attention_heads - H = config.head_size - hidden_size = config.hidden_size - - weights_dict = dict() - for outname, tensor in tensor_dict.items(): - if outname.find("_amax") != -1: - weights_dict[outname] = tensor.flatten() - elif outname.find(BQ) != -1: - prefix = outname[:outname.find(BQ)] - - Wqkv = np.zeros((3, hidden_size, hidden_size), np.float32) - Bqkv = np.zeros((3, hidden_size), np.float32) - - Wqkv[0,:,:] = tensor_dict[prefix + WQ] - Wqkv[1,:,:] = tensor_dict[prefix + WK] - Wqkv[2,:,:] = tensor_dict[prefix + WV] - Bqkv[0,:] = tensor - Bqkv[1,:] = tensor_dict[prefix + BK] - Bqkv[2,:] = tensor_dict[prefix + BV] - - weights_dict[prefix + WQKV] = Wqkv.flatten() - weights_dict[prefix + BQKV] = Bqkv.flatten() - weights_dict[prefix + WQKV + "_notrans"] = np.ascontiguousarray(Wqkv.T).flatten() - - elif outname.find(BK) != -1 or outname.find(BV) != -1 or outname.find(WQ) != -1 or outname.find(WK) != -1 or outname.find(WV) != -1: - pass - else: - flat_tensor = np.ascontiguousarray(tensor).flatten() - weights_dict[outname] = flat_tensor - - return weights_dict - -def onnx_to_trt_name(onnx_name): - """ - Converting variables in the onnx checkpoint to names corresponding to the naming convention used in the TF version, expected by the builder - """ - qkv_strings = {'key', 'value', 'query', 'query_key_value'} - onnx_name = onnx_name.lower() - toks = [t.strip('_') for t in onnx_name.split('.')] - if toks[0] == 'bert': #embeddings or encoder - if toks[1] == 'encoder': #transformer - # Token conversions for sparse checkpoints - if toks[-2] == 'dense_act': - toks[-2] = 'dense' - elif toks[-3] == 'dense_act': - if toks[-2] == 'input_quantizer': - toks[-2] = 'input' - elif toks[-2] == 'weight_quantizer': - toks[-2] = 'kernel' - toks[-3] = 'dense' - elif toks[-2].startswith('matmul'): - toks[-2] = { - 'matmul_q_quantizer': 'qv_a_input_quantizer', - 'matmul_k_quantizer': 'qv_b_input_quantizer', - 'matmul_v_quantizer': 'av_b_input_quantizer', - 'matmul_a_quantizer': 'av_a_input_quantizer', - }[toks[-2].replace('input_', '')] - - # Token conversions for all checkpoints - if toks[-2] == 'layernorm': #bias->beta, weight->gamma - toks[-1] = 'beta' if toks[-1] == 'bias' else 'gamma' - elif (toks[-2] == 'dense' or toks[-2] in qkv_strings) and toks[-1] == 'weight': - toks[-1] = 'kernel' - elif (toks[-3] == 'dense' or toks[-3] in qkv_strings) and toks[-1] == 'amax': - if toks[-2] == 'weight_quantizer': - toks[-2] = 'kernel' - elif toks[-2] == 'input_quantizer': - toks[-2] = 'input' - - if 'final_input_quantizer' not in toks[2]: - ind = toks.index('layers')+1 if 
'layers' in toks else 3 - toks = toks[ind:] - toks[0] = 'l{}'.format(int(toks[0])) - else: - if toks[-2] == 'layernorm': #bias->beta, weight->gamma - toks[-1] = 'beta' if toks[-1] == 'bias' else 'gamma' - else: #embeddings: drop "_weight" suffix - if toks[-1] == 'amax': - toks[-2] = 'amax' - toks = toks[:-1] - elif 'qa' in onnx_name: - name = 'cls_squad_output_bias' if toks[-1] == 'bias' else 'cls_squad_output_weights' - return name - else: - print("Encountered unknown case:", onnx_name) - assert(False) - parsed = '_'.join(toks) - return parsed - -def load_onnx_weights_and_quant(path, config): - """ - Load the weights from the onnx checkpoint - """ - model = onnx.load(path) - weights = model.graph.initializer - tensor_dict = dict((onnx_to_trt_name(w.name), np.frombuffer(w.raw_data, np.int8).reshape(w.dims)) - if w.name.split('_')[-1] == 'mask' else - (onnx_to_trt_name(w.name), np.frombuffer(w.raw_data, np.float32).reshape(w.dims)) - for w in weights) - return get_onnx_weight_dict(tensor_dict, config) - -def load_pytorch_weights_and_quant(path, config): - """ - Load the weights from the pytorch checkpoint - """ - state_dict = torch.load(path, map_location='cpu')["model"] - tensor_dict = {onnx_to_trt_name(name):val.numpy() for name, val in state_dict.items()} - return get_onnx_weight_dict(tensor_dict, config) - -class BertConfig: - def __init__(self, bert_config_path, use_fp16, use_int8=False): - with open(bert_config_path, "r") as f: - data = json.load(f) - self.num_attention_heads = data["num_attention_heads"] - self.hidden_size = data["hidden_size"] - self.intermediate_size = data["intermediate_size"] - self.num_hidden_layers = data["num_hidden_layers"] - self.head_size = self.hidden_size // self.num_attention_heads - self.use_fp16 = use_fp16 - self.use_int8 = use_int8 - -if __name__ == '__main__': - bert_config_path = '../bert-large-uncased/bert_config.json' - onnx_model_path = '../bert-large-uncased/bert_large_v1_1_fake_quant.onnx' - weight_save_path = "../bert-large-uncased/bert_large_v1_1.wts" - config = config = BertConfig(bert_config_path, True) - weights_dict = load_onnx_weights_and_quant(onnx_model_path, config) - f = open(weight_save_path, "w") - num = 0 - for key, value in weights_dict.items(): - if key.find('_amax') == -1: - num += 1 - - f.write('{}\n'.format(num)) - for key, value in weights_dict.items(): - print('key: ', key) - if key.find('_amax') != -1: - continue - f.write("{} {}".format(key, len(value))) - print(len(value)) - for v in value: - f.write(" ") - f.write(struct.pack('>f', float(v)).hex()) - f.write("\n") diff --git a/models/nlp/language_model/bert_base_squad/ixrt/python/builder_utils_int8.py b/models/nlp/language_model/bert_base_squad/ixrt/python/builder_utils_int8.py deleted file mode 100644 index 56ac8d18..00000000 --- a/models/nlp/language_model/bert_base_squad/ixrt/python/builder_utils_int8.py +++ /dev/null @@ -1,209 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the -# License for the specific language governing permissions and limitations -# under the License. -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import numpy as np -import tensorrt as trt -import json -import struct -import torch - -TRT_LOGGER = trt.Logger(trt.Logger.INFO) - -""" -Attentions Keys -""" -WQ = "self_query_kernel" -BQ = "self_query_bias" -WK = "self_key_kernel" -BK = "self_key_bias" -WV = "self_value_kernel" -BV = "self_value_bias" -WQKV = "self_qkv_kernel" -BQKV = "self_qkv_bias" - -""" -Transformer Keys -""" -W_AOUT = "attention_output_dense_kernel" -B_AOUT = "attention_output_dense_bias" -AOUT_LN_BETA = "attention_output_layernorm_beta" -AOUT_LN_GAMMA = "attention_output_layernorm_gamma" -W_MID = "intermediate_dense_kernel" -B_MID = "intermediate_dense_bias" -W_LOUT = "output_dense_kernel" -B_LOUT = "output_dense_bias" -LOUT_LN_BETA = "output_layernorm_beta" -LOUT_LN_GAMMA = "output_layernorm_gamma" - -""" -Squad Output Keys -""" -SQD_W = "squad_output_weights" -SQD_B = "squad_output_bias" - -ixrt_name_map = { - "bert.embeddings.LayerNorm.bias": "bert_embeddings_layernorm_beta", - "bert.embeddings.LayerNorm.weight" : "bert_embeddings_layernorm_gamma", - "bert.embeddings.word_embeddings.weight" : "bert_embeddings_word_embeddings", - "bert.embeddings.token_type_embeddings.weight" : "bert_embeddings_token_type_embeddings", - "bert.embeddings.position_embeddings.weight" : "bert_embeddings_position_embeddings", - "qa_outputs.weight" : "cls_squad_output_weights", - "qa_outputs.bias" : "cls_squad_output_bias" -} - -ixrt_atten_name_map = { - "bert.encoder.layer.{}.self_attn.qkv_proj.weight" : "l{}_attention_self_qkv_kernel", - "bert.encoder.layer.{}.self_attn.qkv_proj.bias" : "l{}_attention_self_qkv_bias", - "bert.encoder.layer.{}.self_attn.out_proj.bias" : "l{}_attention_output_dense_bias", - "bert.encoder.layer.{}.self_attn.out_proj.weight" : "l{}_attention_output_dense_kernel", - "bert.encoder.layer.{}.fc1.weight" : "l{}_intermediate_dense_kernel", - "bert.encoder.layer.{}.fc1.bias" : "l{}_intermediate_dense_bias", - "bert.encoder.layer.{}.fc2.weight" : "l{}_output_dense_kernel", - "bert.encoder.layer.{}.fc2.bias" : "l{}_output_dense_bias", - "bert.encoder.layer.{}.self_attn_layer_norm.weight" : "l{}_attention_output_layernorm_gamma", - "bert.encoder.layer.{}.self_attn_layer_norm.bias" : "l{}_attention_output_layernorm_beta", - "bert.encoder.layer.{}.final_layer_norm.weight" : "l{}_output_layernorm_gamma", - "bert.encoder.layer.{}.final_layer_norm.bias" : "l{}_output_layernorm_beta", - "bert.encoder.layer.{}.self_attn.qkv_proj.weight_quant.clip.clip_value_max" : "l{}_attention_self_qkv_wei_amax", - "bert.encoder.layer.{}.self_attn.qkv_proj.input_quant.clip.clip_value_max" : "l{}_attention_self_qkv_in_amax", - "bert.encoder.layer.{}.self_attn.qkv_proj.output_quant.clip.clip_value_max" : "l{}_attention_self_qkv_out_amax", - 
"bert.encoder.layer.{}.self_attn.attention_quant.clip.clip_value_max" : "l{}_attention_arrange_qkv_amax", - "bert.encoder.layer.{}.self_attn.softmax_in_quant.clip.clip_value_max" : "l{}_attention_softmax_in_amax", - "bert.encoder.layer.{}.self_attn.atten_score_out_quant.clip.clip_value_max" : "l{}_attention_softmax_out_amax", - "bert.encoder.layer.{}.self_attn.out_proj.input_quant.clip.clip_value_max" : "l{}_attention_output_dense_in_amax", - "bert.encoder.layer.{}.self_attn.out_proj.output_quant.clip.clip_value_max" : "l{}_attention_output_dense_out_amax", - "bert.encoder.layer.{}.self_attn.out_proj.weight_quant.clip.clip_value_max" : "l{}_attention_output_dense_wei_amax", - "bert.encoder.layer.{}.fc1.input_quant.clip.clip_value_max" : "l{}_intermediate_dense_in_amax", - "bert.encoder.layer.{}.fc1.output_quant.clip.clip_value_max" : "l{}_intermediate_dense_out_amax", - "bert.encoder.layer.{}.fc1.weight_quant.clip.clip_value_max" : "l{}_intermediate_dense_wei_amax", - "bert.encoder.layer.{}.fc2.input_quant.clip.clip_value_max" : "l{}_output_dense_in_amax", - "bert.encoder.layer.{}.fc2_out_quant.clip.clip_value_max" : "l{}_output_dense_out_amax", - "bert.encoder.layer.{}.fc2.weight_quant.clip.clip_value_max" : "l{}_output_dense_wei_amax" -} - -def get_weight_dict(tensor_dict, config): - N = config.num_attention_heads - H = config.head_size - hidden_size = config.hidden_size - - weights_dict = dict() - for outname, tensor in tensor_dict.items(): - if outname.find("_amax") != -1: - weights_dict[outname] = tensor.item() - elif outname.find(BQ) != -1: - prefix = outname[:outname.find(BQ)] - - Wqkv = np.zeros((3, hidden_size, hidden_size), np.float32) - Bqkv = np.zeros((3, hidden_size), np.float32) - - Wqkv[0,:,:] = tensor_dict[prefix + WQ] - Wqkv[1,:,:] = tensor_dict[prefix + WK] - Wqkv[2,:,:] = tensor_dict[prefix + WV] - Bqkv[0,:] = tensor - Bqkv[1,:] = tensor_dict[prefix + BK] - Bqkv[2,:] = tensor_dict[prefix + BV] - - weights_dict[prefix + WQKV] = Wqkv.flatten() - weights_dict[prefix + BQKV] = Bqkv.flatten() - elif outname.find(BK) != -1 or outname.find(BV) != -1 or outname.find(WQ) != -1 or outname.find(WK) != -1 or outname.find(WV) != -1: - pass - else: - flat_tensor = np.ascontiguousarray(tensor).flatten() - weights_dict[outname] = flat_tensor - - return weights_dict - -def pytorch_to_trt_name(state_dict, num_layer): - tensor_dict = {} - for name in ixrt_name_map.keys(): - tensor_dict[ixrt_name_map[name]] = state_dict[name] - - for name in ixrt_atten_name_map.keys(): - for layer_id in range(num_layer): - key_name = name.format(layer_id) - value_name = ixrt_atten_name_map[name].format(layer_id) - tensor_dict[value_name] = state_dict[key_name] - return tensor_dict - -def load_pytorch_weights_and_quant(path, config): - """ - Load the weights from the pytorch checkpoint - """ - state_dict = torch.load(path, map_location='cpu') - tensor_dict = pytorch_to_trt_name(state_dict, config.num_hidden_layers) - return get_weight_dict(tensor_dict, config) - -class BertConfig: - def __init__(self, bert_config_path, use_fp16, use_int8=False, use_trt=False): - with open(bert_config_path, "r") as f: - data = json.load(f) - self.num_attention_heads = data["num_attention_heads"] - self.hidden_size = data["hidden_size"] - self.intermediate_size = data["intermediate_size"] - self.num_hidden_layers = data["num_hidden_layers"] - self.head_size = self.hidden_size // self.num_attention_heads - self.use_fp16 = use_fp16 - self.use_int8 = use_int8 - self.use_trt = use_trt - -if __name__ == '__main__': - 
bert_config_path = './data/bert-large-uncased/bert_config.json' - pytorch_model_path = './data/bert-large-uncased/bert_large_int8_qat.bin' - weight_save_path = "./data/bert-large-uncased/bert_large_v1_1_int8.wts" - config = BertConfig(bert_config_path, True) - weights_dict = load_pytorch_weights_and_quant(pytorch_model_path, config) - f = open(weight_save_path, "w") - num = 0 - for key, value in weights_dict.items(): - if key.find('_amax') == -1: - num += 1 - - f.write('{}\n'.format(num)) - for key, value in weights_dict.items(): - if key.find('_amax') != -1: - continue - print('key: ', key) - f.write("{} {}".format(key, len(value))) - print(len(value)) - for v in value: - f.write(" ") - f.write(struct.pack('>f', float(v)).hex()) - f.write("\n") - - f.write('{}\n'.format(len(weights_dict) - num)) - for key, value in weights_dict.items(): - if key.find('_amax') == -1: - continue - print('key: ', key) - print('value: ', value) - f.write('{} '.format(key)) - f.write(struct.pack('>f', float(weights_dict[key])).hex()) - f.write('\n') diff --git a/models/nlp/language_model/bert_base_squad/ixrt/python/evaluate-v1.1.py b/models/nlp/language_model/bert_base_squad/ixrt/python/evaluate-v1.1.py deleted file mode 100644 index ba4ee190..00000000 --- a/models/nlp/language_model/bert_base_squad/ixrt/python/evaluate-v1.1.py +++ /dev/null @@ -1,130 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# Obtained from https://rajpurkar.github.io/SQuAD-explorer/ - -""" Official evaluation script for v1.1 of the SQuAD dataset. 
""" -from __future__ import print_function -from collections import Counter -import string -import re -import argparse -import json -import sys - -def normalize_answer(s): - """Lower text and remove punctuation, articles and extra whitespace.""" - def remove_articles(text): - return re.sub(r'\b(a|an|the)\b', ' ', text) - - def white_space_fix(text): - return ' '.join(text.split()) - - def remove_punc(text): - exclude = set(string.punctuation) - return ''.join(ch for ch in text if ch not in exclude) - - def lower(text): - return text.lower() - - return white_space_fix(remove_articles(remove_punc(lower(s)))) - - -def f1_score(prediction, ground_truth): - prediction_tokens = normalize_answer(prediction).split() - ground_truth_tokens = normalize_answer(ground_truth).split() - common = Counter(prediction_tokens) & Counter(ground_truth_tokens) - num_same = sum(common.values()) - if num_same == 0: - return 0 - precision = 1.0 * num_same / len(prediction_tokens) - recall = 1.0 * num_same / len(ground_truth_tokens) - f1 = (2 * precision * recall) / (precision + recall) - return f1 - - -def exact_match_score(prediction, ground_truth): - return (normalize_answer(prediction) == normalize_answer(ground_truth)) - - -def metric_max_over_ground_truths(metric_fn, prediction, ground_truths): - scores_for_ground_truths = [] - for ground_truth in ground_truths: - score = metric_fn(prediction, ground_truth) - scores_for_ground_truths.append(score) - return max(scores_for_ground_truths) - -def evaluate(dataset, predictions, f1_acc): - f1 = exact_match = total = 0 - for article in dataset: - for paragraph in article['paragraphs']: - for qa in paragraph['qas']: - total += 1 - if qa['id'] not in predictions: - message = 'Unanswered question ' + qa['id'] + \ - ' will receive score 0.' 
- print(message, file=sys.stderr) - continue - ground_truths = list(map(lambda x: x['text'], qa['answers'])) - prediction = predictions[qa['id']] - exact_match += metric_max_over_ground_truths( - exact_match_score, prediction, ground_truths) - f1 += metric_max_over_ground_truths( - f1_score, prediction, ground_truths) - - exact_match = 100.0 * exact_match / total - f1 = 100.0 * f1 / total - if (f1 < f1_acc - 0.5): - print("&&&& FAILED TensorRT BERT Squad Accuracy matches reference.") - else: - print("&&&& PASSED TensorRT BERT Squad Accuracy matches reference.") - return {'exact_match': exact_match, 'f1': f1} - -if __name__ == '__main__': - expected_version = '1.1' - parser = argparse.ArgumentParser( - description='Evaluation for SQuAD ' + expected_version) - parser.add_argument('dataset_file', help='Dataset file') - parser.add_argument('prediction_file', help='Prediction File') - parser.add_argument('f1_acc', help='Reference Accuracy') - args = parser.parse_args() - with open(args.dataset_file) as dataset_file: - dataset_json = json.load(dataset_file) - if (dataset_json['version'] != expected_version): - print('Evaluation expects v-' + expected_version + - ', but got dataset with v-' + dataset_json['version'], - file=sys.stderr) - dataset = dataset_json['data'] - with open(args.prediction_file) as prediction_file: - predictions = json.load(prediction_file) - f1_acc = float(args.f1_acc) - print(json.dumps(evaluate(dataset, predictions, f1_acc))) diff --git a/models/nlp/language_model/bert_base_squad/ixrt/python/inference.py b/models/nlp/language_model/bert_base_squad/ixrt/python/inference.py deleted file mode 100644 index 25a40278..00000000 --- a/models/nlp/language_model/bert_base_squad/ixrt/python/inference.py +++ /dev/null @@ -1,417 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -import sys -import time -import json -import ctypes -import argparse -import collections -import numpy as np -import tensorrt as trt -import pycuda.driver as cuda -import pycuda.autoinit - -import helpers.tokenization as tokenization -import helpers.data_processing as dp -from tqdm import tqdm -import math - -from load_ixrt_plugin import load_ixrt_plugin -TRT_LOGGER = trt.Logger(trt.Logger.ERROR) - -def parse_args(): - """ - Parse command line arguments - """ - parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument('-e', '--engine', - help='Path to BERT TensorRT engine') - parser.add_argument("-b", "--batch-size", default=1, help="Batch size for inference.", type=int) - parser.add_argument('-p', '--passage', nargs='*', - help='Text for paragraph/passage for BERT QA', - default='') - parser.add_argument('-pf', '--passage-file', - help='File containing input passage', - default='') - parser.add_argument('-q', '--question', nargs='*', - help='Text for query/question for BERT QA', - default='') - parser.add_argument('-qf', '--question-file', - help='File containing input question', - default='') - parser.add_argument('-sq', '--squad-json', - help='SQuAD json file', - default='') - parser.add_argument('-o', '--output-prediction-file', - help='Output prediction file for SQuAD evaluation', - default='./predictions.json') - parser.add_argument('-v', '--vocab-file', - help='Path to file containing entire understandable vocab') - parser.add_argument('-s', '--sequence-length', - help='The sequence length to use. Defaults to 128', - default=128, type=int) - parser.add_argument('--max-query-length', - help='The maximum length of a query in number of tokens. Queries longer than this will be truncated', - default=64, type=int) - parser.add_argument('--max-answer-length', - help='The maximum length of an answer that can be generated', - default=30, type=int) - parser.add_argument('--n-best-size', - help='Total number of n-best predictions to generate in the nbest_predictions.json output file', - default=20, type=int) - parser.add_argument('--doc-stride', - help='When splitting up a long document into chunks, what stride to take between chunks', - default=128, type=int) - parser.add_argument('--target_qps', - help="target qps metric", required=False, type=int) - parser.add_argument("-i", "--int8", action="store_true", help="Indicates that inference should be run in INT8 precision", required=False) - args, _ = parser.parse_known_args() - return args - -if __name__ == '__main__': - args = parse_args() - - paragraph_text = None - squad_examples = None - output_prediction_file = None - - if not args.passage == '': - paragraph_text = ' '.join(args.passage) - elif not args.passage_file == '': - f = open(args.passage_file, 'r') - paragraph_text = f.read() - elif not args.squad_json == '': - squad_examples = dp.read_squad_json(args.squad_json) - output_prediction_file = args.output_prediction_file - else: - paragraph_text = input("Paragraph: ") - - question_text = None - if not args.question == '': - question_text = ' '.join(args.question) - elif not args.question_file == '': - f = open(args.question_file, 'r') - question_text = f.read() - - tokenizer = tokenization.FullTokenizer(vocab_file=args.vocab_file, do_lower_case=True) - # When splitting up a long document into chunks, how much stride to take between chunks. - doc_stride = args.doc_stride - # The maximum total input sequence length after WordPiece tokenization. 
- # Sequences longer than this will be truncated, and sequences shorter - max_seq_length = args.sequence_length - - def question_features(tokens, question): - # Extract features from the paragraph and question - return dp.convert_example_to_features(tokens, question, tokenizer, max_seq_length, doc_stride, args.max_query_length) - - load_ixrt_plugin(TRT_LOGGER) - - # The first context created will use the 0th profile. A new context must be created - # for each additional profile needed. Here, we only use batch size 1, thus we only need the first profile. - with open(args.engine, 'rb') as f: - runtime = trt.Runtime(TRT_LOGGER) - engine = runtime.deserialize_cuda_engine(f.read()) - context = engine.create_execution_context() - - # select engine profile - selected_profile = -1 - num_binding_per_profile = engine.num_bindings // engine.num_optimization_profiles - for idx in range(engine.num_optimization_profiles): - profile_shape = engine.get_profile_shape(profile_index = idx, binding = idx * num_binding_per_profile) - if profile_shape[0][0] <= args.batch_size and profile_shape[2][0] >= args.batch_size and profile_shape[0][1] <= max_seq_length and profile_shape[2][1] >= max_seq_length: - selected_profile = idx - break - if selected_profile == -1: - raise RuntimeError("Could not find any profile that can run batch size {}.".format(args.batch_size)) - - # Create a stream in which to copy inputs/outputs and run inference. - stream = cuda.Stream() - - # if args.use_trt: - # context.active_optimization_profile = selected_profile - # else: - context.set_optimization_profile_async(selected_profile, stream.handle) - binding_idx_offset = selected_profile * num_binding_per_profile - - input_shape = (args.batch_size, max_seq_length) - input_nbytes = trt.volume(input_shape) * 4 - for binding in range(3): - context.set_binding_shape(binding, input_shape) - assert context.all_binding_shapes_specified - - # Allocate device memory for inputs. - d_inputs = [cuda.mem_alloc(input_nbytes) for binding in range(3)] - - # Allocate output buffer by querying the size from the context. This may be different for different input shapes. 
- h_output = cuda.pagelocked_empty(tuple(context.get_binding_shape(binding_idx_offset + 3)), dtype=np.float32) - d_output = cuda.mem_alloc(h_output.nbytes) - - def inference(features, tokens): - global h_output - - _NetworkOutput = collections.namedtuple( # pylint: disable=invalid-name - "NetworkOutput", - ["start_logits", "end_logits", "feature_index"]) - networkOutputs = [] - - eval_time_elapsed = 0 - for feature_index, feature in enumerate(features): - # Copy inputs - input_ids_batch = np.repeat(np.expand_dims(feature.input_ids, 0), args.batch_size, axis=0) - segment_ids_batch = np.repeat(np.expand_dims(feature.segment_ids, 0), args.batch_size, axis=0) - input_mask_batch = np.repeat(np.expand_dims(feature.input_mask, 0), args.batch_size, axis=0) - - input_ids = cuda.register_host_memory(np.ascontiguousarray(input_ids_batch.ravel())) - segment_ids = cuda.register_host_memory(np.ascontiguousarray(segment_ids_batch.ravel())) - input_mask = cuda.register_host_memory(np.ascontiguousarray(input_mask_batch.ravel())) - - eval_start_time = time.time() - cuda.memcpy_htod_async(d_inputs[0], input_ids, stream) - cuda.memcpy_htod_async(d_inputs[1], segment_ids, stream) - cuda.memcpy_htod_async(d_inputs[2], input_mask, stream) - - # Run inference - context.execute_async_v2(bindings=[0 for i in range(binding_idx_offset)] +[int(d_inp) for d_inp in d_inputs] + [int(d_output)], stream_handle=stream.handle) - # Synchronize the stream - stream.synchronize() - eval_time_elapsed += (time.time() - eval_start_time) - - # Transfer predictions back from GPU - cuda.memcpy_dtoh_async(h_output, d_output, stream) - stream.synchronize() - - # Only retrieve and post-process the first batch - batch = h_output[0] - - networkOutputs.append(_NetworkOutput( - start_logits = np.array(batch.squeeze()[:, 0]), - end_logits = np.array(batch.squeeze()[:, 1]), - feature_index = feature_index - )) - - eval_time_elapsed /= len(features) - - # Total number of n-best predictions to generate in the nbest_predictions.json output file - n_best_size = 20 - - # The maximum length of an answer that can be generated. 
This is needed - # because the start and end predictions are not conditioned on one another - max_answer_length = 30 - - prediction, nbest_json, scores_diff_json = dp.get_predictions(tokens, features, - networkOutputs, args.n_best_size, args.max_answer_length) - - return eval_time_elapsed, prediction, nbest_json - - def print_single_query(eval_time_elapsed, prediction, nbest_json): - print("------------------------") - print("Running inference in {:.3f} Sentences/Sec".format(args.batch_size/eval_time_elapsed)) - print("------------------------") - - print("Answer: '{}'".format(prediction)) - print("With probability: {:.3f}".format(nbest_json[0]['probability'] * 100.0)) - - def inference_all_dynamic(features_list, squad_examples, sort_index, all_precision): - # h_output = torch.tensor((args.batch_size, max_seq_length, 2)) - global h_output - _NetworkOutput = collections.namedtuple( # pylint: disable=invalid-name - "NetworkOutput", - ["start_logits", "end_logits", "feature_index"]) - networkOutputs = [] - - batch_input_ids = [] - batch_segment_ids = [] - all_token_ids = [] - batch_example_list = [] - batch_feature_list = [] - batch_feature = [] - batch_example = [] - max_batch_length = 0 - seq_length_list = [] - for index in tqdm(sort_index): - batch_feature.append(features_list[index]) - batch_example.append(squad_examples[index]) - max_batch_length = max(max_batch_length, len(features_list[index].input_ids)) - if args.int8: - max_batch_length = max_seq_length - else: - max_batch_length = math.ceil(max_batch_length / 2) * 2 - seq_length_list.append(len(features_list[index].input_ids)) - if len(batch_feature) == args.batch_size: - batch_input_ids = [ - np.pad(bf.input_ids, (0, max_batch_length - bf.input_ids.shape[0]), 'constant',constant_values = (0)).reshape(1, -1) - for bf in batch_feature - ] - batch_input_ids = np.concatenate(batch_input_ids, axis=0) - batch_segment_ids = [ - np.pad(bf.segment_ids, (0, max_batch_length - bf.segment_ids.shape[0]), 'constant',constant_values = (0)).reshape(1, -1) - for bf in batch_feature - ] - batch_segment_ids = np.concatenate(batch_segment_ids, axis=0) - all_token_ids.append( - [ - batch_input_ids.astype(np.int32), - batch_segment_ids.astype(np.int32) - ] - ) - batch_example_list.append(batch_example) - batch_feature_list.append(batch_feature) - batch_input_ids = [] - batch_segment_ids = [] - batch_feature = [] - batch_example = [] - max_batch_length = 0 - - if len(batch_feature): - batch_input_ids = [ - np.pad(bf.input_ids, (0, max_batch_length - bf.input_ids.shape[0]), 'constant',constant_values = (0)).reshape(1, -1) - for bf in batch_feature - ] - batch_input_ids = np.concatenate(batch_input_ids, axis=0) - batch_segment_ids = [ - np.pad(bf.segment_ids, (0, max_batch_length - bf.segment_ids.shape[0]), 'constant',constant_values = (0)).reshape(1, -1) - for bf in batch_feature - ] - batch_segment_ids = np.concatenate(batch_segment_ids, axis=0) - all_token_ids.append( - [ - batch_input_ids.astype(np.int32), - batch_segment_ids.astype(np.int32) - ] - ) - batch_input_ids = [] - batch_segment_ids = [] - batch_example_list.append(batch_example) - batch_feature_list.append(batch_feature) - - # warm up - for i in range(20): - for binding in range(3): - context.set_binding_shape(binding, (args.batch_size, max_seq_length)) - assert context.all_binding_shapes_specified - cuda.memcpy_htod_async(d_inputs[0], np.zeros((args.batch_size, max_seq_length), dtype=np.int32).ravel(), stream) - cuda.memcpy_htod_async(d_inputs[1], np.zeros((args.batch_size, max_seq_length), 
dtype=np.int32).ravel(), stream) - context.execute_async_v2(bindings=[0 for i in range(binding_idx_offset)] +[int(d_inp) for d_inp in d_inputs] + [int(d_output)], stream_handle=stream.handle) - stream.synchronize() - - start_time = time.time() - output_index = 0 - for input_ids, segment_ids in tqdm(all_token_ids): - for binding in range(3): - context.set_binding_shape(binding, input_ids.shape) - assert context.all_binding_shapes_specified - - cuda.memcpy_htod_async(d_inputs[0], input_ids.ravel(), stream) - cuda.memcpy_htod_async(d_inputs[1], segment_ids.ravel(), stream) - stream.synchronize() - - context.execute_async_v2(bindings=[0 for i in range(binding_idx_offset)] +[int(d_inp) for d_inp in d_inputs] + [int(d_output)], stream_handle=stream.handle) - stream.synchronize() - - cuda.memcpy_dtoh_async(h_output, d_output, stream) - stream.synchronize() - - new_h_output = np.array(h_output.reshape(-1)[:input_ids.shape[0]*input_ids.shape[1]*2]).reshape(input_ids.shape[0], input_ids.shape[1], 2) - for index in range(input_ids.shape[0]): - networkOutputs.append(_NetworkOutput( - start_logits = new_h_output[index, :seq_length_list[output_index], 0], - end_logits = new_h_output[index, :seq_length_list[output_index], 1], - feature_index = index - )) - output_index += 1 - infer_time = time.time() - start_time - output_index = 0 - for (be, bf) in zip(batch_example_list, batch_feature_list): - for index in range(len(bf)): - prediction, nbest_json, scores_diff_json = dp.get_predictions(be[index].doc_tokens, bf, - [networkOutputs[output_index]], args.n_best_size, args.max_answer_length) - output_index += 1 - all_precision[be[index].id] = prediction - return infer_time, all_precision - - status = 0 - if squad_examples: - all_predictions = collections.OrderedDict() - - features_list = [] - lengths = [] - - for example_index, example in tqdm(enumerate(squad_examples)): - features = question_features(example.doc_tokens, example.question_text) - features_list.append(features[0]) - lengths.append(len(features[0].input_ids)) - - sort_index = np.argsort(lengths) - infer_time, all_predictions = inference_all_dynamic(features_list, squad_examples, sort_index, all_predictions) - print(F"E2E time : {infer_time:.3f} seconds") - qps = len(squad_examples)/infer_time - print(f"Latency QPS: {qps} sentences/s") - metricResult = {"metricResult": {}} - metricResult["metricResult"]["E2E time"] = round(infer_time, 3) - metricResult["metricResult"]["Latency QPS"] = round(qps, 3) - print(metricResult) - - with open(output_prediction_file, "w") as f: - f.write(json.dumps(all_predictions, indent=4)) - print("\nOutput dump to {}".format(output_prediction_file)) - - if args.target_qps: - if qps >= args.target_qps: - print(f"target qps: {args.target_qps}, qps: {qps}, pass.") - else: - print(f"target qps: {args.target_qps}, qps: {qps}, failed.") - status = 1 - else: - # Extract tokens from the paragraph - doc_tokens = dp.convert_doc_tokens(paragraph_text) - - if question_text: - print("\nPassage: {}".format(paragraph_text)) - print("\nQuestion: {}".format(question_text)) - - features = question_features(doc_tokens, question_text) - eval_time_elapsed, prediction, nbest_json = inference(features, doc_tokens) - print_single_query(eval_time_elapsed, prediction, nbest_json) - else: - # If no question text is provided, loop until the question is 'exit' - EXIT_CMDS = ["exit", "quit"] - question_text = input("Question (to exit, type one of {:}): ".format(EXIT_CMDS)) - - while question_text.strip() not in EXIT_CMDS: - features =
question_features(doc_tokens, question_text) - eval_time_elapsed, prediction, nbest_json = inference(features, doc_tokens) - # print_single_query(eval_time_elapsed, prediction, nbest_json) - # question_text = input("Question (to exit, type one of {:}): ".format(EXIT_CMDS)) - del context - del engine - sys.exit(status) \ No newline at end of file diff --git a/models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/evaluate-v1.1.py b/models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/evaluate-v1.1.py index 67d6c182..05b91c9b 100644 --- a/models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/evaluate-v1.1.py +++ b/models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/evaluate-v1.1.py @@ -96,7 +96,10 @@ def evaluate(dataset, predictions, f1_acc): status = 0 else: print("&&&& PASSED TensorRT BERT Squad Accuracy matches reference.") - + metricResult = {"metricResult": {}} + metricResult["metricResult"]["exact_match"] = round(exact_match, 3) + metricResult["metricResult"]["f1"] = round(f1, 3) + print(metricResult) return {'exact_match': exact_match, 'f1': f1, "status": status} if __name__ == '__main__': diff --git a/models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/inference.py b/models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/inference.py index b6af06dc..920d5b80 100644 --- a/models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/inference.py +++ b/models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/inference.py @@ -385,6 +385,11 @@ if __name__ == '__main__': qps = math.ceil(len(squad_examples)/args.batch_size)*args.batch_size/infer_time print(f"Latency QPS: {qps} sentences/s") + metricResult = {"metricResult": {}} + metricResult["metricResult"]["E2E time"] = round(infer_time, 3) + metricResult["metricResult"]["Latency QPS"] = round(qps, 3) + print(metricResult) + with open(output_prediction_file, "w") as f: f.write(json.dumps(all_predictions, indent=4)) print("\nOutput dump to {}".format(output_prediction_file)) diff --git a/models/nlp/language_model/bert_base_squad/ixrt/python/perf.py b/models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/perf.py similarity index 100% rename from models/nlp/language_model/bert_base_squad/ixrt/python/perf.py rename to models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/perf.py diff --git a/models/nlp/language_model/bert_base_squad/ixrt/python/load_ixrt_plugin.py b/models/nlp/language_model/bert_base_squad/ixrt/python/load_ixrt_plugin.py deleted file mode 100644 index 8e04b807..00000000 --- a/models/nlp/language_model/bert_base_squad/ixrt/python/load_ixrt_plugin.py +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -from os.path import join, dirname, exists, abspath -import tensorrt as trt -import ctypes -import os -import subprocess - -def is_nvidia_platform(): - try: - # Try to run nvidia-smi - subprocess.check_output(['nvidia-smi']) - return True - except (subprocess.CalledProcessError, FileNotFoundError): - return False - -def load_ixrt_plugin(logger=trt.Logger(trt.Logger.WARNING), namespace="", dynamic_path=""): - if not dynamic_path: - if is_nvidia_platform(): - dynamic_path = join(dirname(abspath(__file__)), "..", "build", "libixrt_plugin.so") - else: - dynamic_path = join(dirname(trt.__file__), "lib", "libixrt_plugin.so") - - if not exists(dynamic_path): - raise FileNotFoundError( - f"The ixrt_plugin lib {dynamic_path} does not exist, please provide a valid plugin path!") - handle = ctypes.CDLL(dynamic_path, mode=ctypes.RTLD_GLOBAL) - handle.initLibNvInferPlugins.argtypes = [ctypes.c_void_p, ctypes.c_char_p] - handle.initLibNvInferPlugins.restype = ctypes.c_bool - handle.initLibNvInferPlugins(None, namespace.encode('utf-8')) - print(f"Loaded plugin from {dynamic_path}") \ No newline at end of file diff --git a/models/nlp/language_model/bert_large_squad/ixrt/python/evaluate-v1.1.py b/models/nlp/language_model/bert_large_squad/ixrt/python/evaluate-v1.1.py index ba4ee190..ce5bb98d 100644 --- a/models/nlp/language_model/bert_large_squad/ixrt/python/evaluate-v1.1.py +++ b/models/nlp/language_model/bert_large_squad/ixrt/python/evaluate-v1.1.py @@ -107,6 +107,10 @@ def evaluate(dataset, predictions, f1_acc): print("&&&& FAILED TensorRT BERT Squad Accuracy matches reference.") else: print("&&&& PASSED TensorRT BERT Squad Accuracy matches reference.") + metricResult = {"metricResult": {}} + metricResult["metricResult"]["exact_match"] = round(exact_match, 3) + metricResult["metricResult"]["f1"] = round(f1, 3) + print(metricResult) return {'exact_match': exact_match, 'f1': f1} if __name__ == '__main__': diff --git a/models/nlp/language_model/bert_large_squad/ixrt/src/qkv_to_context/qkvToContextInt8Plugin.cu b/models/nlp/language_model/bert_large_squad/ixrt/src/qkv_to_context/qkvToContextInt8Plugin.cu index 5c4d5c53..2330debf 100644 --- a/models/nlp/language_model/bert_large_squad/ixrt/src/qkv_to_context/qkvToContextInt8Plugin.cu +++ b/models/nlp/language_model/bert_large_squad/ixrt/src/qkv_to_context/qkvToContextInt8Plugin.cu @@ -284,7 +284,7 @@ cudaError_t fused_multihead_attetion_int8(int8_t* qkv_buffer, int8_t* mask, int8 case 64: case 128: case 192: - case 256: + case 256: { cuinferFlashAttnConfigInfo flashAttnInfo; flashAttnInfo.scaling = sqrt(1.f / (head_dim * 1.0)); flashAttnInfo.quantParam.q_amax = arrange_qkv_amax; @@ -318,7 +318,8 @@ cudaError_t fused_multihead_attetion_int8(int8_t* qkv_buffer, int8_t* mask, int8 CUINFER_CHECK(cuinferFMHAForwardEx(cuinfer_handle, flashAttnInfo, qDesc, q_buffer, kDesc, k_buffer, vDesc, v_buffer, maskDesc, mask, oDesc, qk_buffer)); break; - default: + } + default: { cuinfer_i8_gemm(k_buffer, q_buffer, nullptr, qkv_buffer, batch_size * head_num, batch_seq_len, batch_seq_len, head_dim, batch_seq_len * head_dim, batch_seq_len * head_dim, batch_seq_len * batch_seq_len, scaleBmm1, 0.0, 0, cuinfer_handle, stream); @@ -330,6 +331,7 @@ cudaError_t fused_multihead_attetion_int8(int8_t* qkv_buffer, int8_t* mask, int8 batch_seq_len, batch_seq_len * head_dim, batch_seq_len * batch_seq_len, batch_seq_len * head_dim, scaleBmm2, cuinfer_handle, stream); break; + } } IxinferArrangeAttenOutputI8II8O(batch_token_num, hidden_size, stream, qk_buffer, qkv_out, batch_seq_len,
head_dim, diff --git a/models/speech/speech_recognition/conformer/ixrt/ci/prepare.sh b/models/speech/speech_recognition/conformer/ixrt/ci/prepare.sh index a02a84bb..bbe3af2b 100644 --- a/models/speech/speech_recognition/conformer/ixrt/ci/prepare.sh +++ b/models/speech/speech_recognition/conformer/ixrt/ci/prepare.sh @@ -25,6 +25,8 @@ else echo "Not Support Os" fi +pip3 install -r requirements.txt + ln -s /root/data/checkpoints/20210601_u2++_conformer_exp_aishell ./conformer_checkpoints cp -r /root/data/datasets/AISHELL/data_aishell ./aishell_test_data diff --git a/models/speech/speech_recognition/conformer/ixrt/ixrt_inference_accuracy.py b/models/speech/speech_recognition/conformer/ixrt/ixrt_inference_accuracy.py index 45a5a1ab..70222190 100644 --- a/models/speech/speech_recognition/conformer/ixrt/ixrt_inference_accuracy.py +++ b/models/speech/speech_recognition/conformer/ixrt/ixrt_inference_accuracy.py @@ -274,6 +274,7 @@ def main(): target_cer = float(os.environ["Accuracy"]) metricResult = {"metricResult": {}} metricResult["metricResult"]["CER"] = round(cer, 3) + metricResult["metricResult"]["target CER"] = round(target_cer, 3) print(metricResult) print("CER: ", cer, "target CER: ", target_cer) if cer <= target_cer: diff --git a/tests/run_ixrt.py b/tests/run_ixrt.py index c1324d15..e5e9ac58 100644 --- a/tests/run_ixrt.py +++ b/tests/run_ixrt.py @@ -319,6 +319,12 @@ def run_segmentation_and_face_testcase(model): bash scripts/infer_{model_name}_{prec}_performance.sh """ + if model_name == "clip": + script = f""" + cd ../{model['relative_path']} + python3 inference.py + """ + r, t = run_script(script) sout = r.stdout @@ -401,11 +407,6 @@ def run_nlp_testcase(model): bash script/build_engine.sh --bs 32 --int8 bash script/inference_squad.sh --bs 32 --int8 """ - elif model_name == "clip": - script = f""" - cd ../{model['relative_path']} - python3 inference.py - """ r, t = run_script(script) sout = r.stdout -- Gitee From cbbc6bd24d5c966e3384337dc667e02cc1a16707 Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Fri, 17 Jan 2025 10:48:17 +0800 Subject: [PATCH 30/35] update bert --- tests/run_ixrt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/run_ixrt.py b/tests/run_ixrt.py index e5e9ac58..63573efa 100644 --- a/tests/run_ixrt.py +++ b/tests/run_ixrt.py @@ -391,7 +391,7 @@ def run_nlp_testcase(model): script = f""" set -x cd ../{model['relative_path']}/python - bash scripts/infer_{model_name}_{prec}_ixrt.sh + bash script/infer_{model_name}_{prec}_ixrt.sh """ elif model_name == "bert_large_squad": script = f""" -- Gitee From f5375839bc709231f08d1f3e8cabaee73d836316 Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Fri, 17 Jan 2025 11:27:38 +0800 Subject: [PATCH 31/35] fix yolov7 --- models/cv/detection/yolov7/ixrt/README.md | 2 +- models/cv/detection/yolov7/ixrt/ci/prepare.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/models/cv/detection/yolov7/ixrt/README.md b/models/cv/detection/yolov7/ixrt/README.md index a2716c7a..7b6e91c3 100644 --- a/models/cv/detection/yolov7/ixrt/README.md +++ b/models/cv/detection/yolov7/ixrt/README.md @@ -33,7 +33,7 @@ Dataset: to download the valida git clone https://github.com/WongKinYiu/yolov7.git cd yolov7 -python3 export.py --weights yolov7.pt --grid --end2end --simplify --topk-all 100 --iou-thres 0.65 --conf-thres 0.35 --img-size 640 640 --max-wh 640 --batch-size 32 +python3 export.py --weights yolov7.pt --grid --end2end --simplify --topk-all 100 --iou-thres 0.65 --conf-thres 0.35 --img-size 640 640 
--max-wh 640 --batch-size 16 mkdir /Your_Projects/To/checkpoints mv yolov7.onnx /Path/to/checkpoints/yolov7m.onnx ``` diff --git a/models/cv/detection/yolov7/ixrt/ci/prepare.sh b/models/cv/detection/yolov7/ixrt/ci/prepare.sh index fca64779..310566fb 100644 --- a/models/cv/detection/yolov7/ixrt/ci/prepare.sh +++ b/models/cv/detection/yolov7/ixrt/ci/prepare.sh @@ -30,6 +30,6 @@ mkdir -p checkpoints cp -r /root/data/3rd_party/yolov7 ./ cd yolov7 ln -s /root/data/checkpoints/yolov7.pt ./ -python3 export.py --weights yolov7.pt --grid --end2end --simplify --topk-all 100 --iou-thres 0.65 --conf-thres 0.35 --img-size 640 640 --max-wh 640 --batch-size 32 +python3 export.py --weights yolov7.pt --grid --end2end --simplify --topk-all 100 --iou-thres 0.65 --conf-thres 0.35 --img-size 640 640 --max-wh 640 --batch-size 16 mv yolov7.onnx ../checkpoints/yolov7m.onnx cd .. -- Gitee From e7e2cae2012c7fccf0af488de0af583556157991 Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Mon, 20 Jan 2025 02:40:22 +0000 Subject: [PATCH 32/35] fix byte infer --- .../general_perf/backends/ILUVATAR/common.py | 26 +- .../backends/ILUVATAR/optimizer/README.md | 114 ++++ .../backends/ILUVATAR/optimizer/__init__.py | 16 + .../ILUVATAR/optimizer/onnx_model_PVT.py | 593 ++++++++++++++++ .../ILUVATAR/optimizer/onnx_model_bert.py | 47 +- .../optimizer/onnx_model_conformer.py | 17 +- .../optimizer/onnx_model_cosyvoice.py | 640 ++++++++++++++++++ .../ILUVATAR/optimizer/onnx_model_roformer.py | 17 +- .../ILUVATAR/optimizer/onnx_model_t5.py | 45 +- .../ILUVATAR/optimizer/onnx_model_yolo.py | 16 + .../backends/ILUVATAR/optimizer/optimizer.py | 115 ++-- .../ILUVATAR/optimizer/passes/__init__.py | 16 + .../ILUVATAR/optimizer/passes/float16.py | 139 +++- .../optimizer/passes/fuse_inverse_sigmoid.py | 85 +++ .../optimizer/passes/fuse_l2_normalization.py | 69 ++ .../optimizer/passes/fuse_omdet_attention.py | 149 ++++ .../optimizer/passes/fuse_series_bias_add.py | 25 +- .../optimizer/passes/fusion_PVT_attention.py | 130 ++++ .../passes/fusion_albert_attention.py | 49 +- .../optimizer/passes/fusion_attention.py | 103 ++- .../ILUVATAR/optimizer/passes/fusion_base.py | 16 + .../optimizer/passes/fusion_biasgelu.py | 19 +- .../passes/fusion_conformer_attention.py | 16 + .../passes/fusion_conformer_xsoftmax.py | 16 + .../optimizer/passes/fusion_conv_reformat.py | 128 ++++ .../passes/fusion_cosyvoice_attention.py | 210 ++++++ .../passes/fusion_cosyvoice_splitQKV.py | 197 ++++++ ...usion_cosyvoice_splitQKV_update_KVcache.py | 188 +++++ .../optimizer/passes/fusion_customfc.py | 47 +- .../passes/fusion_disentangled_attention.py | 16 + .../optimizer/passes/fusion_embedlayer.py | 465 +++++++++++-- .../optimizer/passes/fusion_fastgelu.py | 16 + .../passes/fusion_format_roformer.py | 16 + .../ILUVATAR/optimizer/passes/fusion_gelu.py | 82 ++- .../passes/fusion_gelu_approximation.py | 23 +- .../optimizer/passes/fusion_gpt_attention.py | 99 ++- .../passes/fusion_gpt_attention_megatron.py | 84 ++- .../passes/fusion_gpt_attention_no_past.py | 37 +- .../optimizer/passes/fusion_layernorm.py | 16 + .../optimizer/passes/fusion_options.py | 19 + .../passes/fusion_qordered_attention.py | 162 ++++- .../optimizer/passes/fusion_qordered_gelu.py | 39 +- .../passes/fusion_qordered_layernorm.py | 57 +- .../passes/fusion_qordered_matmul.py | 85 ++- .../optimizer/passes/fusion_reshape.py | 37 +- .../optimizer/passes/fusion_rms_norm.py | 16 + .../passes/fusion_roformer_attention.py | 5 +- .../ILUVATAR/optimizer/passes/fusion_rope.py | 16 + 
.../ILUVATAR/optimizer/passes/fusion_shape.py | 37 +- .../optimizer/passes/fusion_skiplayernorm.py | 16 + .../optimizer/passes/fusion_splitQKV.py | 16 + .../passes/fusion_splitQKV_update_KVcache.py | 128 ++++ .../passes/fusion_swinl_attention.py | 186 +++-- .../optimizer/passes/fusion_t5_attention.py | 185 ++++- .../ILUVATAR/optimizer/passes/fusion_utils.py | 66 +- .../passes/fusion_videobert_attention.py | 156 +++-- .../optimizer/passes/fusion_vit_attention.py | 115 ++++ .../optimizer/passes/fusion_xsoftmax.py | 16 + .../optimizer/passes/fusion_yolov5_decoder.py | 16 + .../ILUVATAR/optimizer/passes/onnx_model.py | 16 + .../optimizer/passes/shape_infer_helper.py | 31 +- .../optimizer/passes/symbolic_shape_infer.py | 628 +++++++++++++---- .../ILUVATAR/optimizer/requirements.txt | 20 +- .../ILUVATAR/runtime_backend_iluvatar.py | 2 +- .../model_zoo/roformer-tf-fp32.json | 8 +- .../general_perf/requirements.txt | 2 +- 66 files changed, 5619 insertions(+), 563 deletions(-) create mode 100755 toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/README.md create mode 100644 toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_PVT.py create mode 100755 toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_cosyvoice.py create mode 100644 toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fuse_inverse_sigmoid.py create mode 100644 toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fuse_l2_normalization.py create mode 100644 toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fuse_omdet_attention.py create mode 100644 toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_PVT_attention.py create mode 100644 toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_conv_reformat.py create mode 100644 toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_cosyvoice_attention.py create mode 100755 toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_cosyvoice_splitQKV.py create mode 100644 toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_cosyvoice_splitQKV_update_KVcache.py create mode 100644 toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_splitQKV_update_KVcache.py diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/common.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/common.py index c4576743..4062220c 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/common.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/common.py @@ -26,8 +26,8 @@ import threading import importlib -tensorrt = None -Dims = None +import tensorrt +from tensorrt import Dims tvm = None @@ -39,25 +39,7 @@ def setup_seed(seed): torch.backends.cudnn.deterministic = True -def load_ixrt_plugin(logger=None, namespace="", dynamic_path="", model="", precision=""): - global tensorrt - global Dims - - if tensorrt is not None: - return - - if precision == 'FP16': - if model == 'resnet50' or model == 'bert' or model == 'albert' or model == 'deberta' or model == 'yolov5': - tensorrt = importlib.import_module("tensorrt_legacy") - Dims = getattr(tensorrt, "Dims") - else: - tensorrt = importlib.import_module("tensorrt") - Dims = getattr(tensorrt, 
"Dims") - - if precision == 'INT8': - tensorrt = importlib.import_module("tensorrt") - Dims = getattr(tensorrt, "Dims") - +def load_ixrt_plugin(logger=tensorrt.Logger(tensorrt.Logger.WARNING), namespace="", dynamic_path="", model="", precision=""): if not dynamic_path: dynamic_path = join(dirname(tensorrt.__file__), "lib", "libixrt_plugin.so") @@ -66,7 +48,7 @@ def load_ixrt_plugin(logger=None, namespace="", dynamic_path="", model="", preci f"The ixrt_plugin lib {dynamic_path} is not existed, please provided effective plugin path!") ctypes.CDLL(dynamic_path, mode=ctypes.RTLD_GLOBAL) - tensorrt.init_libnvinfer_plugins(tensorrt.Logger(tensorrt.Logger.INFO), namespace) + tensorrt.init_libnvinfer_plugins(logger, namespace) print(f"Loaded plugin from {dynamic_path}") diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/README.md b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/README.md new file mode 100755 index 00000000..3d131803 --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/README.md @@ -0,0 +1,114 @@ +# IxRT optimizer + +## 1. optimizer 简介 + +`optimizer` 是一个 ixrt 中集成的图融合工具,用于将onnx图中的op融合成对应的IxRT plugin,一般与 IxRT 配合进行使用; + +## 2. optimizer 功能说明 + +| 功能 | 说明 | +| ---------- | ------------------------------------------------------------ | +| 动态图支持 | 支持融合动态图和静态图 | +| 模型支持 | 目前测试通过videobert, roberta, deberta, swinL, roformer, albert, yolov5s, visionTransformer, gpt2模型,其他模型暂不推荐使用该工具 | + +## 3. optimizer 运行参数 + +| 参数 | 说明 | +| ---------------- | ------------------------------------------------------------ | +| `--onnx` | 必选 ,指定要运行的 onnx 模型路径 | +| `--num_heads` | 可选 ,指定模型对应Attention模块注意力头的个数 | +| `--hidden_size` | 可选, 模型模型隐藏层的大小 | +| `--input_shapes` | 可选 ,固定动态模型的输入形状,以从静态形状推理,示例 --input_shapes "input_name1:3x224x224, input_name2:3x224x224"类型 | +| `--dump_onnx` | 可选 ,用于图融合过程中dump出中间的onnx图,生成 _sim 结尾的 onnx 模型 | +| `--model_type` | 可选 ,可以指定要融合的模型类型,默认是"bert", 可选["bert", "swint", "roformer", "yolo", "gpt2", "vit"] | +| `--log_level` | 可选 ,指定IxRT运行时显示日志的等级, 可指定为debug、info、error,默认为 info | + + +## 4. 
运行示例 + +### 4.1 示例1:融合albert|videobert|roberta|deberta + +```bash +cd oss/tools/optimizer +python3 optimizer.py --onnx ${MODEL_PATH} +``` + +### 4.2 示例2:融合swinL + +```bash +cd oss/tools/optimizer +python3 optimizer.py --onnx ${MODEL_PATH} --input_shapes pixel_values.1:${BS}x3x384x384 --model_type swint +``` + +### 4.3 示例3:融合roformer + +```bash +cd oss/tools/optimizer +python3 optimizer.py --onnx ${MODEL_PATH} --model_type roformer +``` + +### 4.4 示例4:融合yolov5s + +```bash +cd oss/tools/optimizer +python3 optimizer.py --onnx ${MODEL_PATH} --model_type yolo +``` + +### 4.5 精度验证 + +#### 4.5.1 示例1:albert模型 + +模型变量示例: + +``` +MODEL_PATH="data/albert/albert-base-squad.onnx" +MODEL_END_PATH="data/albert/albert-base-squad_end.onnx" +MODEL_ENGINE_PATH="data/albert/albert-base-squad_end.engine" +``` + +运行命令 + +```bash +cd oss/tools/optimizer +python3 optimizer.py --onnx ${MODEL_PATH} --dump_onnx +ixrtexec --onnx ${MODEL_END_PATH} --min_shape input_ids.1:${BS}x384,attention_mask.1:${BS}x384,token_type_ids.1:${BS}x384 \ + --opt_shape input_ids.1:${BS}x384,attention_mask.1:${BS}x384,token_type_ids.1:${BS}x384 \ + --max_shape input_ids.1:${BS}x384,attention_mask.1:${BS}x384,token_type_ids.1:${BS}x384 \ + --save_engine ${MODEL_ENGINE_PATH} --log_level verbose --plugins ixrt_plugin +ixrtexec --load_engine ${MODEL_ENGINE_PATH} --ort_onnx ${MODEL_PATH} --plugins ixrt_plugin --verify_acc +``` + +#### 4.5.2 示例2:swinL模型 + +模型变量示例: + +``` +BS=1 +MODEL_PATH="data/swint/swin-transformer-large.onnx" +MODEL_END_PATH = "data/swint/swin-transformer-large_end.onnx" +MODEL_ENGINE_PATH = "data/swint/swin-transformer-large_end.engine" +MODEL_SIM_STATIC_SIM_PATH = "data/swint/swin-transformer-large_sim_static_sim.onnx" +``` + +运行命令 + +```bash +cd oss/tools/optimizer +# 固定输入形状为 ${BS}x3x384x384 +python3 optimizer.py --onnx ${MODEL_PATH} --input_shapes pixel_values.1:${BS}x3x384x384 --model_type swint --dump_onnx + +# Build engine +ixrtexec --onnx ${MODEL_END_PATH} --save_engine ${MODEL_ENGINE_PATH} --log_level verbose --plugins ixrt_plugin + +# 测试性能 +ixrtexec --load_engine ${MODEL_ENGINE_PATH} --plugins ixrt_plugin + +# 测试精度 +ixrtexec --load_engine ${MODEL_ENGINE_PATH} --ort_onnx ${MODEL_SIM_STATIC_SIM_PATH} --plugins ixrt_plugin --verify_acc +``` + +请参考[高级话题](5_advanced_topics.md)中的精度对比工具一节,了解详细使用方法和原理。 + +也可以用[C++ API 使用简介](3_cpp_api.md)或 [Python API 使用简介](4_python_api.md) + +具体使用方法可以参考oss/samples diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/__init__.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/__init__.py index e69de29b..de522e5b 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/__init__.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+# + diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_PVT.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_PVT.py new file mode 100644 index 00000000..3a9c0ca0 --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_PVT.py @@ -0,0 +1,593 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- + +from logging import getLogger +from typing import List, Optional + +import onnx +from onnx import GraphProto, ModelProto, TensorProto, ValueInfoProto, helper +from passes.fuse_series_bias_add import FusionSerialBiasAdd +from passes.fusion_albert_attention import FusionAlbertAttention +from passes.fusion_attention import AttentionMask, FusionAttention +from passes.fusion_biasgelu import FusionBiasGelu +from passes.fusion_customfc import ( + FusionCustomFC, + FusionCustomFCActivation, + FusionCustomFCGPT2, +) +from passes.fusion_disentangled_attention import FusionDisentangledAttention +from passes.fusion_embedlayer import FusionEmbedLayerNormalization +from passes.fusion_fastgelu import FusionFastGelu +from passes.fusion_format_roformer import ( + FusionFormatInvalidMask, + FusionRemoveUselessElementwise, +) +from passes.fusion_gelu import FusionGelu +from passes.fusion_gelu_approximation import FusionGeluApproximation +from passes.fusion_gpt_attention_no_past import FusionGptAttentionNoPast +from passes.fusion_layernorm import FusionLayerNormalization, FusionLayerNormalizationTF +from passes.fusion_options import FusionOptions +from passes.fusion_qordered_attention import FusionQOrderedAttention +from passes.fusion_qordered_gelu import FusionQOrderedGelu +from passes.fusion_qordered_layernorm import FusionQOrderedLayerNormalization +from passes.fusion_qordered_matmul import FusionQOrderedMatMul +from passes.fusion_reshape import FusionReshape +from passes.fusion_shape import FusionShape +from passes.fusion_skiplayernorm import ( + FusionBiasSkipLayerNormalization, + FusionSkipLayerNormalization, +) + +from passes.fusion_utils import FusionUtils + +from passes.fusion_conv_reformat import FusionConvReformat + +from passes.fusion_xsoftmax import FusionXSoftmax +from passes.fusion_PVT_attention import FusionPVTAttention +from passes.onnx_model import OnnxModel + +logger = getLogger(__name__) + + +class PVTOptimizationOptions(FusionOptions): + """This class is deprecated""" + + def __init__(self, model_type): + logger.warning( + f"PVTOptimizationOptions is depreciated. Please use FusionOptions instead." + ) + super().__init__(model_type) + + +class PVTOnnxModel(OnnxModel): + def __init__(self, model: ModelProto, num_heads: int = 0, hidden_size: int = 0): + """Initialize BERT ONNX Model. + + Args: + model (ModelProto): the ONNX model + num_heads (int, optional): number of attention heads. Defaults to 0 (detect the parameter automatically). + hidden_size (int, optional): hidden dimension. Defaults to 0 (detect the parameter automatically). 
+ """ + assert (num_heads == 0 and hidden_size == 0) or ( + num_heads > 0 and hidden_size % num_heads == 0 + ) + + super().__init__(model) + self.num_heads = num_heads + self.hidden_size = hidden_size + + self.attention_mask = AttentionMask(self) + self.attention_fusion = FusionAttention( + self, self.hidden_size, self.num_heads, self.attention_mask + ) + self.qordered_attention_fusion = FusionQOrderedAttention( + self, self.hidden_size, self.num_heads, self.attention_mask + ) + self.utils = FusionUtils(self) + + def fuse_attention(self): + self.attention_fusion.apply() + FusionAlbertAttention( + self, self.hidden_size, self.num_heads, self.attention_mask + ).apply() + # FusionVideoBertAttention(self).apply() + # FusionVITAttention(self).apply() + # FusionSwinLAttention(self).apply() + # FusionGptAttentionNoPast(self).apply() + FusionPVTAttention(self).apply() + # Only relevant in models with Q-DQ nodes + self.qordered_attention_fusion.apply() + + def fuse_format_roformer(self): + FusionRemoveUselessElementwise(self).apply() + fusion = FusionFormatInvalidMask(self) + fusion.apply() + + def fuse_custom_fc(self): + fusion = FusionCustomFC(self) + fusion.apply() + + def fuse_custom_fc_activation(self): + fusion = FusionCustomFCActivation(self) + fusion.apply() + + def fuse_custom_fc_gpt2_classify(self): + fusion = FusionCustomFCGPT2(self) + fusion.apply() + + def fuse_swinT_serial_bias_add(self): + fusion = FusionSerialBiasAdd(self) + fusion.apply() + + def fuse_gelu(self): + fusion = FusionGelu(self) + fusion.apply() + fusion = FusionFastGelu(self) + fusion.apply() + # Only relevant in models with Q-DQ nodes + fusion = FusionQOrderedGelu(self) + fusion.apply() + + def fuse_bias_gelu(self, is_fastgelu): + fusion = FusionBiasGelu(self, is_fastgelu) + fusion.apply() + + def fuse_custom_xsoftmax(self): + fusion = FusionXSoftmax(self) + fusion.apply() + + def fuse_disentangled_attention(self): + fusion = FusionDisentangledAttention(self) + fusion.apply() + + def gelu_approximation(self): + fusion = FusionGeluApproximation(self) + fusion.apply() + + def fuse_add_bias_skip_layer_norm(self): + fusion = FusionBiasSkipLayerNormalization(self) + fusion.apply() + + def fuse_reshape(self): + fusion = FusionReshape(self) + fusion.apply() + + def fuse_shape(self): + fusion = FusionShape(self) + fusion.apply() + + def fuse_embed_layer(self): + fusion = FusionEmbedLayerNormalization(self) + fusion.apply() + + def fuse_layer_norm(self): + fusion = FusionLayerNormalization(self, self.hidden_size) + fusion.apply() + + fusion = FusionLayerNormalizationTF(self) + fusion.apply() + + # Only relevant in models with Q-DQ nodes + fusion = FusionQOrderedLayerNormalization(self) + fusion.apply() + + def fuse_skip_layer_norm(self): + fusion = FusionSkipLayerNormalization(self) + fusion.apply() + + # Only relevant in models with Q-DQ nodes + def fuse_qordered_mamtul(self): + fusion = FusionQOrderedMatMul(self) + fusion.apply() + + def conv_reformat(self): + fusion = FusionConvReformat(self) + fusion.apply() + + + + def get_graph_inputs_from_node_type( + self, op_type: str, input_indices: List[int], casted: bool + ): + """ + Get graph inputs that feed into node type (like EmbedLayerNormalization or Attention). + Returns a list of the graph input names based on the filter whether it is casted or not. 
+ """ + graph_inputs = [] + + output_name_to_node = self.output_name_to_node() + nodes = self.get_nodes_by_op_type(op_type) + for node in nodes: + bert_inputs = [node.input[i] for i in input_indices if i < len(node.input)] + for bert_input in bert_inputs: + if self.find_graph_input(bert_input): + if not casted: + graph_inputs.append(bert_input) + elif bert_input in output_name_to_node: + parent = output_name_to_node[bert_input] + if ( + parent.op_type == "Cast" + and self.find_graph_input(parent.input[0]) is not None + ): + if casted: + graph_inputs.append(parent.input[0]) + return graph_inputs + + def get_graph_inputs_from_fused_nodes(self, casted: bool): + inputs = self.get_graph_inputs_from_node_type( + "EmbedLayerNormalization", [0, 1, 7], casted + ) + inputs += self.get_graph_inputs_from_node_type("Attention", [3], casted) + return inputs + + def change_graph_input_type( + self, + graph: GraphProto, + graph_input: ValueInfoProto, + new_type: int = TensorProto.INT32, + ): + """Change graph input type, and add Cast node if needed. + + Args: + graph (GraphProto): graph + graph_input (TensorProto): input of the graph + new_type (int, optional): new data type. Defaults to TensorProto.INT32. + + Returns: + NodeProto: a new Cast node that added. None if Cast node is not added. + List[NodeProto]: Cast nodes that have been removed. + """ + assert isinstance(graph, GraphProto) + assert isinstance(graph_input, ValueInfoProto) + assert self.find_graph_input(graph_input.name) + + if graph_input.type.tensor_type.elem_type == int(new_type): + return None, [] + + new_cast_node = None + nodes_to_remove = [] + + input_name_to_nodes = self.input_name_to_nodes() + if graph_input.name in input_name_to_nodes: + nodes = input_name_to_nodes[graph_input.name] + + # For children that is not Cast node, insert a Cast node to convert int32 to original data type. + nodes_not_cast = [node for node in nodes if node.op_type != "Cast"] + if nodes_not_cast: + node_name = self.create_node_name("Cast") + output_name = node_name + "_" + graph_input.name + new_value_info = graph.value_info.add() + new_value_info.CopyFrom(graph_input) + new_value_info.name = output_name + new_cast_node = helper.make_node( + "Cast", + [graph_input.name], + [output_name], + to=int(graph_input.type.tensor_type.elem_type), + name=node_name, + ) + graph.node.extend([new_cast_node]) + + for node in nodes_not_cast: + OnnxModel.replace_node_input(node, graph_input.name, output_name) + + # For children that is Cast node, no need to insert Cast. + # When the children is Cast to int32, we can remove that Cast node since input type is int32 now. 
+ nodes_cast = [node for node in nodes if node.op_type == "Cast"] + for node in nodes_cast: + if OnnxModel.get_node_attribute(node, "to") == int(new_type): + self.replace_input_of_all_nodes(node.output[0], graph_input.name) + if not self.find_graph_output(node.output[0]): + nodes_to_remove.append(node) + if nodes_to_remove: + self.remove_nodes(nodes_to_remove) + + graph_input.type.tensor_type.elem_type = int(new_type) + return new_cast_node, nodes_to_remove + + def change_graph_inputs_to_int32(self): + """Change data type of all graph inputs to int32 type, and add Cast node if needed.""" + graph = self.graph() + add_cast_count = 0 + remove_cast_count = 0 + for graph_input in graph.input: + new_node, removed_nodes = self.change_graph_input_type( + graph, graph_input, TensorProto.INT32 + ) + if new_node: + add_cast_count += 1 + remove_cast_count += len(removed_nodes) + logger.info( + f"Graph inputs are changed to int32. Added {add_cast_count} Cast nodes, and removed {remove_cast_count} Cast nodes." + ) + + def use_dynamic_axes( + self, dynamic_batch_dim="batch_size", dynamic_seq_len="max_seq_len" + ): + """ + Update input and output shape to use dynamic axes. + """ + bert_graph_inputs = self.get_graph_inputs_from_fused_nodes( + casted=True + ) + self.get_graph_inputs_from_fused_nodes(casted=False) + + dynamic_batch_inputs = {} + for input in self.model.graph.input: + if input.name in bert_graph_inputs: + dim_proto = input.type.tensor_type.shape.dim[0] + dim_proto.dim_param = dynamic_batch_dim + if dynamic_seq_len is not None: + dim_proto = input.type.tensor_type.shape.dim[1] + dim_proto.dim_param = dynamic_seq_len + + for output in self.model.graph.output: + dim_proto = output.type.tensor_type.shape.dim[0] + dim_proto.dim_param = dynamic_batch_dim + + def preprocess(self): + self.adjust_reshape_and_expand() + return + + def adjust_reshape_and_expand(self): + nodes_to_remove = [] + for node in self.nodes(): + if node.op_type == "Reshape": + # Clean up unneccessary reshape nodes. + # Find reshape nodes with no actually data in "shape" attribute and remove. + reshape_shape = self.get_constant_value(node.input[1]) + if reshape_shape is not None and reshape_shape.size == 0: + nodes_to_remove.extend([node]) + self.replace_input_of_all_nodes(node.output[0], node.input[0]) + continue + + # Find path "Slice" -> "Reshape" -> "Expand" -> "Expand" -> current "Reshape", simplify the graph by + # changing current reshape's input to output of slice. 
+ reshape_path = self.match_parent_path( + node, + ["Expand", "Expand", "Reshape", "Slice"], + [0, 0, 0, 0], + self.output_name_to_node(), + ) + if reshape_path is not None: + expand_node = reshape_path[-3] + expand_shape_value = self.get_constant_value(expand_node.input[1]) + + reshape_before_expand = reshape_path[-2] + shape_value = self.get_constant_value( + reshape_before_expand.input[1] + ) + + slice_node = reshape_path[-1] + if ( + expand_shape_value is not None + and shape_value is not None + and len(expand_shape_value) == 2 + and len(shape_value) == 1 + and expand_shape_value[1] == shape_value[0] + ): + node.input[0] = slice_node.output[0] + + if nodes_to_remove: + self.remove_nodes(nodes_to_remove) + logger.info(f"Removed Reshape and Expand count: {len(nodes_to_remove)}") + + def clean_graph(self): + output_name_to_node = self.output_name_to_node() + nodes_to_remove = [] + for node in self.nodes(): + # Before: + # input_ids --> Shape --> Gather(indices=0) --> Unsqueeze ------+ + # | | + # | v + # +----> Shape --> Gather(indices=1) --> Unsqueeze---> Concat --> ConstantOfShape -->Cast --> EmbedLayerNormaliation/ReduceSum + # After: + # input_ids --> Shape --> ConstantOfShape -->Cast --> EmbedLayerNormaliation/ReduceSum + # TODO: merge ConstantOfShape -->Cast to ConstantOfShape (need update the data type of value) + op_input_id = {"EmbedLayerNormalization": 1, "ReduceSum": 0, "Attention": 3} + if node.op_type in op_input_id: + i = op_input_id[node.op_type] + parent_nodes = self.match_parent_path( + node, + [ + "Cast", + "ConstantOfShape", + "Concat", + "Unsqueeze", + "Gather", + "Shape", + ], + [i, 0, 0, 0, 0, 0], + output_name_to_node, + ) + if parent_nodes is not None: + ( + cast, + constantOfShape, + concat, + unsqueeze, + gather, + shape, + ) = parent_nodes + if shape.input[0] == self.graph().input[0].name: + constantOfShape.input[0] = shape.output[0] + output_name_to_node = self.output_name_to_node() + + if node.op_type == "Attention": + # Before: + # input_ids --> Shape -->ConstantOfShape -->Cast --> ReduceSum --> Attention + # After: + # remove this path, and remove the optional mask_index input of Attention node. + parent_nodes = self.match_parent_path( + node, + ["ReduceSum", "Cast", "ConstantOfShape", "Shape"], + [3, 0, 0, 0], + output_name_to_node, + ) + if parent_nodes is not None: + if parent_nodes[-1].input[0] == self.graph().input[0].name: + attention_node = helper.make_node( + "Attention", + inputs=node.input[0 : len(node.input) - 1], + outputs=node.output, + name=node.name + "_remove_mask", + ) + attention_node.domain = "com.microsoft" + attention_node.attribute.extend( + [helper.make_attribute("num_heads", self.num_heads)] + ) + self.add_node( + attention_node, self.get_graph_by_node(attention_node).name + ) + nodes_to_remove.append(node) + self.remove_nodes(nodes_to_remove) + + def postprocess(self): + self.clean_graph() + self.prune_graph() + + def optimize( + self, options: Optional[FusionOptions] = None, add_dynamic_axes: bool = False + ): + if (options is not None) and not options.enable_shape_inference: + self.disable_shape_inference() + + self.utils.remove_identity_nodes() + + # Remove cast nodes that having same data type of input and output based on symbolic shape inference. 
+ self.utils.remove_useless_cast_nodes() + + if (options is None) or options.enable_layer_norm: + self.fuse_layer_norm() + + if (options is None) or options.enable_gelu: + self.fuse_gelu() + + self.preprocess() + + self.fuse_reshape() + + if (options is None) or options.enable_skip_layer_norm: + self.fuse_skip_layer_norm() + + if options.enable_swint_opt: + self.fuse_custom_fc() + self.fuse_swinT_serial_bias_add() + + if options.enable_format_roformer: + self.fuse_format_roformer() + + if options.enable_gpt2_classify or options.enable_vit: + self.fuse_custom_fc_gpt2_classify() + + if options.enable_vit: + self.fuse_custom_fc() + + # if (options is None) or options.enable_attention: + # if options is not None: + # self.attention_mask.set_mask_format(options.attention_mask_format) + self.fuse_attention() + + self.conv_reformat() + + if (options is None) or options.enable_skip_layer_norm: + self.fuse_skip_layer_norm() + + self.fuse_custom_fc() + + self.fuse_custom_xsoftmax() + + self.fuse_disentangled_attention() + + # Perform the MatMul fusion after the Attention fusion as we do not + # want to fuse the MatMuls inside the Attention subgraphs + if (options is None) or options.enable_qordered_matmul: + self.fuse_qordered_mamtul() + + self.fuse_shape() + + if (options is None) or options.enable_embed_layer_norm: + self.fuse_embed_layer() + + # Remove reshape nodes that having same shape of input and output based on symbolic shape inference. + self.utils.remove_useless_reshape_nodes() + + self.postprocess() + + # Bias fusion is done after postprocess to avoid extra Reshape between bias and Gelu/FastGelu/SkipLayerNormalization + if (options is None) or options.enable_bias_gelu: + # Fuse Gelu and Add Bias before it. + self.fuse_bias_gelu(is_fastgelu=True) + self.fuse_bias_gelu(is_fastgelu=False) + + if (options is None) or options.enable_bias_skip_layer_norm: + # Fuse SkipLayerNormalization and Add Bias before it. + self.fuse_add_bias_skip_layer_norm() + + if options is not None and options.enable_gelu_approximation: + self.gelu_approximation() + + self.fuse_custom_fc_activation() + + self.remove_unused_constant() + + # Use symbolic batch dimension in input and output. + if add_dynamic_axes: + self.use_dynamic_axes() + + logger.info(f"opset version: {self.get_opset_version()}") + + def get_fused_operator_statistics(self): + """ + Returns node count of fused operators. + """ + op_count = {} + ops = [ + "EmbedLayerNormalization", + "Attention", + "QOrderedAttention", + "Gelu", + "QOrderedGelu", + "FastGelu", + "BiasGelu", + "LayerNormalization", + "QOrderedLayerNormalization", + "SkipLayerNormalization", + "QOrderedMatMul", + ] + for op in ops: + nodes = self.get_nodes_by_op_type(op) + op_count[op] = len(nodes) + logger.info(f"Optimized operators:{op_count}") + return op_count + + def is_fully_optimized(self): + """ + Returns True when the model is fully optimized. 
+ """ + op_count = self.get_fused_operator_statistics() + embed = op_count["EmbedLayerNormalization"] + attention = op_count["Attention"] + op_count["QOrderedAttention"] + gelu = op_count["Gelu"] + op_count["BiasGelu"] + op_count["FastGelu"] + layer_norm = op_count["LayerNormalization"] + op_count["SkipLayerNormalization"] + is_perfect = ( + (embed > 0) + and (attention > 0) + and (attention == gelu) + and (layer_norm >= 2 * attention) + ) + + if layer_norm == 0: + logger.debug("Layer Normalization not fused") + + if gelu == 0: + logger.debug("Gelu/FastGelu not fused") + + if embed == 0: + logger.debug("Embed Layer not fused") + + if attention == 0: + logger.warning("Attention not fused") + + return is_perfect \ No newline at end of file diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_bert.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_bert.py index 7c40a978..7324603e 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_bert.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_bert.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
@@ -16,6 +32,7 @@ from passes.fusion_customfc import ( FusionCustomFC, FusionCustomFCActivation, FusionCustomFCGPT2, + FusionTorchvisionVitCustomFC, ) from passes.fusion_disentangled_attention import FusionDisentangledAttention from passes.fusion_embedlayer import FusionEmbedLayerNormalization @@ -42,8 +59,11 @@ from passes.fusion_skiplayernorm import ( from passes.fusion_swinl_attention import FusionSwinLAttention from passes.fusion_utils import FusionUtils from passes.fusion_videobert_attention import FusionVideoBertAttention -from passes.fusion_vit_attention import FusionVITAttention +from passes.fusion_vit_attention import FusionVITAttention, FusionTorchvisionVITAttention from passes.fusion_xsoftmax import FusionXSoftmax +from passes.fuse_inverse_sigmoid import FusionLayerInverseSigmoid +from passes.fuse_l2_normalization import FusionLayerL2Normalization +from passes.fuse_omdet_attention import FusionLayerOmdetAttention from passes.onnx_model import OnnxModel logger = getLogger(__name__) @@ -92,6 +112,7 @@ class BertOnnxModel(OnnxModel): ).apply() FusionVideoBertAttention(self).apply() FusionVITAttention(self).apply() + FusionTorchvisionVITAttention(self).apply() FusionSwinLAttention(self).apply() FusionGptAttentionNoPast(self).apply() # Only relevant in models with Q-DQ nodes @@ -106,6 +127,10 @@ class BertOnnxModel(OnnxModel): fusion = FusionCustomFC(self) fusion.apply() + def fuse_custom_fc_torchvision_vit(self): + fusion = FusionTorchvisionVitCustomFC(self) + fusion.apply() + def fuse_custom_fc_activation(self): fusion = FusionCustomFCActivation(self) fusion.apply() @@ -179,6 +204,18 @@ class BertOnnxModel(OnnxModel): fusion = FusionQOrderedMatMul(self) fusion.apply() + def fuse_omdet_inverse_sigmoid(self): + fusion = FusionLayerInverseSigmoid(self) + fusion.apply() + + def fuse_omdet_attention(self): + fusion = FusionLayerOmdetAttention(self) + fusion.apply() + + def fuse_l2_normalization(self): + fusion = FusionLayerL2Normalization(self) + fusion.apply() + def get_graph_inputs_from_node_type( self, op_type: str, input_indices: List[int], casted: bool ): @@ -484,6 +521,11 @@ class BertOnnxModel(OnnxModel): self.fuse_skip_layer_norm() self.fuse_custom_fc() + + if options.enable_omdet: + self.fuse_omdet_attention() + self.fuse_omdet_inverse_sigmoid() + self.fuse_l2_normalization() self.fuse_custom_xsoftmax() @@ -518,6 +560,9 @@ class BertOnnxModel(OnnxModel): self.gelu_approximation() self.fuse_custom_fc_activation() + + if options.enable_vit: + self.fuse_custom_fc_torchvision_vit() self.remove_unused_constant() diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_conformer.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_conformer.py index a250a9ea..cc59c37b 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_conformer.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_conformer.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. @@ -573,4 +589,3 @@ class conformerOnnxModel(OnnxModel): logger.warning("Attention not fused") return is_perfect - diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_cosyvoice.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_cosyvoice.py new file mode 100755 index 00000000..98cfc669 --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_cosyvoice.py @@ -0,0 +1,640 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+# -------------------------------------------------------------------------- + +from logging import getLogger +from typing import List, Optional + +import onnx +from onnx import GraphProto, ModelProto, TensorProto, ValueInfoProto, helper +from passes.fuse_series_bias_add import FusionSerialBiasAdd +from passes.fusion_albert_attention import FusionAlbertAttention +from passes.fusion_attention import AttentionMask, FusionAttention +from passes.fusion_biasgelu import FusionBiasGelu +from passes.fusion_customfc import ( + FusionCustomFC, + FusionCustomFCActivation, + FusionCustomFCGPT2, + FusionTorchvisionVitCustomFC, +) +from passes.fusion_disentangled_attention import FusionDisentangledAttention +from passes.fusion_embedlayer import FusionEmbedLayerNormalization +from passes.fusion_fastgelu import FusionFastGelu +from passes.fusion_format_roformer import ( + FusionFormatInvalidMask, + FusionRemoveUselessElementwise, +) +from passes.fusion_gelu import FusionGelu +from passes.fusion_gelu_approximation import FusionGeluApproximation +from passes.fusion_gpt_attention_no_past import FusionGptAttentionNoPast +from passes.fusion_layernorm import FusionLayerNormalization, FusionLayerNormalizationTF +from passes.fusion_options import FusionOptions +from passes.fusion_qordered_attention import FusionQOrderedAttention +from passes.fusion_qordered_gelu import FusionQOrderedGelu +from passes.fusion_qordered_layernorm import FusionQOrderedLayerNormalization +from passes.fusion_qordered_matmul import FusionQOrderedMatMul +from passes.fusion_reshape import FusionReshape +from passes.fusion_shape import FusionShape +from passes.fusion_skiplayernorm import ( + FusionBiasSkipLayerNormalization, + FusionSkipLayerNormalization, +) +from passes.fusion_swinl_attention import FusionSwinLAttention +from passes.fusion_utils import FusionUtils +from passes.fusion_videobert_attention import FusionVideoBertAttention +from passes.fusion_vit_attention import FusionVITAttention, FusionTorchvisionVITAttention +from passes.fusion_xsoftmax import FusionXSoftmax +from passes.fuse_inverse_sigmoid import FusionLayerInverseSigmoid +from passes.fuse_l2_normalization import FusionLayerL2Normalization +from passes.fuse_omdet_attention import FusionLayerOmdetAttention +from passes.onnx_model import OnnxModel + +from passes.fusion_cosyvoice_splitQKV_update_KVcache import FusionCosyVoiceSplitQKVUpdateKVCache +from passes.fusion_cosyvoice_attention import ( + FusionCosyvoiceAttention +) +from passes.fusion_cosyvoice_splitQKV import FusionSplitQKV + + + +logger = getLogger(__name__) + + + +class cosyvoiceOnnxModel(OnnxModel): + def __init__(self, model: ModelProto, num_heads: int = 16, hidden_size: int = 1024): + """Initialize BERT ONNX Model. + + Args: + model (ModelProto): the ONNX model + num_heads (int, optional): number of attention heads. Defaults to 0 (detect the parameter automatically). + hidden_size (int, optional): hidden dimension. Defaults to 0 (detect the parameter automatically). 
+ """ + assert (num_heads == 0 and hidden_size == 0) or ( + num_heads > 0 and hidden_size % num_heads == 0 + ) + + super().__init__(model) + self.num_heads = num_heads + self.hidden_size = hidden_size + + self.attention_mask = AttentionMask(self) + self.attention_fusion = FusionAttention( + self, self.hidden_size, self.num_heads, self.attention_mask + ) + self.qordered_attention_fusion = FusionQOrderedAttention( + self, self.hidden_size, self.num_heads, self.attention_mask + ) + self.utils = FusionUtils(self) + + def fuse_attention(self): + self.attention_fusion.apply() + FusionAlbertAttention( + self, self.hidden_size, self.num_heads, self.attention_mask + ).apply() + FusionVideoBertAttention(self).apply() + FusionVITAttention(self).apply() + FusionTorchvisionVITAttention(self).apply() + FusionSwinLAttention(self).apply() + FusionGptAttentionNoPast(self).apply() + # Only relevant in models with Q-DQ nodes + self.qordered_attention_fusion.apply() + + def fuse_format_roformer(self): + FusionRemoveUselessElementwise(self).apply() + fusion = FusionFormatInvalidMask(self) + fusion.apply() + + def fuse_custom_fc(self): + fusion = FusionCustomFC(self) + fusion.apply() + + def fuse_custom_fc_torchvision_vit(self): + fusion = FusionTorchvisionVitCustomFC(self) + fusion.apply() + + def fuse_custom_fc_activation(self): + fusion = FusionCustomFCActivation(self) + fusion.apply() + + def fuse_custom_fc_gpt2_classify(self): + fusion = FusionCustomFCGPT2(self) + fusion.apply() + + def fuse_swinT_serial_bias_add(self): + fusion = FusionSerialBiasAdd(self) + fusion.apply() + + def fuse_gelu(self): + fusion = FusionGelu(self) + fusion.apply() + fusion = FusionFastGelu(self) + fusion.apply() + # Only relevant in models with Q-DQ nodes + fusion = FusionQOrderedGelu(self) + fusion.apply() + + def fuse_bias_gelu(self, is_fastgelu): + fusion = FusionBiasGelu(self, is_fastgelu) + fusion.apply() + + def fuse_custom_xsoftmax(self): + fusion = FusionXSoftmax(self) + fusion.apply() + + def fuse_disentangled_attention(self): + fusion = FusionDisentangledAttention(self) + fusion.apply() + + def gelu_approximation(self): + fusion = FusionGeluApproximation(self) + fusion.apply() + + def fuse_add_bias_skip_layer_norm(self): + fusion = FusionBiasSkipLayerNormalization(self) + fusion.apply() + + def fuse_reshape(self): + fusion = FusionReshape(self) + fusion.apply() + + def fuse_shape(self): + fusion = FusionShape(self) + fusion.apply() + + def fuse_embed_layer(self): + fusion = FusionEmbedLayerNormalization(self) + fusion.apply() + + def fuse_layer_norm(self): + fusion = FusionLayerNormalization(self, self.hidden_size) + fusion.apply() + + fusion = FusionLayerNormalizationTF(self) + fusion.apply() + + # Only relevant in models with Q-DQ nodes + fusion = FusionQOrderedLayerNormalization(self) + fusion.apply() + + def fuse_skip_layer_norm(self): + fusion = FusionSkipLayerNormalization(self) + fusion.apply() + + # Only relevant in models with Q-DQ nodes + def fuse_qordered_mamtul(self): + fusion = FusionQOrderedMatMul(self) + fusion.apply() + + def fuse_omdet_inverse_sigmoid(self): + fusion = FusionLayerInverseSigmoid(self) + fusion.apply() + + def fuse_omdet_attention(self): + fusion = FusionLayerOmdetAttention(self) + fusion.apply() + + def fuse_l2_normalization(self): + fusion = FusionLayerL2Normalization(self) + fusion.apply() + + def fuse_splitQKV_update_kv_cache(self): + fusion = FusionCosyVoiceSplitQKVUpdateKVCache(self, self.hidden_size, self.num_heads) + fusion.apply() + + def fuse_cosyvoice_attention(self): + fusion 
= FusionCosyvoiceAttention(self) + fusion.apply() + + def fuse_cosyvoice_split_qkv(self): + fusion = FusionSplitQKV(self, self.hidden_size, self.num_heads) + fusion.apply() + + + def get_graph_inputs_from_node_type( + self, op_type: str, input_indices: List[int], casted: bool + ): + """ + Get graph inputs that feed into node type (like EmbedLayerNormalization or Attention). + Returns a list of the graph input names based on the filter whether it is casted or not. + """ + graph_inputs = [] + + output_name_to_node = self.output_name_to_node() + nodes = self.get_nodes_by_op_type(op_type) + for node in nodes: + bert_inputs = [node.input[i] for i in input_indices if i < len(node.input)] + for bert_input in bert_inputs: + if self.find_graph_input(bert_input): + if not casted: + graph_inputs.append(bert_input) + elif bert_input in output_name_to_node: + parent = output_name_to_node[bert_input] + if ( + parent.op_type == "Cast" + and self.find_graph_input(parent.input[0]) is not None + ): + if casted: + graph_inputs.append(parent.input[0]) + return graph_inputs + + def get_graph_inputs_from_fused_nodes(self, casted: bool): + inputs = self.get_graph_inputs_from_node_type( + "EmbedLayerNormalization", [0, 1, 7], casted + ) + inputs += self.get_graph_inputs_from_node_type("Attention", [3], casted) + return inputs + + def change_graph_input_type( + self, + graph: GraphProto, + graph_input: ValueInfoProto, + new_type: int = TensorProto.INT32, + ): + """Change graph input type, and add Cast node if needed. + + Args: + graph (GraphProto): graph + graph_input (TensorProto): input of the graph + new_type (int, optional): new data type. Defaults to TensorProto.INT32. + + Returns: + NodeProto: a new Cast node that added. None if Cast node is not added. + List[NodeProto]: Cast nodes that have been removed. + """ + assert isinstance(graph, GraphProto) + assert isinstance(graph_input, ValueInfoProto) + assert self.find_graph_input(graph_input.name) + + if graph_input.type.tensor_type.elem_type == int(new_type): + return None, [] + + new_cast_node = None + nodes_to_remove = [] + + input_name_to_nodes = self.input_name_to_nodes() + if graph_input.name in input_name_to_nodes: + nodes = input_name_to_nodes[graph_input.name] + + # For children that is not Cast node, insert a Cast node to convert int32 to original data type. + nodes_not_cast = [node for node in nodes if node.op_type != "Cast"] + if nodes_not_cast: + node_name = self.create_node_name("Cast") + output_name = node_name + "_" + graph_input.name + new_value_info = graph.value_info.add() + new_value_info.CopyFrom(graph_input) + new_value_info.name = output_name + new_cast_node = helper.make_node( + "Cast", + [graph_input.name], + [output_name], + to=int(graph_input.type.tensor_type.elem_type), + name=node_name, + ) + graph.node.extend([new_cast_node]) + + for node in nodes_not_cast: + OnnxModel.replace_node_input(node, graph_input.name, output_name) + + # For children that is Cast node, no need to insert Cast. + # When the children is Cast to int32, we can remove that Cast node since input type is int32 now. 
+ nodes_cast = [node for node in nodes if node.op_type == "Cast"] + for node in nodes_cast: + if OnnxModel.get_node_attribute(node, "to") == int(new_type): + self.replace_input_of_all_nodes(node.output[0], graph_input.name) + if not self.find_graph_output(node.output[0]): + nodes_to_remove.append(node) + if nodes_to_remove: + self.remove_nodes(nodes_to_remove) + + graph_input.type.tensor_type.elem_type = int(new_type) + return new_cast_node, nodes_to_remove + + def change_graph_inputs_to_int32(self): + """Change data type of all graph inputs to int32 type, and add Cast node if needed.""" + graph = self.graph() + add_cast_count = 0 + remove_cast_count = 0 + for graph_input in graph.input: + new_node, removed_nodes = self.change_graph_input_type( + graph, graph_input, TensorProto.INT32 + ) + if new_node: + add_cast_count += 1 + remove_cast_count += len(removed_nodes) + logger.info( + f"Graph inputs are changed to int32. Added {add_cast_count} Cast nodes, and removed {remove_cast_count} Cast nodes." + ) + + def use_dynamic_axes( + self, dynamic_batch_dim="batch_size", dynamic_seq_len="max_seq_len" + ): + """ + Update input and output shape to use dynamic axes. + """ + bert_graph_inputs = self.get_graph_inputs_from_fused_nodes( + casted=True + ) + self.get_graph_inputs_from_fused_nodes(casted=False) + + dynamic_batch_inputs = {} + for input in self.model.graph.input: + if input.name in bert_graph_inputs: + dim_proto = input.type.tensor_type.shape.dim[0] + dim_proto.dim_param = dynamic_batch_dim + if dynamic_seq_len is not None: + dim_proto = input.type.tensor_type.shape.dim[1] + dim_proto.dim_param = dynamic_seq_len + + for output in self.model.graph.output: + dim_proto = output.type.tensor_type.shape.dim[0] + dim_proto.dim_param = dynamic_batch_dim + + def preprocess(self): + self.adjust_reshape_and_expand() + return + + def adjust_reshape_and_expand(self): + nodes_to_remove = [] + for node in self.nodes(): + if node.op_type == "Reshape": + # Clean up unneccessary reshape nodes. + # Find reshape nodes with no actually data in "shape" attribute and remove. + reshape_shape = self.get_constant_value(node.input[1]) + if reshape_shape is not None and reshape_shape.size == 0: + nodes_to_remove.extend([node]) + self.replace_input_of_all_nodes(node.output[0], node.input[0]) + continue + + # Find path "Slice" -> "Reshape" -> "Expand" -> "Expand" -> current "Reshape", simplify the graph by + # changing current reshape's input to output of slice. 
+ reshape_path = self.match_parent_path( + node, + ["Expand", "Expand", "Reshape", "Slice"], + [0, 0, 0, 0], + self.output_name_to_node(), + ) + if reshape_path is not None: + expand_node = reshape_path[-3] + expand_shape_value = self.get_constant_value(expand_node.input[1]) + + reshape_before_expand = reshape_path[-2] + shape_value = self.get_constant_value( + reshape_before_expand.input[1] + ) + + slice_node = reshape_path[-1] + if ( + expand_shape_value is not None + and shape_value is not None + and len(expand_shape_value) == 2 + and len(shape_value) == 1 + and expand_shape_value[1] == shape_value[0] + ): + node.input[0] = slice_node.output[0] + + if nodes_to_remove: + self.remove_nodes(nodes_to_remove) + logger.info(f"Removed Reshape and Expand count: {len(nodes_to_remove)}") + + def clean_graph(self): + output_name_to_node = self.output_name_to_node() + nodes_to_remove = [] + for node in self.nodes(): + # Before: + # input_ids --> Shape --> Gather(indices=0) --> Unsqueeze ------+ + # | | + # | v + # +----> Shape --> Gather(indices=1) --> Unsqueeze---> Concat --> ConstantOfShape -->Cast --> EmbedLayerNormaliation/ReduceSum + # After: + # input_ids --> Shape --> ConstantOfShape -->Cast --> EmbedLayerNormaliation/ReduceSum + # TODO: merge ConstantOfShape -->Cast to ConstantOfShape (need update the data type of value) + op_input_id = {"EmbedLayerNormalization": 1, "ReduceSum": 0, "Attention": 3} + if node.op_type in op_input_id: + i = op_input_id[node.op_type] + parent_nodes = self.match_parent_path( + node, + [ + "Cast", + "ConstantOfShape", + "Concat", + "Unsqueeze", + "Gather", + "Shape", + ], + [i, 0, 0, 0, 0, 0], + output_name_to_node, + ) + if parent_nodes is not None: + ( + cast, + constantOfShape, + concat, + unsqueeze, + gather, + shape, + ) = parent_nodes + if shape.input[0] == self.graph().input[0].name: + constantOfShape.input[0] = shape.output[0] + output_name_to_node = self.output_name_to_node() + + if node.op_type == "Attention": + # Before: + # input_ids --> Shape -->ConstantOfShape -->Cast --> ReduceSum --> Attention + # After: + # remove this path, and remove the optional mask_index input of Attention node. + parent_nodes = self.match_parent_path( + node, + ["ReduceSum", "Cast", "ConstantOfShape", "Shape"], + [3, 0, 0, 0], + output_name_to_node, + ) + if parent_nodes is not None: + if parent_nodes[-1].input[0] == self.graph().input[0].name: + attention_node = helper.make_node( + "Attention", + inputs=node.input[0 : len(node.input) - 1], + outputs=node.output, + name=node.name + "_remove_mask", + ) + attention_node.domain = "com.microsoft" + attention_node.attribute.extend( + [helper.make_attribute("num_heads", self.num_heads)] + ) + self.add_node( + attention_node, self.get_graph_by_node(attention_node).name + ) + nodes_to_remove.append(node) + self.remove_nodes(nodes_to_remove) + + def postprocess(self): + self.clean_graph() + self.prune_graph() + + def optimize( + self, options: Optional[FusionOptions] = None, add_dynamic_axes: bool = False + ): + if (options is not None) and not options.enable_shape_inference: + self.disable_shape_inference() + + self.utils.remove_identity_nodes() + + # Remove cast nodes that having same data type of input and output based on symbolic shape inference. 
+ self.utils.remove_useless_cast_nodes() + + if (options is None) or options.enable_layer_norm: + self.fuse_layer_norm() + + if (options is None) or options.enable_gelu: + self.fuse_gelu() + + self.preprocess() + + self.fuse_reshape() + + if (options is None) or options.enable_skip_layer_norm: + self.fuse_skip_layer_norm() + + if options.enable_swint_opt: + self.fuse_custom_fc() + self.fuse_swinT_serial_bias_add() + + if options.enable_format_roformer: + self.fuse_format_roformer() + + if options.enable_gpt2_classify or options.enable_vit: + self.fuse_custom_fc_gpt2_classify() + + if options.enable_vit: + self.fuse_custom_fc() + + if (options is None) or options.enable_attention: + if options is not None: + self.attention_mask.set_mask_format(options.attention_mask_format) + self.fuse_attention() + + if (options is None) or options.enable_skip_layer_norm: + self.fuse_skip_layer_norm() + + self.fuse_custom_fc() + + if options.enable_omdet: + self.fuse_omdet_attention() + self.fuse_omdet_inverse_sigmoid() + self.fuse_l2_normalization() + + self.fuse_splitQKV_update_kv_cache() + self.fuse_cosyvoice_attention() + self.fuse_cosyvoice_split_qkv() + + + # Perform the MatMul fusion after the Attention fusion as we do not + # want to fuse the MatMuls inside the Attention subgraphs + if (options is None) or options.enable_qordered_matmul: + self.fuse_qordered_mamtul() + + self.fuse_shape() + + if (options is None) or options.enable_embed_layer_norm: + self.fuse_embed_layer() + + # Remove reshape nodes that having same shape of input and output based on symbolic shape inference. + self.utils.remove_useless_reshape_nodes() + + self.postprocess() + + # Bias fusion is done after postprocess to avoid extra Reshape between bias and Gelu/FastGelu/SkipLayerNormalization + if (options is None) or options.enable_bias_gelu: + # Fuse Gelu and Add Bias before it. + self.fuse_bias_gelu(is_fastgelu=True) + self.fuse_bias_gelu(is_fastgelu=False) + + if (options is None) or options.enable_bias_skip_layer_norm: + # Fuse SkipLayerNormalization and Add Bias before it. + self.fuse_add_bias_skip_layer_norm() + + if options is not None and options.enable_gelu_approximation: + self.gelu_approximation() + + self.fuse_custom_fc_activation() + + if options.enable_vit: + self.fuse_custom_fc_torchvision_vit() + + self.remove_unused_constant() + + # Use symbolic batch dimension in input and output. + if add_dynamic_axes: + self.use_dynamic_axes() + + logger.info(f"opset version: {self.get_opset_version()}") + + def get_fused_operator_statistics(self): + """ + Returns node count of fused operators. + """ + op_count = {} + ops = [ + "EmbedLayerNormalization", + "Attention", + "QOrderedAttention", + "Gelu", + "QOrderedGelu", + "FastGelu", + "BiasGelu", + "LayerNormalization", + "QOrderedLayerNormalization", + "SkipLayerNormalization", + "QOrderedMatMul", + ] + for op in ops: + nodes = self.get_nodes_by_op_type(op) + op_count[op] = len(nodes) + logger.info(f"Optimized operators:{op_count}") + return op_count + + def is_fully_optimized(self): + """ + Returns True when the model is fully optimized. 
+ """ + op_count = self.get_fused_operator_statistics() + embed = op_count["EmbedLayerNormalization"] + attention = op_count["Attention"] + op_count["QOrderedAttention"] + gelu = op_count["Gelu"] + op_count["BiasGelu"] + op_count["FastGelu"] + layer_norm = op_count["LayerNormalization"] + op_count["SkipLayerNormalization"] + is_perfect = ( + (embed > 0) + and (attention > 0) + and (attention == gelu) + and (layer_norm >= 2 * attention) + ) + + if layer_norm == 0: + logger.debug("Layer Normalization not fused") + + if gelu == 0: + logger.debug("Gelu/FastGelu not fused") + + if embed == 0: + logger.debug("Embed Layer not fused") + + if attention == 0: + logger.warning("Attention not fused") + + return is_perfect diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_roformer.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_roformer.py index 85889319..7bffb2e7 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_roformer.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_roformer.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. @@ -537,4 +553,3 @@ class RoformerOnnxModel(OnnxModel): logger.warning("Attention not fused") return is_perfect - diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_t5.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_t5.py index 4b1d6b5f..dac070d2 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_t5.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_t5.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
@@ -28,7 +44,11 @@ from passes.fusion_skiplayernorm import ( FusionBiasSkipLayerNormalization, FusionSkipLayerNormalization, ) -from passes.fusion_t5_attention import FusionT5Attention +from passes.fusion_splitQKV_update_KVcache import FusionSplitQKVUpdateKVCache +from passes.fusion_t5_attention import ( + FusionT5DecoderAttention, + FusionT5EncoderAttention, +) from passes.fusion_utils import FusionUtils from passes.onnx_model import OnnxModel @@ -46,7 +66,7 @@ class BertOptimizationOptions(FusionOptions): class T5OnnxModel(OnnxModel): - def __init__(self, model: ModelProto, num_heads: int = 0, hidden_size: int = 0): + def __init__(self, model: ModelProto, num_heads=12, hidden_size=768): """Initialize T5 ONNX Model. Args: @@ -61,7 +81,6 @@ class T5OnnxModel(OnnxModel): super().__init__(model) self.num_heads = num_heads self.hidden_size = hidden_size - self.attention_mask = AttentionMask(self) self.attention_fusion = FusionAttention( self, self.hidden_size, self.num_heads, self.attention_mask @@ -116,13 +135,17 @@ class T5OnnxModel(OnnxModel): fusion = FusionRMSNorm(self) fusion.apply() - def fuse_t5_attention(self): - fusion = FusionT5Attention(self) + def fuse_t5_encoder_attention(self): + fusion = FusionT5EncoderAttention(self) + fusion.apply() + + def fuse_t5_decoder_attention(self): + fusion = FusionT5DecoderAttention(self) fusion.apply() # pass def fuse_layer_norm(self): - fusion = FusionLayerNormalization(self) + fusion = FusionLayerNormalization(self, hidden_size=768) fusion.apply() fusion = FusionLayerNormalizationTF(self) @@ -136,6 +159,10 @@ class T5OnnxModel(OnnxModel): fusion = FusionSkipLayerNormalization(self) fusion.apply() + def fuse_splitQKV_update_kv_cache(self): + fusion = FusionSplitQKVUpdateKVCache(self, self.hidden_size, self.num_heads) + fusion.apply() + # Only relevant in models with Q-DQ nodes def fuse_qordered_mamtul(self): fusion = FusionQOrderedMatMul(self) @@ -433,7 +460,11 @@ class T5OnnxModel(OnnxModel): self.fuse_rms_norm() - self.fuse_t5_attention() + self.fuse_t5_encoder_attention() + + self.fuse_t5_decoder_attention() + + self.fuse_splitQKV_update_kv_cache() if (options is None) or options.enable_embed_layer_norm: self.fuse_embed_layer() diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_yolo.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_yolo.py index 57982d0c..42b504c4 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_yolo.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_yolo.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
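For context, the T5 changes above split the old `fuse_t5_attention()` entry point into separate encoder and decoder passes and add the new split-QKV / KV-cache fusion. A hedged usage sketch follows; the input path is a placeholder and the save helper is assumed to follow the onnxruntime-style `OnnxModel` API this backend vendors:

```python
import onnx
from onnx_model_t5 import T5OnnxModel  # imported the same way optimizer.py does

# num_heads/hidden_size now default to 12/768 per the constructor change above.
t5 = T5OnnxModel(onnx.load("t5_encoder_sim_static.onnx"))

# The former fuse_t5_attention() is now two passes, plus the KV-cache fusion.
t5.fuse_t5_encoder_attention()
t5.fuse_t5_decoder_attention()
t5.fuse_splitQKV_update_kv_cache()

t5.save_model_to_file("t5_encoder_opt.onnx")  # assumed OnnxModel helper
```

In practice these passes are driven through `optimize_by_fusion()` with `--model_type t5`; calling them directly is mainly useful when debugging a single fusion.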
diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/optimizer.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/optimizer.py index 701bd7a4..0f301e3a 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/optimizer.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/optimizer.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + import argparse import logging import time @@ -10,6 +26,10 @@ from onnx_model_roformer import RoformerOnnxModel from onnx_model_conformer import conformerOnnxModel from onnx_model_t5 import T5OnnxModel from onnx_model_yolo import YoloOnnxModel +from onnx_model_PVT import PVTOnnxModel +from onnx_model_cosyvoice import cosyvoiceOnnxModel + + from onnxsim import simplify from passes.fusion_options import FusionOptions from passes.symbolic_shape_infer import SymbolicShapeInference @@ -24,6 +44,10 @@ MODEL_TYPES = { "yolo": (YoloOnnxModel, None, "pytorch", 1), "vit": (BertOnnxModel, None, "pytorch", 1), "conformer": (conformerOnnxModel, None, "pytorch", 1), + "PVT": (PVTOnnxModel, None, "pytorch", 1), + "omdet": (BertOnnxModel, None, "pytorch", 1), + "cosyvoice": (cosyvoiceOnnxModel, None, "pytorch", 1) + } @@ -81,48 +105,50 @@ def optimize_by_fusion( def optimize_to_ixrt(args): onnx_name = args.onnx[:-5] model = onnx.load(args.onnx) - - logger.info("simplify..") - simplified_model, check = simplify(model) - logger.info("simplify model end...") - if args.dump_onnx: - onnx.save(simplified_model, onnx_name + "_sim.onnx") - - # transfer to static shape and optimize it - static_sim_model = simplified_model - if args.input_shapes: - for input_tensor in simplified_model.graph.input: - if input_tensor.name in args.input_shapes.keys(): - new_shape = args.input_shapes[input_tensor.name] - dim_list = [] - for dim in new_shape: - if isinstance(dim, int): - dim_proto = onnx.TensorShapeProto.Dimension() - dim_proto.dim_value = dim - dim_list.append(dim_proto) - elif isinstance(dim, str): - dim_proto = onnx.TensorShapeProto.Dimension() - dim_proto.dim_param = dim - dim_list.append(dim_proto) - - del input_tensor.type.tensor_type.shape.dim[:] - input_tensor.type.tensor_type.shape.dim.extend(dim_list) - - try: - auto_merge = False - if args.model_type in ["roformer"]: - auto_merge = True - static_model = SymbolicShapeInference.infer_shapes( - simplified_model, 2**31 - 1, auto_merge, False, 3 - ) - static_sim_model, check = simplify(static_model) + if not args.not_sim: + logger.info("simplify..") + simplified_model, check = simplify(model) + logger.info("simplify model end...") if args.dump_onnx: - onnx.save(static_sim_model, onnx_name + "_sim_static_sim.onnx") - except Exception as e: - static_model = static_sim_model = simplified_model + onnx.save(simplified_model, onnx_name + "_sim.onnx") + + # transfer to static shape and optimize it + 
static_sim_model = simplified_model + if args.input_shapes: + for input_tensor in simplified_model.graph.input: + if input_tensor.name in args.input_shapes.keys(): + new_shape = args.input_shapes[input_tensor.name] + dim_list = [] + for dim in new_shape: + if isinstance(dim, int): + dim_proto = onnx.TensorShapeProto.Dimension() + dim_proto.dim_value = dim + dim_list.append(dim_proto) + elif isinstance(dim, str): + dim_proto = onnx.TensorShapeProto.Dimension() + dim_proto.dim_param = dim + dim_list.append(dim_proto) + + del input_tensor.type.tensor_type.shape.dim[:] + input_tensor.type.tensor_type.shape.dim.extend(dim_list) + + try: + auto_merge = False + if args.model_type in ["roformer"]: + auto_merge = True + static_model = SymbolicShapeInference.infer_shapes( + simplified_model, 2**31 - 1, auto_merge, False, 3 + ) + static_sim_model, check = simplify(static_model) + if args.dump_onnx: + onnx.save(static_sim_model, onnx_name + "_sim_static_sim.onnx") + except Exception as e: + static_model = static_sim_model = simplified_model - if args.dump_onnx: - onnx.save(static_model, onnx_name + "_sim_static.onnx") + if args.dump_onnx: + onnx.save(static_model, onnx_name + "_sim_static.onnx") + if args.not_sim: + static_sim_model = model logger.info("start fusion..") opt_model, _ = optimize_by_fusion( @@ -171,7 +197,7 @@ def args_parser(): "--model_type", type=str, default="bert", - choices=["bert", "swint", "roformer", "t5", "yolo", "gpt2", "vit", "conformer"], + choices=["bert", "swint", "roformer", "t5", "yolo", "gpt2", "vit", "conformer","PVT","omdet","cosyvoice"], help="Which kind of model to optimize", ) parser.add_argument( @@ -181,6 +207,13 @@ def args_parser(): choices=["debug", "info", "error"], help="Which kind of model to optimize", ) + + parser.add_argument( + "--not_sim", + action="store_true", + default=False, + help="simplify model or not", + ) return parser.parse_args() diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/__init__.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/__init__.py index e69de29b..de522e5b 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/__init__.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/float16.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/float16.py index 437e72fc..96da8751 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/float16.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/float16.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. 
+# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. @@ -40,14 +56,24 @@ def convert_np_to_float16(np_array, min_positive_val=5.96e-08, max_finite_val=65 def between(a, b, c): return np.logical_and(a < b, b < c) - np_array = np.where(between(0, np_array, min_positive_val), min_positive_val, np_array) - np_array = np.where(between(-min_positive_val, np_array, 0), -min_positive_val, np_array) - np_array = np.where(between(max_finite_val, np_array, float("inf")), max_finite_val, np_array) - np_array = np.where(between(float("-inf"), np_array, -max_finite_val), -max_finite_val, np_array) + np_array = np.where( + between(0, np_array, min_positive_val), min_positive_val, np_array + ) + np_array = np.where( + between(-min_positive_val, np_array, 0), -min_positive_val, np_array + ) + np_array = np.where( + between(max_finite_val, np_array, float("inf")), max_finite_val, np_array + ) + np_array = np.where( + between(float("-inf"), np_array, -max_finite_val), -max_finite_val, np_array + ) return np.float16(np_array) -def convert_tensor_float_to_float16(tensor, min_positive_val=5.96e-08, max_finite_val=65504.0): +def convert_tensor_float_to_float16( + tensor, min_positive_val=5.96e-08, max_finite_val=65504.0 +): """Convert tensor float to float16. Args: @@ -63,13 +89,17 @@ def convert_tensor_float_to_float16(tensor, min_positive_val=5.96e-08, max_finit """ if not isinstance(tensor, onnx_proto.TensorProto): - raise ValueError("Expected input type is an ONNX TensorProto but got %s" % type(tensor)) + raise ValueError( + "Expected input type is an ONNX TensorProto but got %s" % type(tensor) + ) if tensor.data_type == onnx_proto.TensorProto.FLOAT: tensor.data_type = onnx_proto.TensorProto.FLOAT16 # convert float_data (float type) to float16 and write to int32_data if tensor.float_data: - float16_data = convert_np_to_float16(np.array(tensor.float_data), min_positive_val, max_finite_val) + float16_data = convert_np_to_float16( + np.array(tensor.float_data), min_positive_val, max_finite_val + ) int_list = _npfloat16_to_int(float16_data) tensor.int32_data[:] = int_list tensor.float_data[:] = [] @@ -78,7 +108,9 @@ def convert_tensor_float_to_float16(tensor, min_positive_val=5.96e-08, max_finit # convert n.raw_data to float float32_list = np.frombuffer(tensor.raw_data, dtype="float32") # convert float to float16 - float16_list = convert_np_to_float16(float32_list, min_positive_val, max_finite_val) + float16_list = convert_np_to_float16( + float32_list, min_positive_val, max_finite_val + ) # convert float16 to bytes and write back to raw_data tensor.raw_data = float16_list.tobytes() return tensor @@ -168,10 +200,14 @@ def convert_float_to_float16( assert ( min_positive_val >= 5.96e-08 ), "invalid min_positive_val. 
smallest positive float16 value: subnormal 5.96e-08, and normalized 6.104e-05" - assert max_finite_val <= float(np.finfo(np.float16).max), "invalid max_finite_val. largest float16 value: 65504" + assert max_finite_val <= float( + np.finfo(np.float16).max + ), "invalid max_finite_val. largest float16 value: 65504" func_infer_shape = None - if not disable_shape_infer and version.parse(onnx.__version__) >= version.parse("1.2.0"): + if not disable_shape_infer and version.parse(onnx.__version__) >= version.parse( + "1.2.0" + ): try: from onnx.shape_inference import infer_shapes @@ -180,7 +216,9 @@ def convert_float_to_float16( pass if not isinstance(model, onnx_proto.ModelProto): - raise ValueError("Expected model type is an ONNX ModelProto but got %s" % type(model)) + raise ValueError( + "Expected model type is an ONNX ModelProto but got %s" % type(model) + ) # create blocklists if op_block_list is None: @@ -206,8 +244,16 @@ def convert_float_to_float16( graph_io_to_skip = set() io_casts = set() - fp32_inputs = [n.name for n in model.graph.input if n.type.tensor_type.elem_type == onnx_proto.TensorProto.FLOAT] - fp32_outputs = [n.name for n in model.graph.output if n.type.tensor_type.elem_type == onnx_proto.TensorProto.FLOAT] + fp32_inputs = [ + n.name + for n in model.graph.input + if n.type.tensor_type.elem_type == onnx_proto.TensorProto.FLOAT + ] + fp32_outputs = [ + n.name + for n in model.graph.output + if n.type.tensor_type.elem_type == onnx_proto.TensorProto.FLOAT + ] if isinstance(keep_io_types, list): fp32_inputs = [n for n in fp32_inputs if n in keep_io_types] fp32_outputs = [n for n in fp32_outputs if n in keep_io_types] @@ -227,7 +273,9 @@ def convert_float_to_float16( new_value_info.name = output_name new_value_info.type.tensor_type.elem_type = onnx_proto.TensorProto.FLOAT16 # add Cast node (from tensor(float) to tensor(float16) after graph input - new_node = [helper.make_node("Cast", [n.name], [output_name], to=10, name=node_name)] + new_node = [ + helper.make_node("Cast", [n.name], [output_name], to=10, name=node_name) + ] model.graph.node.extend(new_node) value_info_list.append(new_value_info) io_casts.add(node_name) @@ -244,7 +292,9 @@ def convert_float_to_float16( new_value_info.CopyFrom(n) new_value_info.name = input_name new_value_info.type.tensor_type.elem_type = onnx_proto.TensorProto.FLOAT16 - new_node = [helper.make_node("Cast", [input_name], [n.name], to=1, name=node_name)] + new_node = [ + helper.make_node("Cast", [input_name], [n.name], to=1, name=node_name) + ] model.graph.node.extend(new_node) value_info_list.append(new_value_info) io_casts.add(node_name) @@ -275,7 +325,9 @@ def convert_float_to_float16( if n.output[i] in name_mapping: n.output[i] = name_mapping[n.output[i]] - is_node_blocked = n.op_type in op_block_list or n.name in node_block_list + is_node_blocked = ( + n.op_type in op_block_list or n.name in node_block_list + ) for input in n.input: if input in fp32_initializers: fp32_initializers[input].add_node(n, is_node_blocked) @@ -296,9 +348,15 @@ def convert_float_to_float16( next_level.append(q.g) for n in q.graphs: next_level.append(n) - q.t.CopyFrom(convert_tensor_float_to_float16(q.t, min_positive_val, max_finite_val)) + q.t.CopyFrom( + convert_tensor_float_to_float16( + q.t, min_positive_val, max_finite_val + ) + ) for n in q.tensors: - n = convert_tensor_float_to_float16(n, min_positive_val, max_finite_val) + n = convert_tensor_float_to_float16( + n, min_positive_val, max_finite_val + ) # if q is graph, process input, output and value_info 
(ValueInfoProto) if isinstance(q, onnx_proto.GraphProto): # Note that float initializers tracked by fp32_initializers will be processed later. @@ -307,12 +365,19 @@ def convert_float_to_float16( for n in itertools.chain(q.input, q.output, q.value_info): if n.type.tensor_type.elem_type == onnx_proto.TensorProto.FLOAT: if n.name not in graph_io_to_skip: - n.type.tensor_type.elem_type = onnx_proto.TensorProto.FLOAT16 + n.type.tensor_type.elem_type = ( + onnx_proto.TensorProto.FLOAT16 + ) value_info_list.append(n) if n.type.HasField("sequence_type"): - if n.type.sequence_type.elem_type.tensor_type.elem_type == onnx_proto.TensorProto.FLOAT: + if ( + n.type.sequence_type.elem_type.tensor_type.elem_type + == onnx_proto.TensorProto.FLOAT + ): if n.name not in graph_io_to_skip: - n.type.sequence_type.elem_type.tensor_type.elem_type = onnx_proto.TensorProto.FLOAT16 + n.type.sequence_type.elem_type.tensor_type.elem_type = ( + onnx_proto.TensorProto.FLOAT16 + ) value_info_list.append(n) queue = next_level @@ -320,7 +385,9 @@ def convert_float_to_float16( for key, value in fp32_initializers.items(): # By default, to avoid precision loss, do not convert an initializer to fp16 when it is used only by fp32 nodes. if force_fp16_initializers or value.fp16_nodes: - value.initializer = convert_tensor_float_to_float16(value.initializer, min_positive_val, max_finite_val) + value.initializer = convert_tensor_float_to_float16( + value.initializer, min_positive_val, max_finite_val + ) value_info_list.append(make_value_info_from_tensor(value.initializer)) if value.fp32_nodes and not force_fp16_initializers: logger.info( @@ -343,10 +410,16 @@ def convert_float_to_float16( new_value_info.CopyFrom(value_info) output_name = node.name + "_input_cast_" + str(i) new_value_info.name = output_name - new_value_info.type.tensor_type.elem_type = onnx_proto.TensorProto.FLOAT + new_value_info.type.tensor_type.elem_type = ( + onnx_proto.TensorProto.FLOAT + ) # add Cast node (from tensor(float16) to tensor(float) before current node node_name = node.name + "_input_cast" + str(i) - new_node = [helper.make_node("Cast", [input], [output_name], to=1, name=node_name)] + new_node = [ + helper.make_node( + "Cast", [input], [output_name], to=1, name=node_name + ) + ] model.graph.node.extend(new_node) # change current node's input name node.input[i] = output_name @@ -362,10 +435,16 @@ def convert_float_to_float16( new_value_info.CopyFrom(value_info) input_name = node.name + "_output_cast_" + str(i) new_value_info.name = input_name - new_value_info.type.tensor_type.elem_type = onnx_proto.TensorProto.FLOAT + new_value_info.type.tensor_type.elem_type = ( + onnx_proto.TensorProto.FLOAT + ) # add Cast node (from tensor(float) to tensor(float16) after current node node_name = node.name + "_output_cast" + str(i) - new_node = [helper.make_node("Cast", [input_name], [output], to=10, name=node_name)] + new_node = [ + helper.make_node( + "Cast", [input_name], [output], to=10, name=node_name + ) + ] model.graph.node.extend(new_node) # change current node's input name node.output[i] = input_name @@ -373,10 +452,14 @@ def convert_float_to_float16( return model -def float_to_float16_max_diff(tensor, min_positive_val=5.96e-08, max_finite_val=65504.0): +def float_to_float16_max_diff( + tensor, min_positive_val=5.96e-08, max_finite_val=65504.0 +): """Measure the maximum absolute difference after converting a float tensor to float16.""" if not isinstance(tensor, onnx_proto.TensorProto): - raise ValueError("Expected input type is an ONNX TensorProto but got 
%s" % type(tensor)) + raise ValueError( + "Expected input type is an ONNX TensorProto but got %s" % type(tensor) + ) if tensor.data_type != onnx_proto.TensorProto.FLOAT: raise ValueError("Expected tensor data type is float.") diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fuse_inverse_sigmoid.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fuse_inverse_sigmoid.py new file mode 100644 index 00000000..9862d9ee --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fuse_inverse_sigmoid.py @@ -0,0 +1,85 @@ + +from logging import getLogger +from typing import Dict + +import numpy as np +from onnx import TensorProto, helper + +from .fusion_base import Fusion +from .onnx_model import OnnxModel + +logger = getLogger(__name__) + +class FusionLayerInverseSigmoid(Fusion): + def __init__(self, model: OnnxModel): + super().__init__( + model, "InverseSigmoid", "Clip" + ) + + def fuse(self, node, input_name_to_nodes: Dict, output_name_to_node: Dict): + """ + +------------Clip-----------+ + | | + | v + [Root] --> Clip--> Sub --> Clip --> Div --> Log + """ + children = self.model.get_children(node, input_name_to_nodes) + if len(children) != 2: + return + + root_input = node.input[0] + + if not ((children[0].op_type == "Sub" and children[1].op_type == "Clip") or (children[0].op_type == "Clip" and children[1].op_type == "Sub")): + return + + log_node = None + for child in children: + log_node = self.model.find_first_child_by_type( + child, "Log", input_name_to_nodes, recursive=True + ) + if log_node is not None: + break + if log_node is None: + return + parent_nodes = self.model.match_parent_path( + log_node, + ["Div", "Clip", "Sub", "Clip"], + [0, 1, 0, 1], + output_name_to_node, + ) + if parent_nodes is None: + return + + sub_node = parent_nodes[2] + if sub_node not in children: + return + + div_node = parent_nodes[0] + div_parents_nodes = self.model.get_parents(div_node) + if len(div_parents_nodes) != 2: + return + if div_parents_nodes[0].op_type != "Clip": + return + if div_parents_nodes[0] not in children: + return + + subgraph_nodes = [node] + subgraph_nodes.extend([log_node]) + subgraph_nodes.extend(parent_nodes) + subgraph_nodes.extend([div_parents_nodes[0]]) + _, eps_val = self.model.get_constant_input(div_parents_nodes[0]) + + self.nodes_to_remove.extend(subgraph_nodes) + inverse_sigmoid_node = helper.make_node( + "InverseSigmoid", + inputs=[node.input[0]], + outputs=[log_node.output[0]], + name=self.model.create_node_name( + "InverseSigmoid", name_prefix="InverseSigmoid" + ), + ) + inverse_sigmoid_node.attribute.extend( + [helper.make_attribute("epsilon", float(eps_val))] + ) + self.nodes_to_add.append(inverse_sigmoid_node) + self.node_name_to_graph_name[inverse_sigmoid_node.name] = self.this_graph_name \ No newline at end of file diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fuse_l2_normalization.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fuse_l2_normalization.py new file mode 100644 index 00000000..bfd1ed28 --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fuse_l2_normalization.py @@ -0,0 +1,69 @@ +from logging import getLogger +from typing import Dict + +import numpy as np +from onnx import TensorProto, helper + +from .fusion_base import Fusion +from .onnx_model import OnnxModel + +logger = getLogger(__name__) + 
+class FusionLayerL2Normalization(Fusion): + def __init__(self, model: OnnxModel): + super().__init__( + model, "L2Normalization", "Abs" + ) + + def fuse(self, node, input_name_to_nodes: Dict, output_name_to_node: Dict): + """ + +-------------------------------------------------------+ + | | + | v + [Root] --> Abs--> Pow --> ReduceSum --> Pow --> Clip --> Div + """ + pow1_nodes = self.model.get_children(node, input_name_to_nodes) + if len(pow1_nodes) != 1 or pow1_nodes[0].op_type != "Pow": + return + + reduce_nodes = self.model.get_children(pow1_nodes[0], input_name_to_nodes) + if len(reduce_nodes) != 1 or reduce_nodes[0].op_type != "ReduceSum": + return + + pow2_nodes = self.model.get_children(reduce_nodes[0], input_name_to_nodes) + if len(pow2_nodes) != 1 or pow2_nodes[0].op_type != "Pow": + return + + clip_nodes = self.model.get_children(pow2_nodes[0], input_name_to_nodes) + if len(clip_nodes) != 1 or clip_nodes[0].op_type != "Clip": + return + + div_nodes = self.model.get_children(clip_nodes[0], input_name_to_nodes) + if len(div_nodes) != 1 or div_nodes[0].op_type != "Div": + return + + root_input = node.input[0] + if div_nodes[0].input[0] != root_input: + return + + subgraph_nodes = [node, pow1_nodes[0], reduce_nodes[0], pow2_nodes[0], clip_nodes[0], div_nodes[0]] + _, eps_val = self.model.get_constant_input(clip_nodes[0]) + _, norm_axes = self.model.get_constant_input(reduce_nodes[0]) + norm_axes = norm_axes.astype(np.int32) + + self.nodes_to_remove.extend(subgraph_nodes) + l2_normalization_node = helper.make_node( + "L2Normalization", + inputs=[node.input[0]], + outputs=[div_nodes[0].output[0]], + name=self.model.create_node_name( + "L2Normalization", name_prefix="L2Normalization" + ), + ) + l2_normalization_node.attribute.extend( + [helper.make_attribute("epsilon", float(eps_val)), + helper.make_attribute("axes", norm_axes), + helper.make_attribute("axes_length", int(norm_axes.size))] + ) + self.nodes_to_add.append(l2_normalization_node) + self.node_name_to_graph_name[l2_normalization_node.name] = self.this_graph_name \ No newline at end of file diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fuse_omdet_attention.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fuse_omdet_attention.py new file mode 100644 index 00000000..3451731f --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fuse_omdet_attention.py @@ -0,0 +1,149 @@ + +from logging import getLogger +from typing import Dict + +import math +import numpy as np +from onnx import TensorProto, helper + +from .fusion_base import Fusion +from .onnx_model import OnnxModel + +logger = getLogger(__name__) + +class FusionLayerOmdetAttention(Fusion): + def __init__(self, model: OnnxModel): + super().__init__( + model, "CustomQKVToContextPluginDynamic_IxRT", "CustomFCPluginDynamic_IxRT" + ) + + def fuse(self, node, input_name_to_nodes: Dict, output_name_to_node: Dict): + """ + [Root] --> CustomFCPluginDynamic_IxRT--> CustomQKVToContextPluginDynamic_IxRT --> CustomFCPluginDynamic_IxRT + """ + children = self.model.get_children(node, input_name_to_nodes) + parent = self.model.get_parents(node, output_name_to_node) + + if len(children) != 1: + return + if len(parent) != 1: + return + + fc_first_node = None + for par in parent: + fc_first_node = self.model.find_first_parent_by_type( + par, "CustomFCPluginDynamic_IxRT", output_name_to_node, recursive=True + ) + if fc_first_node is not None: + break + if 
fc_first_node is None: + return + + start_node = node + + # v path + v_nodes = self.model.match_parent_path( + start_node, + ["Reshape", "Transpose", "MatMul", "Gather", "Transpose", "Reshape"], + [0, 0, 0, 1, 0, 0], + output_name_to_node, + ) + + # path1, q and k path + q_nodes = self.model.match_parent_path( + start_node, + ["Reshape", "Transpose", "MatMul", "Softmax", "Add", "MatMul", "Transpose", "Gather", "Transpose", "Reshape"], + [0, 0, 0, 0, 0, 0, 1, 0, 0, 0], + output_name_to_node, + ) + + k_nodes = self.model.match_parent_path( + start_node, + ["Reshape", "Transpose", "MatMul", "Softmax", "Add", "MatMul", "Mul", "Gather", "Transpose", "Reshape"], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + output_name_to_node, + ) + + # path2, q and k path + q_nodes_1 = self.model.match_parent_path( + start_node, + ["Reshape", "Transpose", "MatMul", "Softmax", "Reshape", "Add", "Reshape", "Add", "MatMul", "Transpose", "Gather", "Transpose", "Reshape"], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0], + output_name_to_node, + ) + + k_nodes_1 = self.model.match_parent_path( + start_node, + ["Reshape", "Transpose", "MatMul", "Softmax", "Reshape", "Add", "Reshape", "Add", "MatMul", "Mul", "Gather", "Transpose", "Reshape"], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + output_name_to_node, + ) + + if v_nodes is None: + return + + if v_nodes and q_nodes and k_nodes: + subgraph_nodes = [] + subgraph_nodes.extend(q_nodes) + subgraph_nodes.extend(k_nodes) + subgraph_nodes.extend(v_nodes) + + subgraph_nodes_unique = [] + for item in subgraph_nodes: + if item not in subgraph_nodes_unique: + subgraph_nodes_unique.append(item) + + add_node = q_nodes[4] + hidden_size = start_node.attribute[0].i + _, mul_val = self.model.get_constant_input(k_nodes[6]) + num_heads = hidden_size // math.floor((1/mul_val)*(1/ mul_val)) + attention_input_1_name = add_node.input[1] + + if v_nodes and q_nodes_1 and k_nodes_1: + subgraph_nodes = [] + subgraph_nodes.extend(q_nodes_1) + subgraph_nodes.extend(k_nodes_1) + subgraph_nodes.extend(v_nodes) + + subgraph_nodes_unique = [] + for item in subgraph_nodes: + if item not in subgraph_nodes_unique: + subgraph_nodes_unique.append(item) + + hidden_size = start_node.attribute[0].i + _, mul_val = self.model.get_constant_input(k_nodes_1[9]) + num_heads = hidden_size // math.floor((1/mul_val)*(1/ mul_val)) + + add_1 = self.model.get_initializer(q_nodes_1[5].input[1], True) + add_2 = self.model.get_initializer(q_nodes_1[7].input[1], True) + add_all = np.squeeze(add_1 + add_2) + + attention_input_1_name = "attention_" + q_nodes_1[5].input[1] + attention_input_1 = helper.make_tensor( + attention_input_1_name, TensorProto.FLOAT, add_all.shape, add_all.flatten().tolist()) + + self.model.add_initializer(attention_input_1, self.this_graph_name) + + attention_node = helper.make_node( + "CustomQKVToContextPluginDynamic_IxRT", + inputs=[fc_first_node.output[0], attention_input_1_name], + outputs=[start_node.input[0]], + name=self.model.create_node_name( + "OmdetAttention", name_prefix="OmdetAttention" + ), + ) + attention_node.domain = "com.iluvatar" + attention_node.attribute.extend([helper.make_attribute("type_id", 2)]) + attention_node.attribute.extend([helper.make_attribute("num_heads", num_heads)]) + attention_node.attribute.extend([helper.make_attribute("hidden_size", hidden_size)]) + attention_node.attribute.extend([helper.make_attribute("has_mask", 1)]) + attention_node.attribute.extend([helper.make_attribute("plugin_namespace", "")]) + 
attention_node.attribute.extend([helper.make_attribute("plugin_version", "1")]) + attention_node.attribute.extend([helper.make_attribute("has_qk_bias", 1)]) + + self.nodes_to_remove.extend(subgraph_nodes_unique) + + self.nodes_to_add.append(attention_node) + self.node_name_to_graph_name[attention_node.name] = self.this_graph_name + + \ No newline at end of file diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fuse_series_bias_add.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fuse_series_bias_add.py index e1fde76f..bb9a1cab 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fuse_series_bias_add.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fuse_series_bias_add.py @@ -1,11 +1,28 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + from logging import getLogger +import numpy as np +import onnx +from onnx import NodeProto, TensorProto, helper, numpy_helper + from .fusion_base import Fusion from .fusion_utils import NumpyHelper -from onnx import NodeProto, TensorProto, helper, numpy_helper from .onnx_model import OnnxModel -import numpy as np -import onnx logger = getLogger(__name__) @@ -35,7 +52,7 @@ class FusionSerialBiasAdd(Fusion): biases = [ self.model.get_initializer(add_1st.input[1]), - self.model.get_initializer(add_2nd.input[1]) + self.model.get_initializer(add_2nd.input[1]), ] if not all(biases): return diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_PVT_attention.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_PVT_attention.py new file mode 100644 index 00000000..2d4cc73a --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_PVT_attention.py @@ -0,0 +1,130 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- +import math +from enum import Enum +from logging import getLogger +from os import name +from sys import path +from typing import Tuple, Union + +import numpy as np +import onnx +from onnx import NodeProto, TensorProto, helper, numpy_helper + +from .fusion_base import Fusion +from .fusion_options import AttentionMaskFormat +from .fusion_utils import FusionUtils, NumpyHelper +from .onnx_model import OnnxModel +from .shape_infer_helper import SymbolicShapeInferenceHelper, get_shape_from_type_proto + +logger = getLogger(__name__) + + +class FusionPVTAttention(Fusion): + """ + Fuse FusionPVTAttention subgraph into one Attention node. 
+ """ + + def __init__( + self, + model: OnnxModel, + ): + super().__init__( + model, + "CustomQkvCrossToContext_IxRT", + ["Softmax"], + ) + + # Flags to show warning only once + self.num_heads_warning = False + self.hidden_size_warning = False + + + def create_decoder_attention_node( + self, inputs: str, outputs: str, type_mask: int, has_mask: int,scale: float + ) -> Union[NodeProto, None]: + """Create an Attention node. + + Args: + input (str): input name + output (str): output name + + Returns: + Union[NodeProto, None]: the node created or None if failed. + """ + + attention_node_name = self.model.create_node_name("cross_Attention") + attention_node = helper.make_node( + "CustomQkvCrossToContext_IxRT", + inputs=inputs, + outputs=outputs, + name=attention_node_name, + ) + attention_node.domain = "com.iluvatar" + attention_node.attribute.extend([helper.make_attribute("type_id", 2)]) + attention_node.attribute.extend([helper.make_attribute("scale", scale)]) + attention_node.attribute.extend([helper.make_attribute("has_mask", has_mask)]) + attention_node.attribute.extend([helper.make_attribute("plugin_namespace", "")]) + attention_node.attribute.extend([helper.make_attribute("plugin_version", "1")]) + attention_node.attribute.extend([helper.make_attribute("type_mask", type_mask)]) + + return attention_node + + def fuse(self, node, input_name_to_nodes, output_name_to_node): + + """ + path: + + (query) ---------------->MatMul ---->Mul --->softmax --->MatMul---> + / / + (key) ---->Transpose --> / + / + / + / + (value)---------------------------------------------> + + """ + + start_node = node + qkv_paths = { + "path": (["Mul", "MatMul", "Transpose"], [0, 0, 0]), # cross attention qery pass + } + + qkv_nodes, qkv_path = self.match_parent_path_from_dict(start_node, qkv_paths) + if qkv_nodes is None: + logger.debug("fuse_attention: failed to match qkv path") + return + next_nodes = self.model.get_children(node) + if len(next_nodes) == 0: + return + + if next_nodes[0].op_type != "MatMul": + return + + second_matmul_node = next_nodes[0] + attention_outputs = second_matmul_node.output + remove_nodes = [second_matmul_node, node] + + + + (mul_node, first_matmul_node, transpose_node) = qkv_nodes + transpose_nodes = self.model.get_parents(first_matmul_node) + + q_input = transpose_nodes[0].output[0] + k_input = transpose_nodes[1].input[0] + v_input = second_matmul_node.input[1] + attention_inputs = [q_input, k_input, v_input] + remove_nodes.extend([first_matmul_node, mul_node, transpose_nodes[1]]) + + has_mask = 0 + type_mask = 4 + + scale = numpy_helper.to_array(self.model.get_initializer(mul_node.input[1])).item() + atten_node = self.create_decoder_attention_node( + attention_inputs, attention_outputs, type_mask, has_mask,scale + ) + self.nodes_to_add.append(atten_node) + self.node_name_to_graph_name[atten_node.name] = self.this_graph_name + self.nodes_to_remove.extend(remove_nodes) \ No newline at end of file diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_albert_attention.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_albert_attention.py index 47b8ec77..a3e31fe7 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_albert_attention.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_albert_attention.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. 
+# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. @@ -66,12 +82,11 @@ class FusionAlbertAttention(Fusion): """ # we assume that reshape fusion has done, so the shape is a tensor like [0, 0, num_heads, head_size] - q_shape = self.model.get_initializer(reshape_q.input[1]) - if q_shape is None: + q_shape_value = self.model.get_constant_value(reshape_q.input[1]) + if q_shape_value is None: logger.debug(f"{reshape_q.input[1]} is not initializer.") return self.num_heads, self.hidden_size # Fall back to user specified value - q_shape_value = NumpyHelper.to_array(q_shape) if len(q_shape_value) != 4 or (q_shape_value[2] <= 0 or q_shape_value[3] <= 0): logger.debug( f"q_shape_value={q_shape_value}. Expected value are like [0, 0, num_heads, head_size]." @@ -413,11 +428,13 @@ class FusionAlbertAttention(Fusion): is_distill = False is_distill_add = False + is_mul_split = False qk_paths = { "path1": (["Softmax", "Add", "Div", "MatMul"], [0, 0, None, 0]), "path2": (["Softmax", "Add", "Mul", "MatMul"], [0, 0, None, 0]), "path3": (["Softmax", "Where", "MatMul", "Div"], [0, 0, 2, 0]), "path4": (["Softmax", "Add", "Where", "MatMul"], [0, 0, 0, 2]), + "path5": (["Softmax", "Add", "MatMul"], [0, 0, None]) } qk_nodes = None @@ -429,12 +446,13 @@ class FusionAlbertAttention(Fusion): is_distill = True if k == "path4": is_distill_add = True + if k == "path5": + is_mul_split = True break if qk_nodes is None: logger.debug("fuse_attention: failed to match qk path") return - add_qk = None matmul_qk = None where_qk = None @@ -442,6 +460,8 @@ class FusionAlbertAttention(Fusion): (_, where_qk, matmul_qk, _) = qk_nodes elif is_distill_add: (_, add_qk, where_qk, matmul_qk) = qk_nodes + elif is_mul_split: + (_, add_qk, matmul_qk) = qk_nodes else: (_, add_qk, _, matmul_qk) = qk_nodes @@ -454,6 +474,12 @@ class FusionAlbertAttention(Fusion): ["Div", "Transpose", "Reshape", "Add", "MatMul"], [0, 0, 0, 0, None], ) + if q_nodes is None and is_mul_split: + q_nodes = self.model.match_parent_path( + matmul_qk, + ["Mul", "Transpose", "Reshape", "Add", "MatMul"], + [0, 0, 0, 0, None], + ) if q_nodes is None: logger.debug("fuse_attention: failed to match q path") return @@ -470,6 +496,13 @@ class FusionAlbertAttention(Fusion): ["Transpose", "Transpose", "Reshape", "Add", "MatMul"], [1, 0, 0, 0, None], ) + if k_nodes is None and is_mul_split: + k_nodes = self.model.match_parent_path( + matmul_qk, + ["Mul", "Transpose", "Reshape", "Add", "MatMul"], + [1, 0, 0, 0, None], + ) + if k_nodes is None: logger.debug("fuse_attention: failed to match k path") return @@ -505,6 +538,14 @@ class FusionAlbertAttention(Fusion): f"fuse_attention: failed to verify shape inference of {add_qk}" ) return + elif is_mul_split: + _, mask_nodes, _ = self.model.match_parent_paths( + add_qk, + [ + (["Where", "Cast", "Sub", "Cast", "Expand", "Unsqueeze"], [None, 0, 0, 1, 0, 
0]) + ], + output_name_to_node, + ) else: _, mask_nodes, _ = self.model.match_parent_paths( add_qk, diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_attention.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_attention.py index c7507218..38ddf629 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_attention.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_attention.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. @@ -9,10 +25,11 @@ from sys import path from typing import Tuple, Union import numpy as np +from onnx import NodeProto, TensorProto, helper, numpy_helper + from .fusion_base import Fusion from .fusion_options import AttentionMaskFormat from .fusion_utils import FusionUtils, NumpyHelper -from onnx import NodeProto, TensorProto, helper, numpy_helper from .onnx_model import OnnxModel from .shape_infer_helper import SymbolicShapeInferenceHelper, get_shape_from_type_proto @@ -75,7 +92,9 @@ class AttentionMask: outputs=[output_name], name=self.model.create_node_name("ReduceSum", "MaskReduceSum"), ) - mask_index_node.attribute.extend([helper.make_attribute("axes", [1]), helper.make_attribute("keepdims", 0)]) + mask_index_node.attribute.extend( + [helper.make_attribute("axes", [1]), helper.make_attribute("keepdims", 0)] + ) self.model.add_node(mask_index_node) self.mask_indice[input] = output_name @@ -94,7 +113,9 @@ class FusionAttention(Fusion): num_heads: int, attention_mask: AttentionMask, ): - super().__init__(model, "Attention", ["SkipLayerNormalization", "LayerNormalization"]) + super().__init__( + model, "Attention", ["SkipLayerNormalization", "LayerNormalization"] + ) self.hidden_size = hidden_size self.num_heads = num_heads self.attention_mask = attention_mask @@ -121,7 +142,9 @@ class FusionAttention(Fusion): q_shape_value = NumpyHelper.to_array(q_shape) if len(q_shape_value) != 4 or (q_shape_value[2] <= 0 or q_shape_value[3] <= 0): - logger.debug(f"q_shape_value={q_shape_value}. Expected value are like [0, 0, num_heads, head_size].") + logger.debug( + f"q_shape_value={q_shape_value}. Expected value are like [0, 0, num_heads, head_size]." + ) return self.num_heads, self.hidden_size # Fall back to user specified value num_heads = q_shape_value[2] @@ -130,7 +153,9 @@ class FusionAttention(Fusion): if self.num_heads > 0 and num_heads != self.num_heads: if self.num_heads_warning: - logger.warning(f"--num_heads is {self.num_heads}. Detected value is {num_heads}. Using detected value.") + logger.warning( + f"--num_heads is {self.num_heads}. Detected value is {num_heads}. Using detected value." 
+ ) self.num_heads_warning = False # Do not show the warning more than once if self.hidden_size > 0 and hidden_size != self.hidden_size: @@ -138,7 +163,9 @@ class FusionAttention(Fusion): logger.warning( f"--hidden_size is {self.hidden_size}. Detected value is {hidden_size}. Using detected value." ) - self.hidden_size_warning = False # Do not show the warning more than once + self.hidden_size_warning = ( + False # Do not show the warning more than once + ) return num_heads, hidden_size @@ -196,15 +223,23 @@ class FusionAttention(Fusion): assert num_heads > 0 if hidden_size > 0 and (hidden_size % num_heads) != 0: - logger.debug(f"input hidden size {hidden_size} is not a multiple of num of heads {num_heads}") + logger.debug( + f"input hidden size {hidden_size} is not a multiple of num of heads {num_heads}" + ) return None q_weight = self.model.get_initializer(q_matmul.input[1]) k_weight = self.model.get_initializer(k_matmul.input[1]) v_weight = self.model.get_initializer(v_matmul.input[1]) - q_bias = self.model.get_initializer(q_add.input[1]) or self.model.get_initializer(q_add.input[0]) - k_bias = self.model.get_initializer(k_add.input[1]) or self.model.get_initializer(k_add.input[0]) - v_bias = self.model.get_initializer(v_add.input[1]) or self.model.get_initializer(v_add.input[0]) + q_bias = self.model.get_initializer( + q_add.input[1] + ) or self.model.get_initializer(q_add.input[0]) + k_bias = self.model.get_initializer( + k_add.input[1] + ) or self.model.get_initializer(k_add.input[0]) + v_bias = self.model.get_initializer( + v_add.input[1] + ) or self.model.get_initializer(v_add.input[0]) if q_weight is None: print( @@ -283,7 +318,11 @@ class FusionAttention(Fusion): # Sometimes weights and bias are stored in fp16 if q_weight.data_type == 10: - weight.CopyFrom(numpy_helper.from_array(NumpyHelper.to_array(weight).astype(np.float16), weight.name)) + weight.CopyFrom( + numpy_helper.from_array( + NumpyHelper.to_array(weight).astype(np.float16), weight.name + ) + ) self.model.add_initializer(weight, self.this_graph_name) bias = helper.make_tensor( @@ -293,7 +332,11 @@ class FusionAttention(Fusion): vals=qkv_bias.flatten().tolist(), ) if q_bias.data_type == 10: - bias.CopyFrom(numpy_helper.from_array(NumpyHelper.to_array(bias).astype(np.float16), bias.name)) + bias.CopyFrom( + numpy_helper.from_array( + NumpyHelper.to_array(bias).astype(np.float16), bias.name + ) + ) self.model.add_initializer(bias, self.this_graph_name) attention_inputs = [ @@ -321,7 +364,11 @@ class FusionAttention(Fusion): if is_qkv_diff_dims: attention_node.attribute.extend( - [helper.make_attribute("qkv_hidden_sizes", [qw_out_size, kw_out_size, vw_out_size])] + [ + helper.make_attribute( + "qkv_hidden_sizes", [qw_out_size, kw_out_size, vw_out_size] + ) + ] ) return attention_node @@ -400,7 +447,9 @@ class FusionAttention(Fusion): if children_types.count("MatMul") != 3: return - v_nodes = self.model.match_parent_path(matmul_qkv, ["Transpose", "Reshape", "Add", "MatMul"], [1, 0, 0, None]) + v_nodes = self.model.match_parent_path( + matmul_qkv, ["Transpose", "Reshape", "Add", "MatMul"], [1, 0, 0, None] + ) if v_nodes is None: logger.debug("fuse_attention: failed to match v path") return @@ -440,7 +489,9 @@ class FusionAttention(Fusion): else: (_, add_qk, _, matmul_qk) = qk_nodes - q_nodes = self.model.match_parent_path(matmul_qk, ["Transpose", "Reshape", "Add", "MatMul"], [0, 0, 0, None]) + q_nodes = self.model.match_parent_path( + matmul_qk, ["Transpose", "Reshape", "Add", "MatMul"], [0, 0, 0, None] + ) if q_nodes is 
None: q_nodes = self.model.match_parent_path( matmul_qk, @@ -454,7 +505,9 @@ class FusionAttention(Fusion): add_q = q_nodes[-2] matmul_q = q_nodes[-1] - k_nodes = self.model.match_parent_path(matmul_qk, ["Transpose", "Reshape", "Add", "MatMul"], [1, 0, 0, None]) + k_nodes = self.model.match_parent_path( + matmul_qk, ["Transpose", "Reshape", "Add", "MatMul"], [1, 0, 0, None] + ) if k_nodes is None: k_nodes = self.model.match_parent_path( matmul_qk, @@ -492,7 +545,9 @@ class FusionAttention(Fusion): if add_qk is not None: add_qk_str = self.get_add_qk_str(add_qk) if add_qk_str is None: - logger.debug(f"fuse_attention: failed to verify shape inference of {add_qk}") + logger.debug( + f"fuse_attention: failed to verify shape inference of {add_qk}" + ) return else: _, mask_nodes, _ = self.model.match_parent_paths( @@ -510,7 +565,11 @@ class FusionAttention(Fusion): logger.debug("fuse_attention: failed to match mask path") return - if matmul_v.input[0] == root_input and matmul_q.input[0] == root_input and matmul_k.input[0] == root_input: + if ( + matmul_v.input[0] == root_input + and matmul_q.input[0] == root_input + and matmul_k.input[0] == root_input + ): mask_index = self.attention_mask.process_mask(mask_nodes[-1].input[0]) attention_last_node = reshape_qkv if einsum_node is None else transpose_qkv @@ -545,7 +604,9 @@ class FusionAttention(Fusion): name="shape_modified_tensor" + unique_index, data_type=TensorProto.INT64, dims=[4], - vals=np.int64([0, 0, q_num_heads, int(q_hidden_size / q_num_heads)]).tobytes(), + vals=np.int64( + [0, 0, q_num_heads, int(q_hidden_size / q_num_heads)] + ).tobytes(), raw=True, ) self.model.add_initializer(shape_tensor, self.this_graph_name) @@ -560,7 +621,9 @@ class FusionAttention(Fusion): ) einsum_node.input[0] = new_edge - self.nodes_to_remove.extend([attention_last_node, transpose_qkv, matmul_qkv]) + self.nodes_to_remove.extend( + [attention_last_node, transpose_qkv, matmul_qkv] + ) self.nodes_to_remove.extend(qk_nodes) self.nodes_to_remove.extend(q_nodes) self.nodes_to_remove.extend(k_nodes) diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_base.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_base.py index aaf742a4..3732b0f5 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_base.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_base.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
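Stepping back to the `float16.py` helpers reformatted earlier in this patch: `convert_np_to_float16` clamps out-of-range float32 values before the cast so that tiny magnitudes do not flush to zero and large magnitudes do not overflow to inf. A small numpy illustration of that clamping, using the default thresholds from the function signature:

```python
import numpy as np

min_positive_val, max_finite_val = 5.96e-08, 65504.0
vals = np.array([1e-9, -1e-9, 7.0e4, -7.0e4, 1.0], dtype=np.float32)

# Same clamping as convert_np_to_float16: push tiny values up to the smallest
# positive float16 subnormal and cap large values at the largest finite float16.
out = np.where((vals > 0) & (vals < min_positive_val), min_positive_val, vals)
out = np.where((vals < 0) & (vals > -min_positive_val), -min_positive_val, out)
out = np.where(out > max_finite_val, max_finite_val, out)
out = np.where(out < -max_finite_val, -max_finite_val, out)
print(np.float16(out))  # tiny values survive as subnormals, 7e4 caps at 65504
```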
diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_biasgelu.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_biasgelu.py index 8e3406c7..045cd993 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_biasgelu.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_biasgelu.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. @@ -5,9 +21,10 @@ from logging import getLogger +from onnx import helper + from .fusion_base import Fusion from .fusion_utils import NumpyHelper -from onnx import helper from .onnx_model import OnnxModel logger = getLogger(__name__) diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_conformer_attention.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_conformer_attention.py index e825f95c..21161727 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_conformer_attention.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_conformer_attention.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_conformer_xsoftmax.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_conformer_xsoftmax.py index 78a40973..b55c2412 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_conformer_xsoftmax.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_conformer_xsoftmax.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_conv_reformat.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_conv_reformat.py new file mode 100644 index 00000000..23cdd0c2 --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_conv_reformat.py @@ -0,0 +1,128 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- +import math +from enum import Enum +from logging import getLogger +from os import name +from sys import path +from typing import Tuple, Union + +import numpy as np +import onnx +from onnx import NodeProto, TensorProto, helper, numpy_helper + +from .fusion_base import Fusion +from .fusion_options import AttentionMaskFormat +from .fusion_utils import FusionUtils, NumpyHelper +from .onnx_model import OnnxModel +from .shape_infer_helper import SymbolicShapeInferenceHelper, get_shape_from_type_proto + +logger = getLogger(__name__) + + +class FusionConvReformat(Fusion): + """ + Fuse FusionPVTAttention subgraph into one Attention node. + """ + + def __init__( + self, + model: OnnxModel, + ): + super().__init__( + model, + "FuseConvReformat_IxRT", + ["Transpose"], + ) + + + + def create_fuse_node( + self, inputs: str, outputs: str, before_conv: int, shape_data: list, prefix + ) -> Union[NodeProto, None]: + """Create an Attention node. + + Args: + input (str): input name + output (str): output name + + Returns: + Union[NodeProto, None]: the node created or None if failed. 
+ """ + + node_name = self.model.create_node_name(f"FuseConvReformat_{prefix}") + node = helper.make_node( + "FuseConvReformat_IxRT", + inputs=inputs, + outputs=outputs, + name=node_name, + ) + node.domain = "com.iluvatar" + + node.attribute.extend([helper.make_attribute("before_conv", before_conv)]) + node.attribute.extend([helper.make_attribute("shape_data", shape_data)]) + node.attribute.extend([helper.make_attribute("plugin_namespace", "")]) + node.attribute.extend([helper.make_attribute("plugin_version", "1")]) + return node + + def fuse(self, node, input_name_to_nodes, output_name_to_node): + + """ + eliminate Transpose(linear->nchw) + Transpose + path: + ----->Transpose ---->Reshape---> conv ----->Reshape ---->Transpose---> + + to: + ----->FuseConvReformat_IxRT---> conv ----->FuseConvReformat_IxRT---> + + """ + start_node = node + paths = { + "path": (["Reshape", "Conv", "Reshape","Transpose"], [0, 0, 0, 0]), # cross attention qery pass + } + + nodes, path = self.match_parent_path_from_dict(start_node, paths) + + if nodes is None: + logger.debug("FuseConvReformat: failed to match path") + return + + (reshape_after_node, conv_node, reshape_before_node, tranpose_before_node) = nodes + + perm1 = tranpose_before_node.attribute[0].ints + if perm1 !=[0, 2, 1]: + return + perm2 = start_node.attribute[0].ints + if perm2 !=[0, 2, 1]: + return + + before_shape_data = numpy_helper.to_array(self.model.get_initializer(reshape_before_node.input[1])) + + if before_shape_data.shape[0] != 4: + return + + after_shape_data = numpy_helper.to_array(self.model.get_initializer(reshape_after_node.input[1])) + if after_shape_data.shape[0] != 3: + return + node1_inputs = tranpose_before_node.input + node1_outputs = reshape_before_node.output + node1_before_conv = 1 + + new_node1 = self.create_fuse_node( + node1_inputs, node1_outputs, node1_before_conv, before_shape_data,"before") + + + node2_inputs = conv_node.output + node2_outputs = start_node.output + node2_before_conv = 0 + new_node2 = self.create_fuse_node( + node2_inputs, node2_outputs, node2_before_conv, after_shape_data,"after") + + self.nodes_to_add.append(new_node1) + self.nodes_to_add.append(new_node2) + self.node_name_to_graph_name[new_node1.name] = self.this_graph_name + self.node_name_to_graph_name[new_node2.name] = self.this_graph_name + self.nodes_to_remove.extend([start_node, reshape_after_node,reshape_before_node,tranpose_before_node]) + diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_cosyvoice_attention.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_cosyvoice_attention.py new file mode 100644 index 00000000..5bfa8768 --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_cosyvoice_attention.py @@ -0,0 +1,210 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+# + +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- +import math +from enum import Enum +from logging import getLogger +from os import name +from sys import path +from typing import Tuple, Union + +import numpy as np +import onnx +from onnx import NodeProto, TensorProto, helper, numpy_helper + +from .fusion_base import Fusion +from .fusion_options import AttentionMaskFormat +from .fusion_utils import FusionUtils, NumpyHelper +from .onnx_model import OnnxModel +from .shape_infer_helper import SymbolicShapeInferenceHelper, get_shape_from_type_proto + +logger = getLogger(__name__) + + + +class FusionCosyvoiceAttention(Fusion): + """ + Fuse T5Attention subgraph into one Attention node. + """ + + def __init__( + self, + model: OnnxModel, + ): + super().__init__( + model, + "CustomQkvCrossToContext_IxRT", + ["Softmax"], + ) + + # Flags to show warning only once + self.num_heads_warning = True + self.hidden_size_warning = True + + def get_num_heads_and_hidden_size(self, reshape_q: NodeProto) -> Tuple[int, int]: + """Detect num_heads and hidden_size from a reshape node. + + Args: + reshape_q (NodeProto): reshape node for Q + + Returns: + Tuple[int, int]: num_heads and hidden_size + """ + + # we assume that reshape fusion has done, so the shape is a tensor like [0, 0, num_heads, head_size] + q_shape = self.model.get_initializer(reshape_q.input[1]) + if q_shape is None: + logger.debug(f"{reshape_q.input[1]} is not initializer.") + return [0, 0] + + q_shape_value = NumpyHelper.to_array(q_shape) + if len(q_shape_value) != 4 or (q_shape_value[2] <= 0 or q_shape_value[3] <= 0): + logger.debug( + f"q_shape_value={q_shape_value}. Expected value are like [0, 0, num_heads, head_size]." + ) + return [0, 0] + + num_heads = q_shape_value[2] + head_size = q_shape_value[3] + hidden_size = num_heads * head_size + + return num_heads, hidden_size + + def create_decoder_attention_node( + self, inputs: str, outputs: str, type_mask: int, has_mask: int, scale: float + ) -> Union[NodeProto, None]: + """Create an Attention node. + + Args: + input (str): input name + output (str): output name + + Returns: + Union[NodeProto, None]: the node created or None if failed. 
+ """ + + attention_node_name = self.model.create_node_name("decoder_Attention") + attention_node = helper.make_node( + "CustomQkvCrossToContext_IxRT", + inputs=inputs, + outputs=outputs, + name=attention_node_name, + ) + attention_node.domain = "com.iluvatar" + attention_node.attribute.extend([helper.make_attribute("type_id", 2)]) + attention_node.attribute.extend([helper.make_attribute("scale", scale)]) + attention_node.attribute.extend([helper.make_attribute("has_mask", has_mask)]) + attention_node.attribute.extend([helper.make_attribute("plugin_namespace", "")]) + attention_node.attribute.extend([helper.make_attribute("plugin_version", "1")]) + attention_node.attribute.extend([helper.make_attribute("type_mask", type_mask)]) + + return attention_node + + def fuse(self, node, input_name_to_nodes, output_name_to_node): + + """ + path1: + + (query) --------------MatMul---Div --> add -->softmax --->MatMul---> + / / / + (key) ---->Transpose > / / + / / + (mask) ------------------------> / + / + (value)---------------------------------------------> + """ + + + + + import pdb + start_node = node + qkv_paths = { + "path1": ( + ["Add", "Div", "MatMul", "Transpose"], + [None, 0, None, 1], + ), # float mask self attention,self attention key pass + } + + qkv_nodes, qkv_path = self.match_parent_path_from_dict(start_node, qkv_paths) + + if qkv_nodes is None: + logger.debug("fuse_attention: failed to match qkv path") + return + next_nodes = self.model.get_children(node) + + if len(next_nodes) == 0: + return + + if next_nodes[0].op_type != "MatMul": + return + + second_matmul_node = next_nodes[0] + attention_inputs = None + attention_outputs = second_matmul_node.output + remove_nodes = [second_matmul_node, node] + + (add_node, div_node, first_matmul_node, transpose_node) = qkv_nodes + transpose_nodes = self.model.get_parents(first_matmul_node) + q_input = transpose_nodes[0].output[0] + + k_transpose_node = transpose_nodes[1] + k_transpose_node_perm = k_transpose_node.attribute[0].ints + + if k_transpose_node_perm == [0, 2, 3, 1]: #transpose has bean merge,[0,2,1,3]->[0, 1, 3, 2] = [0, 2, 3, 1] + k_input = transpose_nodes[1].output[0] + + transpose_nodes[1].attribute[0].ints[0] = 0 + transpose_nodes[1].attribute[0].ints[1] = 2 + transpose_nodes[1].attribute[0].ints[2] = 1 + transpose_nodes[1].attribute[0].ints[3] = 3 + + remove_nodes.extend([add_node, div_node, first_matmul_node]) + + elif k_transpose_node_perm == [0, 1, 3, 2]: + k_input = transpose_nodes[1].input[0] + remove_nodes.extend([add_node, div_node, first_matmul_node,k_transpose_node]) + + else: + return + + v_input = second_matmul_node.input[1] + attention_inputs = [q_input, k_input, v_input] + + has_mask = 1 + type_mask = 3 # float mask + + mask_input = add_node.input[0] + score_out = div_node.output[0] + if add_node.input[0] == score_out: + mask_input = add_node.input[1] + attention_inputs.append(mask_input) + + scale_data = self.model.get_initializer_input_edges(div_node.name, return_np_array = True) + scale = 1.0 / scale_data[0] + + atten_node = self.create_decoder_attention_node( + attention_inputs, attention_outputs, type_mask, has_mask, scale + ) + + self.nodes_to_add.append(atten_node) + self.node_name_to_graph_name[atten_node.name] = self.this_graph_name + self.nodes_to_remove.extend(remove_nodes) + diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_cosyvoice_splitQKV.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_cosyvoice_splitQKV.py 
new file mode 100755 index 00000000..d1a1baff --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_cosyvoice_splitQKV.py @@ -0,0 +1,197 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- + +from logging import getLogger +from typing import Tuple, Union + +from onnx import NodeProto, TensorProto, helper, numpy_helper + +from .fusion_base import Fusion +from .fusion_utils import NumpyHelper +from .onnx_model import OnnxModel + +logger = getLogger(__name__) + + +class FusionSplitQKV(Fusion): + """ + Fuse FusionSplitQKV + """ + + def __init__(self, model: OnnxModel, hidden_size: int, num_heads: int): + super().__init__(model, "SplitQKV_IxRT", "Split") + + self.hidden_size = hidden_size + self.num_heads = num_heads + + def create_node( + self, inputs: list, outputs:list + ) -> Union[NodeProto, None]: + """Create an create node. + + Args: + data_input (str): data input name + mask_input (str): max input name + output (str): output name + + Returns: + Union[NodeProto, None]: the node created or None if failed. 
+ """ + node_name = self.model.create_node_name("SplitQKV_IxRT") + + + k_cache_output = outputs[1] + v_cache_output = outputs[2] + + concat_k_input = k_cache_output + "_k_concat_input" + concat_v_input = v_cache_output + "_v_concat_input" + + plugin_outputs = [outputs[0],concat_k_input,concat_v_input] + + new_node = helper.make_node( + "SplitQKV_IxRT", + inputs=inputs, + outputs=plugin_outputs, + name=node_name, + ) + new_node.domain = "com.iluvatar" + new_node.attribute.extend([helper.make_attribute("plugin_namespace", "")]) + new_node.attribute.extend([helper.make_attribute("plugin_version", "1")]) + new_node.attribute.extend( + [helper.make_attribute("atten_scale", 1.0)] + ) + new_node.attribute.extend( + [helper.make_attribute("transpose", 1)] + ) + new_node.attribute.extend([helper.make_attribute("num_head", self.num_heads)]) + new_node.attribute.extend( + [helper.make_attribute("head_dim", self.hidden_size // self.num_heads)] + ) + + + + k_concat_node_name = node_name + "_k_concat" + v_concat_node_name = node_name + "_v_concat" + + k_concat_node = helper.make_node( + "Identity", + inputs=[concat_k_input], + outputs=[outputs[1]], + name=k_concat_node_name, + ) + + v_concat_node = helper.make_node( + "Identity", + inputs=[concat_v_input], + outputs=[outputs[2]], + name=v_concat_node_name, + ) + + self.model.replace_input_of_all_nodes(outputs[1],concat_k_input) + self.model.replace_input_of_all_nodes(outputs[2],concat_v_input) + return new_node,k_concat_node,v_concat_node + + def fuse(self, node, input_name_to_nodes, output_name_to_node): + split_node = node + split_data = self.model.get_initializer_input_edges(node.name,return_np_array = True) + if split_data[0].shape != (3,): + return + if split_data[0][0] != split_data[0][1] and split_data[0][1] != split_data[0][2]: + return + + q_input, k_input, v_input = node.output[0],node.output[1],node.output[2] + + q_path_nodes= [] + k_path_nodes= [] + v_path_nodes= [] + + reshape_nodes = self.model.get_children(node) + + for node in reshape_nodes: + if node.op_type != "Reshape": + return + q_reshape_node,k_reshape_node,v_reshape_node = reshape_nodes[0],reshape_nodes[1],reshape_nodes[2] + + q_path_nodes.append(q_reshape_node) + k_path_nodes.append(k_reshape_node) + v_path_nodes.append(v_reshape_node) + + q_transpose_nodes = self.model.get_children(q_reshape_node) + k_transpose_nodes = self.model.get_children(k_reshape_node) + v_transpose_nodes = self.model.get_children(v_reshape_node) + + if len(q_transpose_nodes)!=1 and (not k_transpose_nodes) and len(v_transpose_nodes) != 1: + return + + + if (q_transpose_nodes[0].attribute[0].ints != [0, 2, 1, 3]) and (v_transpose_nodes[0].attribute[0].ints !=[0, 2, 1, 3]): + return + + if len(k_transpose_nodes) == 2: + if (k_transpose_nodes[0].attribute[0].ints != k_transpose_nodes[1].attribute[0].ints) and (k_transpose_nodes[0].attribute[0].ints !=[0, 2, 1, 3]): + return + + + if len(k_transpose_nodes) == 1: + if (k_transpose_nodes[0].attribute[0].ints !=[0, 2, 1, 3]): + return + + + q_transpose_node = q_transpose_nodes[0] + k_transpose_node_0 = k_transpose_nodes[0] + v_transpose_node = v_transpose_nodes[0] + + k_output = k_transpose_node_0.output[0] + + if len(k_transpose_nodes) == 2: + k_transpose_node_1 = k_transpose_nodes[1] + next_node = self.model.get_children(k_transpose_node_1) + if not next_node: + return + + self.model.replace_node_input(next_node[0], k_transpose_node_1.output[0], k_transpose_node_0.output[0]) + + + q_path_nodes.append(q_transpose_node) + v_path_nodes.append(v_transpose_node) + 
k_path_nodes.extend(k_transpose_nodes) + + plugin_inputs = [split_node.input[0]] + plugin_outputs = [q_transpose_node.output[0], k_output,v_transpose_node.output[0]] + + remove_nodes = [split_node] + + remove_nodes.extend(q_path_nodes) + remove_nodes.extend(k_path_nodes) + remove_nodes.extend(v_path_nodes) + + new_node,k_cache_concat_node, v_cache_concat_node = self.create_node(plugin_inputs, plugin_outputs) + + self.nodes_to_add.append(new_node) + self.nodes_to_add.append(k_cache_concat_node) + self.nodes_to_add.append(v_cache_concat_node) + + self.node_name_to_graph_name[new_node.name] = self.this_graph_name + self.node_name_to_graph_name[k_cache_concat_node.name] = self.this_graph_name + self.node_name_to_graph_name[v_cache_concat_node.name] = self.this_graph_name + self.nodes_to_remove.extend(remove_nodes) + + diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_cosyvoice_splitQKV_update_KVcache.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_cosyvoice_splitQKV_update_KVcache.py new file mode 100644 index 00000000..6b1599d4 --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_cosyvoice_splitQKV_update_KVcache.py @@ -0,0 +1,188 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- + +from logging import getLogger +from typing import Tuple, Union + +from onnx import NodeProto, TensorProto, helper, numpy_helper + +from .fusion_base import Fusion +from .fusion_utils import NumpyHelper +from .onnx_model import OnnxModel + +logger = getLogger(__name__) + + +class FusionCosyVoiceSplitQKVUpdateKVCache(Fusion): + """ + Fuse FusionSplitQKVUpdateKVCache + """ + + def __init__(self, model: OnnxModel, hidden_size: int, num_heads: int): + super().__init__( + model, "SplitQKVUpdateKVCache_IxRT", "Split" + ) + + self.hidden_size = hidden_size + self.num_heads = num_heads + + def create_node( + self, + inputs: list, + outputs: list, + ) -> Union[NodeProto, None]: + """Create an XSoftmax node. + + Args: + data_input (str): data input name + mask_input (str): max input name + output (str): output name + + Returns: + Union[NodeProto, None]: the node created or None if failed. 
+ """ + node_name = self.model.create_node_name("SplitQKVUpdateKVCache_IxRT") + + k_cache_output = outputs[1] + v_cache_output = outputs[2] + + concat_k_input = k_cache_output + "_k_concat_input" + concat_v_input = v_cache_output + "_v_concat_input" + + plugin_outputs = [outputs[0],concat_k_input,concat_v_input] + + new_node = helper.make_node( + "SplitQKVUpdateKVCache_IxRT", + inputs=inputs, + outputs=plugin_outputs, + name=node_name, + ) + + k_concat_node_name = node_name + "_k_concat" + v_concat_node_name = node_name + "_v_concat" + + k_concat_node = helper.make_node( + "Identity", + inputs=[concat_k_input], + outputs=[outputs[1]], + name=k_concat_node_name, + ) + + + + v_concat_node = helper.make_node( + "Identity", + inputs=[concat_v_input], + outputs=[outputs[2]], + name=v_concat_node_name, + ) + + + + + + + new_node.domain = "com.iluvatar" + new_node.attribute.extend([helper.make_attribute("plugin_namespace", "")]) + new_node.attribute.extend([helper.make_attribute("plugin_version", "1")]) + new_node.attribute.extend([helper.make_attribute("num_head", self.num_heads)]) + new_node.attribute.extend( + [helper.make_attribute("head_dim", self.hidden_size // self.num_heads)] + ) + + self.model.replace_input_of_all_nodes(outputs[1],concat_k_input) + self.model.replace_input_of_all_nodes(outputs[2],concat_v_input) + + return new_node,k_concat_node,v_concat_node + + def fuse(self, node, input_name_to_nodes, output_name_to_node): + + split_node = node + split_data = self.model.get_initializer_input_edges(node.name,return_np_array = True) + if split_data[0].shape != (3,): + return + if split_data[0][0] != split_data[0][1] and split_data[0][1] != split_data[0][2]: + return + + q_input, k_input, v_input = node.output[0],node.output[1],node.output[2] + + q_path_nodes= [] + k_path_nodes= [] + v_path_nodes= [] + + reshape_nodes = self.model.get_children(node) + + for node in reshape_nodes: + if node.op_type != "Reshape": + return + q_reshape_node,k_reshape_node,v_reshape_node = reshape_nodes[0],reshape_nodes[1],reshape_nodes[2] + + q_path_nodes.append(q_reshape_node) + k_path_nodes.append(k_reshape_node) + v_path_nodes.append(v_reshape_node) + + q_transpose_nodes = self.model.get_children(q_reshape_node) + k_transpose_nodes = self.model.get_children(k_reshape_node) + v_transpose_nodes = self.model.get_children(v_reshape_node) + + if len(q_transpose_nodes)!=1 and len(k_transpose_nodes) != 1 and len(v_transpose_nodes) != 1: + return + + + q_transpose_node = q_transpose_nodes[0] + + k_transpose_node = k_transpose_nodes[0] + v_transpose_node = v_transpose_nodes[0] + + k_path_nodes.append(k_transpose_node) + v_path_nodes.append(v_transpose_node) + + + k_concat_nodes = self.model.get_children(k_transpose_node) + v_concat_nodes = self.model.get_children(v_transpose_node) + + if len(k_transpose_nodes) != 1 or len(v_transpose_nodes) != 1: + return + + k_concat_node = k_concat_nodes[0] + v_concat_node = v_concat_nodes[0] + + if v_concat_node.attribute[0].i != 2 and k_concat_node.attribute[0].i != 2: #axis = 2 + return + + k_path_nodes.append(k_concat_node) + v_path_nodes.append(v_concat_node) + + k_cache_input = k_concat_node.input[0] + if k_transpose_node.output[0] == k_concat_node.input[0]: + k_cache_input = k_concat_node.input[1] + k_cache_output = k_concat_node.output[0] + + + + v_cache_input = v_concat_node.input[0] + if v_transpose_node.output[0] == v_concat_node.input[0]: + v_cache_input = v_concat_node.input[1] + v_cache_output = v_concat_node.output[0] + + + plugin_inputs = 
[split_node.input[0],k_cache_input,v_cache_input] + plugin_outputs = [q_transpose_node.output[0], k_cache_output,v_cache_output] + remove_nodes = [split_node, q_reshape_node,q_transpose_node] + + remove_nodes.extend(k_path_nodes) + remove_nodes.extend(v_path_nodes) + new_node,k_cache_concat_node, v_cache_concat_node= self.create_node(plugin_inputs, plugin_outputs) + + self.nodes_to_add.append(new_node) + self.nodes_to_add.append(k_cache_concat_node) + self.nodes_to_add.append(v_cache_concat_node) + + self.node_name_to_graph_name[new_node.name] = self.this_graph_name + self.node_name_to_graph_name[k_cache_concat_node.name] = self.this_graph_name + self.node_name_to_graph_name[v_cache_concat_node.name] = self.this_graph_name + + self.nodes_to_remove.extend(remove_nodes) + diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_customfc.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_customfc.py index e9e40115..c2dd2433 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_customfc.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_customfc.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
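The two CosyVoice SplitQKV passes above share one rewiring trick: the new plugin writes its K/V results to freshly named internal tensors, every downstream consumer is redirected to those names via replace_input_of_all_nodes, and an Identity node re-publishes the original tensor names so that graph outputs such as the KV cache keep their labels. The sketch below is a minimal, self-contained illustration of that pattern, not code from this patch; the function name and the "_internal" suffix are invented for the example.

from onnx import helper

def reroute_through_identity(nodes, tensor_name):
    """Point every consumer of `tensor_name` at an internal tensor, then restore
    the public name with an Identity node (hypothetical helper for illustration)."""
    internal = tensor_name + "_internal"
    for n in nodes:
        for i, inp in enumerate(n.input):
            if inp == tensor_name:
                n.input[i] = internal  # consumers now read the plugin's internal output
    # Identity keeps the original name alive for graph outputs and later passes
    return helper.make_node(
        "Identity",
        inputs=[internal],
        outputs=[tensor_name],
        name=tensor_name + "_identity",
    )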
@@ -291,7 +307,7 @@ class FusionCustomFCActivation(Fusion): fc_node = nodes[0] activation_type = 3 if node.op_type == "Gelu": - activation_type = 21 + activation_type = 3 if node.op_type == "Relu": activation_type = 4 @@ -342,3 +358,32 @@ class FusionConformerCustomFCActivation(Fusion): self.nodes_to_add.append(custom_fc_node) self.nodes_to_remove.extend([node, sigmoid_node, custom_fc_node]) self.node_name_to_graph_name[custom_fc_node.name] = self.this_graph_name + + +class FusionTorchvisionVitCustomFC(Fusion): + def __init__(self, model: OnnxModel): + super().__init__(model, "CustomFCPluginDynamic_IxRT", ["CustomQKVToContextPluginDynamic_IxRT"], "torchvision vit custom_fc",) + + def fuse(self, node, input_name_to_nodes, output_name_to_node): + + custom_fc_node_0 = self.model.get_children(node, input_name_to_nodes) + transpose_node_0 = self.model.get_children(custom_fc_node_0[0], input_name_to_nodes) + + if transpose_node_0[0].op_type != "Transpose": + return + + custom_fc_node_0[0].output[0] = transpose_node_0[0].output[0] + + nodes = self.model.match_parent_path(node, ["CustomFCPluginDynamic_IxRT","Transpose"], [0, 0]) + if nodes is None: + return + + (custom_fc_node_1, transpose_node_1) = nodes + custom_fc_node_1.input[0] = transpose_node_1.input[0] + + self.nodes_to_add.append(custom_fc_node_1) + self.nodes_to_add.append(custom_fc_node_0[0]) + self.nodes_to_remove.extend([transpose_node_1, custom_fc_node_1, transpose_node_0[0], custom_fc_node_0[0]]) + self.node_name_to_graph_name[custom_fc_node_1.name] = self.this_graph_name + self.node_name_to_graph_name[custom_fc_node_0[0].name] = self.this_graph_name + \ No newline at end of file diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_disentangled_attention.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_disentangled_attention.py index 04eb863f..670a767e 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_disentangled_attention.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_disentangled_attention.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
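For reference, the FusionCustomFCActivation change above boils down to a small lookup: a trailing Gelu or Relu is folded into the preceding CustomFCPluginDynamic_IxRT node as a numeric code (3 for GELU after this change, 4 for ReLU, with 3 as the fallback). The sketch below is a hedged illustration only; the attribute key "act_type" and the helper are placeholders, not the plugin's confirmed API.

from onnx import helper

ACTIVATION_CODES = {"Gelu": 3, "Relu": 4}  # codes taken from the hunk above; 3 is also the fallback

def fold_activation(fc_node, activation_op_type):
    """Attach the activation code for `activation_op_type` to an existing FC NodeProto.
    The attribute name here is assumed for illustration; the real plugin may differ."""
    code = ACTIVATION_CODES.get(activation_op_type, 3)
    fc_node.attribute.extend([helper.make_attribute("act_type", code)])
    return fc_node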
diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_embedlayer.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_embedlayer.py index 90bddbf8..f46fa2c7 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_embedlayer.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_embedlayer.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. @@ -6,9 +22,10 @@ from logging import getLogger from typing import Dict, List, Tuple, Union +from onnx import NodeProto, TensorProto, helper + from .fusion_base import Fusion from .fusion_utils import FusionUtils -from onnx import NodeProto, TensorProto, helper from .onnx_model import OnnxModel logger = getLogger(__name__) @@ -33,7 +50,9 @@ class FusionEmbedLayerNoMask(Fusion): self.attention = None self.embed_node = None - def match_two_gather(self, add: NodeProto) -> Union[None, Tuple[NodeProto, NodeProto]]: + def match_two_gather( + self, add: NodeProto + ) -> Union[None, Tuple[NodeProto, NodeProto]]: gather_0_path = self.model.match_parent_path(add, ["Gather"], [0]) if gather_0_path is None: return None @@ -70,7 +89,11 @@ class FusionEmbedLayerNoMask(Fusion): children = input_name_to_nodes[layernorm.output[0]] # For Albert, there is MatMul+Add after embedding layer before attention. - if len(children) == 1 and children[0].op_type == "MatMul" and children[0].output[0] in input_name_to_nodes: + if ( + len(children) == 1 + and children[0].op_type == "MatMul" + and children[0].output[0] in input_name_to_nodes + ): grandchildren = input_name_to_nodes[children[0].output[0]] if ( len(grandchildren) == 1 @@ -90,24 +113,37 @@ class FusionEmbedLayerNoMask(Fusion): if is_distil_bert: # SkipLayerNormailization might exist when model has been optimized by ORT first. 
if ( - children_types != ["MatMul", "MatMul", "MatMul", "Shape", "SkipLayerNormalization"] - and children_types != ["Add", "MatMul", "MatMul", "MatMul", "Shape", "Shape"] + children_types + != ["MatMul", "MatMul", "MatMul", "Shape", "SkipLayerNormalization"] + and children_types + != ["Add", "MatMul", "MatMul", "MatMul", "Shape", "Shape"] and children_types != ["Add", "MatMul", "MatMul", "MatMul", "Shape"] ): - logger.debug("No Attention like subgraph in children of LayerNormalization") + logger.debug( + "No Attention like subgraph in children of LayerNormalization" + ) return False else: - if children_types != ["Add", "MatMul", "MatMul", "MatMul",] and children_types != [ + if children_types != [ + "Add", + "MatMul", + "MatMul", + "MatMul", + ] and children_types != [ "MatMul", "MatMul", "MatMul", "SkipLayerNormalization", ]: - logger.debug("No Attention like subgraph in children of LayerNormalization") + logger.debug( + "No Attention like subgraph in children of LayerNormalization" + ) return False return True - def match_position_embedding_distilbert(self, position_embedding_gather, input_ids, output_name_to_node): + def match_position_embedding_distilbert( + self, position_embedding_gather, input_ids, output_name_to_node + ): """ Match position embedding path from input_ids to Gather for DistilBert. Pattern is like the following: @@ -128,7 +164,9 @@ class FusionEmbedLayerNoMask(Fusion): Gather """ # remove after tests pass - path1 = self.model.match_parent_path(position_embedding_gather, ["Expand", "Shape"], [1, 1]) + path1 = self.model.match_parent_path( + position_embedding_gather, ["Expand", "Shape"], [1, 1] + ) if path1 is None: path1 = self.model.match_parent_path( position_embedding_gather, @@ -155,7 +193,8 @@ class FusionEmbedLayerNoMask(Fusion): range_node = path2[1] if not ( - self.utils.check_node_input_value(range_node, 0, 0) and self.utils.check_node_input_value(range_node, 2, 1) + self.utils.check_node_input_value(range_node, 0, 0) + and self.utils.check_node_input_value(range_node, 2, 1) ): return False @@ -169,7 +208,9 @@ class FusionEmbedLayerNoMask(Fusion): return True - def match_position_embedding_roberta(self, position_embedding_gather, input_ids, output_name_to_node): + def match_position_embedding_roberta( + self, position_embedding_gather, input_ids, output_name_to_node + ): """Match position embedding path from input_ids to Gather for Roberta. Roberta Embedding Layer Pattern (* is optional since it might be removed by ORT, ? is the padding word id): @@ -216,10 +257,12 @@ class FusionEmbedLayerNoMask(Fusion): return False - def match_position_embedding_bert(self, position_embedding_gather, input_ids, output_name_to_node): + def match_position_embedding_bert( + self, position_embedding_gather, input_ids, output_name_to_node + ): """ Match position embedding path from input_ids to Gather for BERT. 
- BERT Embedding Layer Pattern: + BERT Embedding Layer Pattern: (input_ids) / \ / Shape @@ -232,7 +275,7 @@ class FusionEmbedLayerNoMask(Fusion): \ | | \ Gather Slice (data[1,512], starts=0, ends=*, axes=1, steps=1) \ / | - Add Gather + Add Gather \ / Add | @@ -255,7 +298,10 @@ class FusionEmbedLayerNoMask(Fusion): and slice_weight.shape[0] == 1 and self.utils.check_node_input_value(slice, 1, [0]) and self.utils.check_node_input_value(slice, 3, [1]) - and (len(slice.input) == 4 or self.utils.check_node_input_value(slice, 4, [1])) + and ( + len(slice.input) == 4 + or self.utils.check_node_input_value(slice, 4, [1]) + ) ): return False @@ -288,8 +334,12 @@ class FusionEmbedLayerNoMask(Fusion): return input_ids == shape.input[0] - def match_position_embedding(self, position_embedding_gather, input_ids, output_name_to_node): - if self.match_position_embedding_bert(position_embedding_gather, input_ids, output_name_to_node): + def match_position_embedding( + self, position_embedding_gather, input_ids, output_name_to_node + ): + if self.match_position_embedding_bert( + position_embedding_gather, input_ids, output_name_to_node + ): return True # TODO: Support roberta (position starts from 2 instead of 0) in EmbedLayerNormalization kernel @@ -297,15 +347,21 @@ class FusionEmbedLayerNoMask(Fusion): # if self.match_position_embedding_roberta(position_embedding_gather, input_ids, output_name_to_node): # return True - if self.match_position_embedding_distilbert(position_embedding_gather, input_ids, output_name_to_node): + if self.match_position_embedding_distilbert( + position_embedding_gather, input_ids, output_name_to_node + ): return True return False - def check_embedding(self, word_embedding_gather, segment_embedding_gather, position_embedding_gather): + def check_embedding( + self, word_embedding_gather, segment_embedding_gather, position_embedding_gather + ): """Sanity check of embedding weights, and match hidden_size of weights and shape of inputs.""" input_ids = word_embedding_gather.input[1] - segment_ids = segment_embedding_gather.input[1] if segment_embedding_gather else None + segment_ids = ( + segment_embedding_gather.input[1] if segment_embedding_gather else None + ) position_ids = position_embedding_gather.input[1] if self.shape_infer_helper is not None: @@ -324,7 +380,9 @@ class FusionEmbedLayerNoMask(Fusion): ) return False - if segment_ids and not self.shape_infer_helper.compare_shape(input_ids, segment_ids): + if segment_ids and not self.shape_infer_helper.compare_shape( + input_ids, segment_ids + ): logger.info( "Cannot fuse EmbedLayerNormalization: input_ids and segment_ids does not have same shape: {} != {}".format( input_ids_shape, @@ -333,28 +391,40 @@ class FusionEmbedLayerNoMask(Fusion): ) return False - word_embedding_table = self.model.get_constant_value(word_embedding_gather.input[0]) + word_embedding_table = self.model.get_constant_value( + word_embedding_gather.input[0] + ) if word_embedding_table is None or len(word_embedding_table.shape) != 2: - logger.info("Cannot fuse EmbedLayerNormalization: word embedding table is not expected") + logger.info( + "Cannot fuse EmbedLayerNormalization: word embedding table is not expected" + ) return False - position_embedding_table = self.model.get_constant_value(position_embedding_gather.input[0]) + position_embedding_table = self.model.get_constant_value( + position_embedding_gather.input[0] + ) if ( position_embedding_table is None or len(position_embedding_table.shape) != 2 or (word_embedding_table.shape[1] != 
position_embedding_table.shape[1]) ): - logger.info("Cannot fuse EmbedLayerNormalization: position embedding table is not expected") + logger.info( + "Cannot fuse EmbedLayerNormalization: position embedding table is not expected" + ) return False if segment_ids: - segment_embedding_table = self.model.get_constant_value(segment_embedding_gather.input[0]) + segment_embedding_table = self.model.get_constant_value( + segment_embedding_gather.input[0] + ) if ( segment_embedding_table is None or len(segment_embedding_table.shape) != 2 or (word_embedding_table.shape[1] != segment_embedding_table.shape[1]) ): - logger.info("Cannot fuse EmbedLayerNormalization: segment embedding table is not expected") + logger.info( + "Cannot fuse EmbedLayerNormalization: segment embedding table is not expected" + ) return False # In normal case, word embeding table is the largest, and segment embedding table is the smallest, while postion embedding table is in between. @@ -392,7 +462,9 @@ class FusionEmbedLayerNoMask(Fusion): graph_input = self.model.find_graph_input(input_name) if graph_input is not None: if graph_input.type.tensor_type.elem_type != TensorProto.INT32: - int32_output, input_cast_node = self.utils.cast_input_to_int32(input_name) + int32_output, input_cast_node = self.utils.cast_input_to_int32( + input_name + ) else: int32_output = input_name else: @@ -515,7 +587,9 @@ class FusionEmbedLayerNoMask(Fusion): return len(nodes) > 1 - def fuse_gpt2(self, layernorm, add_before_layernorm, input_name_to_nodes, output_name_to_node): + def fuse_gpt2( + self, layernorm, add_before_layernorm, input_name_to_nodes, output_name_to_node + ): # graph checks # gpt2 has no segment embedding, subgraph pattern is like # input_ids position_ids @@ -543,10 +617,14 @@ class FusionEmbedLayerNoMask(Fusion): input_ids = word_embedding_gather.input[1] position_ids = position_embedding_gather.input[1] - if not self.check_attention_subgraph(layernorm, input_name_to_nodes, is_distil_bert=False): + if not self.check_attention_subgraph( + layernorm, input_name_to_nodes, is_distil_bert=False + ): return False - if not self.check_embedding(word_embedding_gather, None, position_embedding_gather): + if not self.check_embedding( + word_embedding_gather, None, position_embedding_gather + ): return False optional_embedding_sum_output = False @@ -571,7 +649,9 @@ class FusionEmbedLayerNoMask(Fusion): return True - def fuse_distilbert(self, layernorm, add_before_layernorm, input_name_to_nodes, output_name_to_node): + def fuse_distilbert( + self, layernorm, add_before_layernorm, input_name_to_nodes, output_name_to_node + ): """Fuse embedding layer for DistilBert Args: layernorm (NodeProto): node of LayerNormalization or SkipLayerNormalization @@ -597,13 +677,19 @@ class FusionEmbedLayerNoMask(Fusion): word_embedding_gather, position_embedding_gather = two_gather input_ids = word_embedding_gather.input[1] - if not self.check_attention_subgraph(layernorm, input_name_to_nodes, is_distil_bert=True): + if not self.check_attention_subgraph( + layernorm, input_name_to_nodes, is_distil_bert=True + ): return False - if not self.match_position_embedding(position_embedding_gather, input_ids, output_name_to_node): + if not self.match_position_embedding( + position_embedding_gather, input_ids, output_name_to_node + ): return False - if not self.check_embedding(word_embedding_gather, None, position_embedding_gather): + if not self.check_embedding( + word_embedding_gather, None, position_embedding_gather + ): return False embed_node = self.create_fused_node( 
@@ -612,7 +698,9 @@ class FusionEmbedLayerNoMask(Fusion): self.finish_fusion(layernorm, embed_node) return True - def fuse_bert(self, layernorm, add_before_layernorm, input_name_to_nodes, output_name_to_node): + def fuse_bert( + self, layernorm, add_before_layernorm, input_name_to_nodes, output_name_to_node + ): """Fuse embedding layer for Bert Args: layernorm (NodeProto): node of LayerNormalization or SkipLayerNormalization @@ -633,23 +721,33 @@ class FusionEmbedLayerNoMask(Fusion): input_ids = word_embedding_gather.input[1] - if not self.check_attention_subgraph(layernorm, input_name_to_nodes, is_distil_bert=False): + if not self.check_attention_subgraph( + layernorm, input_name_to_nodes, is_distil_bert=False + ): return False - position_embedding_path = self.model.match_parent_path(add_before_layernorm, ["Gather"], [1]) + position_embedding_path = self.model.match_parent_path( + add_before_layernorm, ["Gather"], [1] + ) if position_embedding_path is None: return False position_embedding_gather = position_embedding_path[0] - if not self.match_position_embedding(position_embedding_gather, input_ids, output_name_to_node): - if not self.match_position_embedding(segment_embedding_gather, input_ids, output_name_to_node): + if not self.match_position_embedding( + position_embedding_gather, input_ids, output_name_to_node + ): + if not self.match_position_embedding( + segment_embedding_gather, input_ids, output_name_to_node + ): return False # position and segment are switched temp = segment_embedding_gather segment_embedding_gather = position_embedding_gather position_embedding_gather = temp - if not self.check_embedding(word_embedding_gather, segment_embedding_gather, position_embedding_gather): + if not self.check_embedding( + word_embedding_gather, segment_embedding_gather, position_embedding_gather + ): return False embed_node = self.create_fused_node( @@ -671,13 +769,19 @@ class FusionEmbedLayerNoMask(Fusion): else: # SkipLayerNormalization add_before_layernorm = node # Add is fused into SkipLayerNormalization - if self.fuse_gpt2(node, add_before_layernorm, input_name_to_nodes, output_name_to_node): + if self.fuse_gpt2( + node, add_before_layernorm, input_name_to_nodes, output_name_to_node + ): return - if self.fuse_distilbert(node, add_before_layernorm, input_name_to_nodes, output_name_to_node): + if self.fuse_distilbert( + node, add_before_layernorm, input_name_to_nodes, output_name_to_node + ): return - if self.fuse_bert(node, add_before_layernorm, input_name_to_nodes, output_name_to_node): + if self.fuse_bert( + node, add_before_layernorm, input_name_to_nodes, output_name_to_node + ): return @@ -701,3 +805,274 @@ class FusionEmbedLayerNormalization(FusionEmbedLayerNoMask): self.nodes_to_remove.extend([node]) embed_node.input.append(mask_input_name) embed_node.output[1] = mask_index + + +class FusionBertEmbedLayerNormalization(Fusion): + """ + Fuse BertEmbedLayerNormalization subgraph into one node. 
+ """ + + def __init__(self, model: OnnxModel): + super().__init__( + model, "CustomEmbLayerNormPluginDynamic_IxRT", "CustomQKVToContextPluginDynamic_IxRT" + ) + + def fuse(self, node, input_name_to_nodes: Dict, output_name_to_node: Dict): + """ + input --> CustomEmbLayerNormPluginDynamic_IxRT --> CustomFCPluginDynamic_IxRT --> CustomQKVToContextPluginDynamic_IxRT --> CustomFCPluginDynamic_IxRT + """ + children = self.model.get_children(node, input_name_to_nodes) + parent = self.model.get_parents(node, output_name_to_node) + + if len(children) == 0: + return + if len(parent) == 0: + return + + start_node = node + + # word_embeddings + word_embeddings_node = self.model.match_parent_path( + start_node, + ["CustomFCPluginDynamic_IxRT", "LayerNormalization", "Add", "Add", "Gather"], + [0, 0, 0, 0, 0], + output_name_to_node, + ) + + # token_type_embeddings + token_type_embeddings_node = self.model.match_parent_path( + start_node, + ["CustomFCPluginDynamic_IxRT", "LayerNormalization", "Add", "Add", "Gather"], + [0, 0, 0, 0, 1], + output_name_to_node, + ) + + # attention_mask + attention_mask_node = self.model.match_parent_path( + start_node, + ["Mul", "Sub", "Cast", "Unsqueeze"], + [1, 0, 1, 0], + output_name_to_node, + ) + + if word_embeddings_node is None or token_type_embeddings_node is None or attention_mask_node is None: + return + + if word_embeddings_node and token_type_embeddings_node and attention_mask_node: + subgraph_nodes = [] + subgraph_nodes.extend(word_embeddings_node) + subgraph_nodes.extend(token_type_embeddings_node) + subgraph_nodes.extend(attention_mask_node) + + subgraph_nodes_unique = [] + for item in subgraph_nodes: + if item not in subgraph_nodes_unique: + subgraph_nodes_unique.append(item) + subgraph_nodes_remove = [] + for item in subgraph_nodes_unique: + if item.op_type != "CustomFCPluginDynamic_IxRT": + subgraph_nodes_remove.append(item) + + # input_ids = self.model.get_graph_inputs_excluding_initializers()[0] + # token_type_ids = self.model.get_graph_inputs_excluding_initializers()[1] + # attention_mask = self.model.get_graph_inputs_excluding_initializers()[2] + + emblayernorm_out = word_embeddings_node[1].output[0] + emblayernorm_out_mask = attention_mask_node[0].output[0] + + # self.model.modify_node_output_type(emblayernorm_out_mask, 5) + + beta_data = self.model.get_initializer(word_embeddings_node[1].input[2], True) + embeddings_layernorm_beta_name = "bert_embeddings_layernorm_beta" + embeddings_layernorm_beta = helper.make_tensor( + embeddings_layernorm_beta_name, TensorProto.FLOAT, beta_data.shape, beta_data.flatten().tolist()) + + gamma_data = self.model.get_initializer(word_embeddings_node[1].input[1], True) + embeddings_layernorm_gamma_name = "bert_embeddings_layernorm_gamma" + embeddings_layernorm_gamma = helper.make_tensor( + embeddings_layernorm_gamma_name, TensorProto.FLOAT, gamma_data.shape, gamma_data.flatten().tolist()) + + embeddings_word_embeddings_data = self.model.get_initializer(word_embeddings_node[4].input[0], True) + embeddings_word_embeddings_name = "bert_embeddings_word_embeddings" + embeddings_word_embeddings = helper.make_tensor( + embeddings_word_embeddings_name, TensorProto.FLOAT, embeddings_word_embeddings_data.shape, + embeddings_word_embeddings_data.flatten().tolist()) + + embeddings_token_type_embeddings_data = self.model.get_initializer(token_type_embeddings_node[4].input[0], True) + embeddings_token_type_embeddings_name = "bert_embeddings_token_type_embeddings" + embeddings_token_type_embeddings = helper.make_tensor( + 
helper.make_tensor(
+                embeddings_token_type_embeddings_name, TensorProto.FLOAT, embeddings_token_type_embeddings_data.shape,
+                embeddings_token_type_embeddings_data.flatten().tolist())
+
+            embeddings_position_embeddings_data = self.model.get_initializer(token_type_embeddings_node[2].input[1], True)
+            embeddings_position_embeddings_name = "bert_embeddings_position_embeddings"
+            embeddings_position_embeddings = helper.make_tensor(
+                embeddings_position_embeddings_name, TensorProto.FLOAT, embeddings_position_embeddings_data.shape,
+                embeddings_position_embeddings_data.flatten().tolist())
+
+            self.model.add_initializer(embeddings_layernorm_beta, self.this_graph_name)
+            self.model.add_initializer(embeddings_layernorm_gamma, self.this_graph_name)
+            self.model.add_initializer(embeddings_word_embeddings, self.this_graph_name)
+            self.model.add_initializer(embeddings_token_type_embeddings, self.this_graph_name)
+            self.model.add_initializer(embeddings_position_embeddings, self.this_graph_name)
+
+
+            emblayernorm_node = helper.make_node(
+                "CustomEmbLayerNormPluginDynamic_IxRT",
+                inputs=[word_embeddings_node[4].input[1], token_type_embeddings_node[4].input[1], attention_mask_node[3].input[0]],
+                outputs=[emblayernorm_out, emblayernorm_out_mask],
+                name=self.model.create_node_name(
+                    "BertEmbedLayerNormalization", name_prefix="BertEmbedLayerNormalization"
+                ),
+            )
+            emblayernorm_node.domain = "com.iluvatar"
+            emblayernorm_node.attribute.extend([helper.make_attribute("plugin_namespace", "")])
+            emblayernorm_node.attribute.extend([helper.make_attribute("plugin_version", "1")])
+            emblayernorm_node.attribute.extend([helper.make_attribute("output_fp16", 1)])
+            emblayernorm_node.attribute.extend([helper.make_attribute("full_mask", 1)])
+            emblayernorm_node.attribute.extend([helper.make_attribute("mha_type_id", 2)])
+            emblayernorm_node.attribute.extend([helper.make_attribute("pad_id", 0)])
+            emblayernorm_node.attribute.extend([helper.make_attribute("bert_embeddings_layernorm_beta", embeddings_layernorm_beta)])
+            emblayernorm_node.attribute.extend([helper.make_attribute("bert_embeddings_layernorm_gamma", embeddings_layernorm_gamma)])
+            emblayernorm_node.attribute.extend([helper.make_attribute("bert_embeddings_word_embeddings", embeddings_word_embeddings)])
+            emblayernorm_node.attribute.extend([helper.make_attribute("bert_embeddings_token_type_embeddings", embeddings_token_type_embeddings)])
+            emblayernorm_node.attribute.extend([helper.make_attribute("bert_embeddings_position_embeddings", embeddings_position_embeddings)])
+
+            self.nodes_to_remove.extend(subgraph_nodes_remove)
+
+            self.nodes_to_add.append(emblayernorm_node)
+            self.node_name_to_graph_name[emblayernorm_node.name] = self.this_graph_name
+
+
+class FusionAlbertEmbedLayerNormalization(Fusion):
+    """
+    Fuse AlbertEmbedLayerNormalization subgraph into one node.
+ """ + + def __init__(self, model: OnnxModel): + super().__init__( + model, "CustomEmbLayerNormPluginDynamic_IxRT", "CustomQKVToContextPluginDynamic_IxRT" + ) + + def fuse(self, node, input_name_to_nodes: Dict, output_name_to_node: Dict): + """ + input --> CustomEmbLayerNormPluginDynamic_IxRT --> CustomFCPluginDynamic_IxRT --> CustomFCPluginDynamic_IxRT --> CustomQKVToContextPluginDynamic_IxRT --> CustomFCPluginDynamic_IxRT + """ + children = self.model.get_children(node, input_name_to_nodes) + parent = self.model.get_parents(node, output_name_to_node) + + if len(children) == 0: + return + if len(parent) == 0: + return + + start_node = node + + # word_embeddings + word_embeddings_node = self.model.match_parent_path( + start_node, + ["CustomFCPluginDynamic_IxRT","CustomFCPluginDynamic_IxRT", "LayerNormalization", "Add", "Add", "Gather"], + [0, 0, 0, 0, 0, 0], + output_name_to_node, + ) + + # token_type_embeddings + token_type_embeddings_node = self.model.match_parent_path( + start_node, + ["CustomFCPluginDynamic_IxRT","CustomFCPluginDynamic_IxRT", "LayerNormalization", "Add", "Add", "Gather"], + [0, 0, 0, 0, 0, 1], + output_name_to_node, + ) + + # attention_mask + attention_mask_node = self.model.match_parent_path( + start_node, + ["Mul", "Sub", "Cast", "Unsqueeze"], + [1, 0, 1, 0], + output_name_to_node, + ) + + if word_embeddings_node is None or token_type_embeddings_node is None or attention_mask_node is None: + return + + if word_embeddings_node and token_type_embeddings_node and attention_mask_node: + subgraph_nodes = [] + subgraph_nodes.extend(word_embeddings_node) + subgraph_nodes.extend(token_type_embeddings_node) + subgraph_nodes.extend(attention_mask_node) + + subgraph_nodes_unique = [] + for item in subgraph_nodes: + if item not in subgraph_nodes_unique: + subgraph_nodes_unique.append(item) + subgraph_nodes_remove = [] + for item in subgraph_nodes_unique: + if item.op_type != "CustomFCPluginDynamic_IxRT": + subgraph_nodes_remove.append(item) + + # input_ids = self.model.get_graph_inputs_excluding_initializers()[0] + # token_type_ids = self.model.get_graph_inputs_excluding_initializers()[1] + # attention_mask = self.model.get_graph_inputs_excluding_initializers()[2] + + emblayernorm_out = word_embeddings_node[2].output[0] + emblayernorm_out_mask = attention_mask_node[0].output[0] + + beta_data = self.model.get_initializer(word_embeddings_node[2].input[2], True) + embeddings_layernorm_beta_name = "bert_embeddings_layernorm_beta" + embeddings_layernorm_beta = helper.make_tensor( + embeddings_layernorm_beta_name, TensorProto.FLOAT, beta_data.shape, beta_data.flatten().tolist()) + + gamma_data = self.model.get_initializer(word_embeddings_node[2].input[1], True) + embeddings_layernorm_gamma_name = "bert_embeddings_layernorm_gamma" + embeddings_layernorm_gamma = helper.make_tensor( + embeddings_layernorm_gamma_name, TensorProto.FLOAT, gamma_data.shape, gamma_data.flatten().tolist()) + + embeddings_word_embeddings_data = self.model.get_initializer(word_embeddings_node[5].input[0], True) + embeddings_word_embeddings_name = "bert_embeddings_word_embeddings" + embeddings_word_embeddings = helper.make_tensor( + embeddings_word_embeddings_name, TensorProto.FLOAT, embeddings_word_embeddings_data.shape, + embeddings_word_embeddings_data.flatten().tolist()) + + embeddings_token_type_embeddings_data = self.model.get_initializer(token_type_embeddings_node[5].input[0], True) + embeddings_token_type_embeddings_name = "bert_embeddings_token_type_embeddings" + embeddings_token_type_embeddings = 
helper.make_tensor(
+                embeddings_token_type_embeddings_name, TensorProto.FLOAT, embeddings_token_type_embeddings_data.shape,
+                embeddings_token_type_embeddings_data.flatten().tolist())
+
+            embeddings_position_embeddings_data = self.model.get_initializer(token_type_embeddings_node[3].input[1], True)
+            embeddings_position_embeddings_name = "bert_embeddings_position_embeddings"
+            embeddings_position_embeddings = helper.make_tensor(
+                embeddings_position_embeddings_name, TensorProto.FLOAT, embeddings_position_embeddings_data.shape,
+                embeddings_position_embeddings_data.flatten().tolist())
+
+            self.model.add_initializer(embeddings_layernorm_beta, self.this_graph_name)
+            self.model.add_initializer(embeddings_layernorm_gamma, self.this_graph_name)
+            self.model.add_initializer(embeddings_word_embeddings, self.this_graph_name)
+            self.model.add_initializer(embeddings_token_type_embeddings, self.this_graph_name)
+            self.model.add_initializer(embeddings_position_embeddings, self.this_graph_name)
+
+            emblayernorm_node = helper.make_node(
+                "CustomEmbLayerNormPluginDynamic_IxRT",
+                inputs=[word_embeddings_node[5].input[1], token_type_embeddings_node[5].input[1], attention_mask_node[3].input[0]],
+                outputs=[emblayernorm_out, emblayernorm_out_mask],
+                name=self.model.create_node_name(
+                    "BertEmbedLayerNormalization", name_prefix="BertEmbedLayerNormalization"
+                ),
+            )
+            emblayernorm_node.domain = "com.iluvatar"
+            emblayernorm_node.attribute.extend([helper.make_attribute("plugin_namespace", "")])
+            emblayernorm_node.attribute.extend([helper.make_attribute("plugin_version", "1")])
+            emblayernorm_node.attribute.extend([helper.make_attribute("output_fp16", 1)])
+            emblayernorm_node.attribute.extend([helper.make_attribute("full_mask", 1)])
+            emblayernorm_node.attribute.extend([helper.make_attribute("mha_type_id", 2)])
+            emblayernorm_node.attribute.extend([helper.make_attribute("pad_id", 0)])
+            emblayernorm_node.attribute.extend([helper.make_attribute("bert_embeddings_layernorm_beta", embeddings_layernorm_beta)])
+            emblayernorm_node.attribute.extend([helper.make_attribute("bert_embeddings_layernorm_gamma", embeddings_layernorm_gamma)])
+            emblayernorm_node.attribute.extend([helper.make_attribute("bert_embeddings_word_embeddings", embeddings_word_embeddings)])
+            emblayernorm_node.attribute.extend([helper.make_attribute("bert_embeddings_token_type_embeddings", embeddings_token_type_embeddings)])
+            emblayernorm_node.attribute.extend([helper.make_attribute("bert_embeddings_position_embeddings", embeddings_position_embeddings)])
+
+            self.nodes_to_remove.extend(subgraph_nodes_remove)
+
+            self.nodes_to_add.append(emblayernorm_node)
+            self.node_name_to_graph_name[emblayernorm_node.name] = self.this_graph_name
\ No newline at end of file
diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_fastgelu.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_fastgelu.py
index 0e24a9dd..067ff26e 100644
--- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_fastgelu.py
+++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_fastgelu.py
@@ -1,3 +1,19 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_format_roformer.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_format_roformer.py index 5b6d66ad..1f60ab76 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_format_roformer.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_format_roformer.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_gelu.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_gelu.py index f4c5c7e8..71421266 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_gelu.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_gelu.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
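The fusion_gelu.py hunks below only reformat the pass, but it helps to keep in mind what the matched subgraphs compute: each fuse_* variant recognises a different node layout of the same erf-based GELU. A small scalar reference, independent of this patch, is:

import math

def gelu_reference(x: float) -> float:
    """Exact GELU: 0.5 * x * (1 + erf(x / sqrt(2))), the element-wise function
    computed by the Mul/Erf/Add/Mul subgraphs that get replaced by a fused Gelu node."""
    return 0.5 * x * (1.0 + math.erf(x / math.sqrt(2.0)))

assert gelu_reference(0.0) == 0.0                  # GELU(0) is exactly 0
assert abs(gelu_reference(3.0) - 2.99595) < 1e-3   # large positive inputs pass through nearly unchanged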
@@ -5,8 +21,9 @@ from logging import getLogger from typing import Dict, Optional -from .fusion_base import Fusion from onnx import helper + +from .fusion_base import Fusion from .onnx_model import OnnxModel logger = getLogger(__name__) @@ -25,7 +42,9 @@ class FusionGelu(Fusion): return self.fuse_4(erf_node, input_name_to_nodes, output_name_to_node) - def fuse_1(self, erf_node, input_name_to_nodes: Dict, output_name_to_node: Dict) -> Optional[bool]: + def fuse_1( + self, erf_node, input_name_to_nodes: Dict, output_name_to_node: Dict + ) -> Optional[bool]: """ This pattern is from PyTorch model Fuse Gelu with Erf into one node: @@ -81,7 +100,9 @@ class FusionGelu(Fusion): return subgraph_output = mul_half.output[0] else: # pattern 1 - mul_half = self.model.match_parent(mul_after_erf, "Mul", another, output_name_to_node) + mul_half = self.model.match_parent( + mul_after_erf, "Mul", another, output_name_to_node + ) if mul_half is None: return @@ -100,13 +121,17 @@ class FusionGelu(Fusion): return self.nodes_to_remove.extend(subgraph_nodes) - fused_node = helper.make_node("Gelu", inputs=[subgraph_input], outputs=[subgraph_output]) + fused_node = helper.make_node( + "Gelu", inputs=[subgraph_input], outputs=[subgraph_output] + ) fused_node.domain = "com.microsoft" self.nodes_to_add.append(fused_node) self.node_name_to_graph_name[fused_node.name] = self.this_graph_name return True - def fuse_2(self, erf_node, input_name_to_nodes: Dict, output_name_to_node: Dict) -> Optional[bool]: + def fuse_2( + self, erf_node, input_name_to_nodes: Dict, output_name_to_node: Dict + ) -> Optional[bool]: """ This pattern is from Keras model Fuse Gelu with Erf into one node: @@ -174,13 +199,17 @@ class FusionGelu(Fusion): return self.nodes_to_remove.extend(subgraph_nodes) - fused_node = helper.make_node("Gelu", inputs=[root_node.output[0]], outputs=[mul.output[0]]) + fused_node = helper.make_node( + "Gelu", inputs=[root_node.output[0]], outputs=[mul.output[0]] + ) fused_node.domain = "com.microsoft" self.nodes_to_add.append(fused_node) self.node_name_to_graph_name[fused_node.name] = self.this_graph_name return True - def fuse_3(self, erf_node, input_name_to_nodes: Dict, output_name_to_node: Dict) -> Optional[bool]: + def fuse_3( + self, erf_node, input_name_to_nodes: Dict, output_name_to_node: Dict + ) -> Optional[bool]: """ This pattern is from TensorFlow model Fuse Gelu with Erf into one node: @@ -221,7 +250,9 @@ class FusionGelu(Fusion): if i < 0: return - root_node = self.model.get_parent(first_mul, 0 if i == 1 else 1, output_name_to_node) + root_node = self.model.get_parent( + first_mul, 0 if i == 1 else 1, output_name_to_node + ) if root_node is None: return @@ -232,7 +263,10 @@ class FusionGelu(Fusion): return last_mul = children[0] - if not (last_mul.input[0] == root_node.output[0] or last_mul.input[1] == root_node.output[0]): + if not ( + last_mul.input[0] == root_node.output[0] + or last_mul.input[1] == root_node.output[0] + ): return subgraph_nodes = [first_mul, erf_node, add_after_erf, mul_half, last_mul] @@ -245,13 +279,17 @@ class FusionGelu(Fusion): return self.nodes_to_remove.extend(subgraph_nodes) - fused_node = helper.make_node("Gelu", inputs=[root_node.output[0]], outputs=[last_mul.output[0]]) + fused_node = helper.make_node( + "Gelu", inputs=[root_node.output[0]], outputs=[last_mul.output[0]] + ) fused_node.domain = "com.microsoft" self.nodes_to_add.append(fused_node) self.node_name_to_graph_name[fused_node.name] = self.this_graph_name return True - def fuse_4(self, erf_node, input_name_to_nodes: 
Dict, output_name_to_node: Dict) -> Optional[bool]: + def fuse_4( + self, erf_node, input_name_to_nodes: Dict, output_name_to_node: Dict + ) -> Optional[bool]: """ This pattern is from TensorFlow model Fuse Gelu with Erf into one node: @@ -288,7 +326,9 @@ class FusionGelu(Fusion): return mul_after_erf = children[0] - mul_before_erf = self.model.match_parent(erf_node, "Mul", 0, output_name_to_node) + mul_before_erf = self.model.match_parent( + erf_node, "Mul", 0, output_name_to_node + ) if mul_before_erf is None: return @@ -307,7 +347,9 @@ class FusionGelu(Fusion): return subgraph_output = mul_half.output[0] else: # pattern 1 - mul_half = self.model.match_parent(mul_after_erf, "Mul", another, output_name_to_node) + mul_half = self.model.match_parent( + mul_after_erf, "Mul", another, output_name_to_node + ) if mul_half is None: return @@ -319,15 +361,23 @@ class FusionGelu(Fusion): subgraph_output = mul_after_erf.output[0] - subgraph_nodes = [mul_before_erf, erf_node, add_after_erf, mul_after_erf, mul_half] + subgraph_nodes = [ + mul_before_erf, + erf_node, + add_after_erf, + mul_after_erf, + mul_half, + ] if not self.model.is_safe_to_fuse_nodes( subgraph_nodes, [subgraph_output], input_name_to_nodes, output_name_to_node ): return self.nodes_to_remove.extend(subgraph_nodes) - fused_node = helper.make_node("Gelu", inputs=[subgraph_input], outputs=[subgraph_output]) + fused_node = helper.make_node( + "Gelu", inputs=[subgraph_input], outputs=[subgraph_output] + ) fused_node.domain = "com.microsoft" self.nodes_to_add.append(fused_node) self.node_name_to_graph_name[fused_node.name] = self.this_graph_name - return True \ No newline at end of file + return True diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_gelu_approximation.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_gelu_approximation.py index 35f4b93a..a89e558c 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_gelu_approximation.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_gelu_approximation.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
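For reference, the Mul/Erf/Add/Mul subgraphs that the fuse_1 through fuse_4 methods above collapse all compute the exact (Erf-based) GELU, which is what the single fused "Gelu" node then evaluates. A minimal standard-library sketch of that formula, useful when sanity-checking outputs before and after fusion (illustrative only, not taken from the patched files):

import math

def gelu_reference(x: float) -> float:
    # Exact GELU, the function the fused "Gelu" node computes:
    # gelu(x) = x * 0.5 * (1 + erf(x / sqrt(2)))
    # The 0.5 factor corresponds to the mul_half node matched above.
    return x * 0.5 * (1.0 + math.erf(x / math.sqrt(2.0)))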
@@ -5,8 +21,9 @@ from logging import getLogger -from .fusion_base import Fusion from onnx import helper + +from .fusion_base import Fusion from .onnx_model import OnnxModel @@ -19,7 +36,9 @@ class FusionGeluApproximation(Fusion): "FastGelu", inputs=node.input, outputs=node.output, - name=self.model.create_node_name("FastGelu", node.op_type + "_Approximation"), + name=self.model.create_node_name( + "FastGelu", node.op_type + "_Approximation" + ), ) new_node.domain = "com.microsoft" self.nodes_to_remove.append(node) diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_gpt_attention.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_gpt_attention.py index b856dd19..805cd3bf 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_gpt_attention.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_gpt_attention.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
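FusionGeluApproximation above rewrites an already-fused Gelu node into FastGelu, which uses the tanh approximation of GELU rather than the Erf form. For orientation, a stdlib sketch of that approximation (illustrative only, not taken from the patched files):

import math

def fast_gelu_reference(x: float) -> float:
    # Tanh approximation used by FastGelu:
    # 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x**3)))
    return 0.5 * x * (1.0 + math.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * x ** 3)))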
@@ -5,9 +21,10 @@ from logging import getLogger import numpy as np +from onnx import TensorProto, helper, numpy_helper + from .fusion_base import Fusion from .fusion_utils import FusionUtils -from onnx import TensorProto, helper, numpy_helper from .onnx_model import OnnxModel logger = getLogger(__name__) @@ -20,7 +37,9 @@ class FusionGptAttentionPastBase(Fusion): super().__init__(model, "Attention", "LayerNormalization", "with past") self.num_heads = num_heads self.utils = FusionUtils(model) - self.casted_attention_mask = {} # map from name of attention mask to the name that casted to int32 + self.casted_attention_mask = ( + {} + ) # map from name of attention mask to the name that casted to int32 def match_past_pattern_1(self, concat_k, concat_v, output_name_to_node): # Pattern 1: @@ -55,7 +74,9 @@ class FusionGptAttentionPastBase(Fusion): if parent.op_type == "Gather": gather_past_k = parent else: - past_k_nodes = self.model.match_parent_path(concat_k, ["Transpose", "Gather"], [0, 0]) + past_k_nodes = self.model.match_parent_path( + concat_k, ["Transpose", "Gather"], [0, 0] + ) if past_k_nodes is None: logger.debug("match_past_pattern_1: failed match Transpose and Gather") return None @@ -106,27 +127,39 @@ class FusionGptAttentionPastBase(Fusion): opset_version = self.model.get_opset_version() if opset_version < 13: if not FusionUtils.check_node_attribute(squeeze, "axes", [0]): - logger.debug("match_past_pattern_2: axes != [0] for Squeeze in past path") + logger.debug( + "match_past_pattern_2: axes != [0] for Squeeze in past path" + ) return None if not FusionUtils.check_node_attribute(split, "split", [1, 1]): - logger.debug("match_past_pattern_2: split != [1, 1] for Split in past path") + logger.debug( + "match_past_pattern_2: split != [1, 1] for Split in past path" + ) return None else: if not self.utils.check_node_input_value(squeeze, 1, [0]): - logger.debug("match_past_pattern_2: axes != [0] for Squeeze in past path") + logger.debug( + "match_past_pattern_2: axes != [0] for Squeeze in past path" + ) return None if not self.utils.check_node_input_value(split, 1, [1, 1]): - logger.debug("match_past_pattern_2: split != [1, 1] for Split in past path") + logger.debug( + "match_past_pattern_2: split != [1, 1] for Split in past path" + ) return None if not FusionUtils.check_node_attribute(split, "axis", 0, default_value=0): - logger.debug("match_past_pattern_2: attribute axis of Split are not expected in past path") + logger.debug( + "match_past_pattern_2: attribute axis of Split are not expected in past path" + ) return None past = split.input[0] - past_k_nodes = self.model.match_parent_path(concat_k, ["Squeeze", "Split"], [0, 0]) + past_k_nodes = self.model.match_parent_path( + concat_k, ["Squeeze", "Split"], [0, 0] + ) if past_k_nodes is None: logger.debug("match_past_pattern_2: failed to match past_k_nodes path") return None @@ -159,10 +192,14 @@ class FusionGptAttentionPastBase(Fusion): if input_name in self.casted_attention_mask: attention_mask_input_name = self.casted_attention_mask[input_name] elif self.model.find_graph_input(input_name): - casted, attention_mask_input_name = self.utils.cast_graph_input_to_int32(input_name) + casted, attention_mask_input_name = self.utils.cast_graph_input_to_int32( + input_name + ) self.casted_attention_mask[input_name] = attention_mask_input_name else: - attention_mask_input_name, cast_node = self.utils.cast_input_to_int32(input_name) + attention_mask_input_name, cast_node = self.utils.cast_input_to_int32( + input_name + ) 
self.casted_attention_mask[input_name] = attention_mask_input_name return attention_mask_input_name @@ -245,7 +282,9 @@ class FusionGptAttention(FusionGptAttentionPastBase): another_input = add_qkv.input[1 - return_indice[0]] - v_nodes = self.model.match_parent_path(matmul_qkv, ["Concat", "Transpose", "Reshape", "Split"], [1, 1, 0, 0]) + v_nodes = self.model.match_parent_path( + matmul_qkv, ["Concat", "Transpose", "Reshape", "Split"], [1, 1, 0, 0] + ) if v_nodes is None: logger.debug("fuse_attention: failed to match v path") return @@ -284,7 +323,9 @@ class FusionGptAttention(FusionGptAttentionPastBase): slice_mask = None input_mask_nodes = None concat_k_to_match = None - qk_nodes = self.model.match_parent_path(matmul_qkv, ["Softmax", "Sub", "Mul", "Div", "MatMul"], [0, 0, 0, 0, 0]) + qk_nodes = self.model.match_parent_path( + matmul_qkv, ["Softmax", "Sub", "Mul", "Div", "MatMul"], [0, 0, 0, 0, 0] + ) if qk_nodes is not None: (softmax_qk, sub_qk, mul_qk, div_qk, matmul_qk) = qk_nodes mask_nodes = self.model.match_parent_path( @@ -351,7 +392,9 @@ class FusionGptAttention(FusionGptAttentionPastBase): output_name_to_node, ) # yapf: disable if input_mask_nodes is None: - logger.debug("fuse_attention: failed to match input attention mask path") + logger.debug( + "fuse_attention: failed to match input attention mask path" + ) return mask_nodes = self.model.match_parent_path( @@ -376,7 +419,9 @@ class FusionGptAttention(FusionGptAttentionPastBase): slice_mask = mask_nodes[2] - div_or_concat = self.model.get_parent(mask_nodes[-1], 0, output_name_to_node) + div_or_concat = self.model.get_parent( + mask_nodes[-1], 0, output_name_to_node + ) if div_or_concat.op_type == "Div": div_mask = div_or_concat if div_qk != div_mask: @@ -388,19 +433,27 @@ class FusionGptAttention(FusionGptAttentionPastBase): logger.debug("fuse_attention: failed to match mask path") # Validate that the mask data is either lower triangular (unidirectional) or all ones - mask_data = numpy_helper.to_array(self.model.get_initializer(slice_mask.input[0])) + mask_data = numpy_helper.to_array( + self.model.get_initializer(slice_mask.input[0]) + ) if not ( - len(mask_data.shape) == 4 and mask_data.shape[:2] == (1, 1) and mask_data.shape[2] == mask_data.shape[3] + len(mask_data.shape) == 4 + and mask_data.shape[:2] == (1, 1) + and mask_data.shape[2] == mask_data.shape[3] ): logger.debug("fuse_attention: skip since mask shape is not 1x1xWxW") return if np.allclose(mask_data, np.ones_like(mask_data)): is_unidirectional = False elif not np.allclose(mask_data, np.tril(np.ones_like(mask_data))): - logger.debug("fuse_attention: skip since mask is neither lower triangular nor ones") + logger.debug( + "fuse_attention: skip since mask is neither lower triangular nor ones" + ) return - q_nodes = self.model.match_parent_path(matmul_qk, ["Transpose", "Reshape", "Split"], [0, 0, 0]) + q_nodes = self.model.match_parent_path( + matmul_qk, ["Transpose", "Reshape", "Split"], [0, 0, 0] + ) if q_nodes is None: logger.debug("fuse_attention: failed to match q path") return @@ -409,7 +462,9 @@ class FusionGptAttention(FusionGptAttentionPastBase): logger.debug("fuse_attention: skip since split_fc != split_q") return - k_nodes = self.model.match_parent_path(matmul_qk, ["Concat", "Transpose", "Reshape", "Split"], [1, 1, 0, 0]) + k_nodes = self.model.match_parent_path( + matmul_qk, ["Concat", "Transpose", "Reshape", "Split"], [1, 1, 0, 0] + ) if k_nodes is None: # This pattern is from pytorch 1.7.1 and transformers 4.6.1 k_nodes = self.model.match_parent_path( @@ 
-438,9 +493,9 @@ class FusionGptAttention(FusionGptAttentionPastBase): attention_mask_input_name = self.cast_attention_mask(input_name) # Match past and present paths - past = self.match_past_pattern_1(concat_k, concat_v, output_name_to_node) or self.match_past_pattern_2( + past = self.match_past_pattern_1( concat_k, concat_v, output_name_to_node - ) + ) or self.match_past_pattern_2(concat_k, concat_v, output_name_to_node) if past is None: logger.info("fuse_attention: failed to match past path") return diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_gpt_attention_megatron.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_gpt_attention_megatron.py index 8510ae42..138a9c5f 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_gpt_attention_megatron.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_gpt_attention_megatron.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
@@ -5,10 +21,11 @@ from logging import getLogger import numpy as np +from onnx import TensorProto, helper, numpy_helper + from .fusion_base import Fusion from .fusion_gpt_attention import FusionGptAttentionPastBase from .fusion_utils import FusionUtils -from onnx import TensorProto, helper, numpy_helper from .onnx_model import OnnxModel logger = getLogger(__name__) @@ -56,7 +73,9 @@ class FusionGptAttentionMegatron(FusionGptAttentionPastBase): attention_node.attribute.extend( [ helper.make_attribute("num_heads", self.num_heads), - helper.make_attribute("unidirectional", 0), # unidirectional shall not be ON for 4D attention mask + helper.make_attribute( + "unidirectional", 0 + ), # unidirectional shall not be ON for 4D attention mask ] ) @@ -81,11 +100,15 @@ class FusionGptAttentionMegatron(FusionGptAttentionPastBase): (mul_mask, sub_mask, last_slice_mask, slice_mask) = mask_nodes if mul_qk.input[1] != last_slice_mask.output[0]: - logger.debug("fuse_attention failed: mul_qk.input[1] != last_slice_mask.output[0]") + logger.debug( + "fuse_attention failed: mul_qk.input[1] != last_slice_mask.output[0]" + ) return None if not self.utils.check_node_input_value(mul_mask, 1, 10000.0): - logger.debug("fuse_attention failed: mul_mask input 1 is not constant 10000.0") + logger.debug( + "fuse_attention failed: mul_mask input 1 is not constant 10000.0" + ) return None if not self.utils.check_node_input_value(sub_mask, 0, 1.0): @@ -97,23 +120,33 @@ class FusionGptAttentionMegatron(FusionGptAttentionPastBase): return None if not self.utils.check_node_input_value(last_slice_mask, 1, [0]): - logger.debug("fuse_attention failed: last_slice_mask input 1 (starts) is not constant [0]") + logger.debug( + "fuse_attention failed: last_slice_mask input 1 (starts) is not constant [0]" + ) return None if not self.utils.check_node_input_value(last_slice_mask, 3, [3]): - logger.debug("fuse_attention failed: last_slice_mask input 3 (axes) is not constant [3]") + logger.debug( + "fuse_attention failed: last_slice_mask input 3 (axes) is not constant [3]" + ) return False if not self.utils.check_node_input_value(last_slice_mask, 4, [1]): - logger.debug("fuse_attention failed: last_slice_mask input 4 (steps) is not constant [1]") + logger.debug( + "fuse_attention failed: last_slice_mask input 4 (steps) is not constant [1]" + ) return False if not self.utils.check_node_input_value(slice_mask, 3, [2]): - logger.debug("fuse_attention failed: slice_mask input 3 (axes) is not constant [2]") + logger.debug( + "fuse_attention failed: slice_mask input 3 (axes) is not constant [2]" + ) return None if not self.utils.check_node_input_value(slice_mask, 4, [1]): - logger.debug("fuse_attention failed: slice_mask input 4 (steps) is not constant [1]") + logger.debug( + "fuse_attention failed: slice_mask input 4 (steps) is not constant [1]" + ) return None last_slice_path = self.model.match_parent_path( @@ -144,7 +177,10 @@ class FusionGptAttentionMegatron(FusionGptAttentionPastBase): ["Unsqueeze", "Sub", "Gather", "Shape", "LayerNormalization"], [1, 0, 1, 0, 0], ) - if first_slice_sub_1 is None or first_slice_sub_1[-1] != layernorm_before_attention: + if ( + first_slice_sub_1 is None + or first_slice_sub_1[-1] != layernorm_before_attention + ): logger.debug("fuse_attention: failed to match last slice sub path 1") return None @@ -199,10 +235,14 @@ class FusionGptAttentionMegatron(FusionGptAttentionPastBase): layernorm_before_attention, ) = v_nodes if skip_input != layernorm_before_attention.input[0]: - logger.debug("fuse_attention: 
skip_input != layernorm_before_attention.input[0]") + logger.debug( + "fuse_attention: skip_input != layernorm_before_attention.input[0]" + ) return - qk_nodes = self.model.match_parent_path(matmul_qkv, ["Softmax", "Sub", "Mul", "MatMul"], [0, 0, 0, 0]) + qk_nodes = self.model.match_parent_path( + matmul_qkv, ["Softmax", "Sub", "Mul", "MatMul"], [0, 0, 0, 0] + ) if qk_nodes is None: logger.debug("fuse_attention: failed to match qk path") return None @@ -211,9 +251,13 @@ class FusionGptAttentionMegatron(FusionGptAttentionPastBase): logger.debug("fuse_attention failed: softmax_qk axis != 3") return None - attention_mask = self.match_mask(sub_qk, mul_qk, matmul_qk, layernorm_before_attention) + attention_mask = self.match_mask( + sub_qk, mul_qk, matmul_qk, layernorm_before_attention + ) - q_nodes = self.model.match_parent_path(matmul_qk, ["Div", "Transpose", "Reshape", "Split"], [0, 0, 0, 0]) + q_nodes = self.model.match_parent_path( + matmul_qk, ["Div", "Transpose", "Reshape", "Split"], [0, 0, 0, 0] + ) if q_nodes is None: logger.debug("fuse_attention: failed to match q path") return @@ -249,19 +293,25 @@ class FusionGptAttentionMegatron(FusionGptAttentionPastBase): num_heads = value[2] if num_heads != self.num_heads: - logger.info(f"Detected num_heads={num_heads}. Ignore user specified value {self.num_heads}") + logger.info( + f"Detected num_heads={num_heads}. Ignore user specified value {self.num_heads}" + ) self.num_heads = num_heads hidden_size_per_head = value[3] i, value = self.model.get_constant_input(div_k) expected_value = float(np.sqrt(np.sqrt(hidden_size_per_head))) if not is_close(value, expected_value): - logger.debug(f"fuse_attention: div_k value={value} expected={expected_value}") + logger.debug( + f"fuse_attention: div_k value={value} expected={expected_value}" + ) return i, value = self.model.get_constant_input(div_q) if not is_close(value, expected_value): - logger.debug(f"fuse_attention: div_q value={value} expected={expected_value}") + logger.debug( + f"fuse_attention: div_q value={value} expected={expected_value}" + ) return # Match past and present paths diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_gpt_attention_no_past.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_gpt_attention_no_past.py index ca88f144..4e538cf5 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_gpt_attention_no_past.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_gpt_attention_no_past.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
@@ -55,7 +71,7 @@ class FusionGptAttentionNoPast(Fusion): tensor_shape = [dim for dim in tensor_value.dims] break head_dim = math.ceil(div_value * div_value) - hidden_size = tensor_shape[0] + hidden_size = tensor_shape[1] num_heads = hidden_size // head_dim return num_heads, hidden_size @@ -219,20 +235,27 @@ class FusionGptAttentionNoPast(Fusion): if where_qk is None: return + global num_heads, hidden_size if self.where_qk_shared is None: where_qk.input[1] = mask_nodes[0].output[0] div_qk.output[0] = where_qk.output[0] add_qk.input[1 - mask_return_indices[0]] = div_qk.output[0] self.where_qk_shared = where_qk self.nodes_to_remove.extend([softmax_qk, add_qk, div_qk, matmul_qk]) + + num_heads, hidden_size = self.get_num_heads_and_hidden_size( + custom_fc_after_attention, div_qk + ) + self.nodes_to_remove.extend([k_nodes[0]]) + self.nodes_to_remove.extend(v_nodes[:-2]) else: self.nodes_to_remove.extend( [softmax_qk, add_qk, where_qk, div_qk, matmul_qk] - ) + ) + self.nodes_to_remove.extend(q_nodes) + self.nodes_to_remove.extend(k_nodes) + self.nodes_to_remove.extend(v_nodes[:-1]) - num_heads, hidden_size = self.get_num_heads_and_hidden_size( - custom_fc_after_attention, div_qk - ) new_node = self.create_attention_node( num_heads, hidden_size, @@ -247,6 +270,4 @@ class FusionGptAttentionNoPast(Fusion): if reshape_2 is not None: self.nodes_to_remove.extend([reshape_2]) self.nodes_to_remove.extend([transpose_qkv, matmul_qkv]) - self.nodes_to_remove.extend(q_nodes) - self.nodes_to_remove.extend(k_nodes) - self.nodes_to_remove.extend(v_nodes[:-1]) + diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_layernorm.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_layernorm.py index 727a1aa5..d19c3aff 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_layernorm.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_layernorm.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_options.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_options.py index e0a1a535..c0bb11b3 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_options.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_options.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. @@ -31,6 +47,7 @@ class FusionOptions: self.enable_format_roformer = False self.enable_gpt2_classify = False self.enable_vit = False + self.enable_omdet = False self.attention_mask_format = AttentionMaskFormat.AttentionMask if model_type == "gpt2": @@ -42,6 +59,8 @@ class FusionOptions: self.enable_format_roformer = True elif model_type == "vit": self.enable_vit = True + elif model_type == "omdet": + self.enable_omdet = True def use_raw_attention_mask(self, use_raw_mask=True): if use_raw_mask: diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_qordered_attention.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_qordered_attention.py index b9b502ac..9afa3edb 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_qordered_attention.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_qordered_attention.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
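With the new flag added to FusionOptions above, passing the new model type switches on the omdet-specific passes and leaves the other model-specific flags off. A usage sketch, assuming FusionOptions is imported from this passes package and takes the model type string, as the surrounding hunk suggests:

from fusion_options import FusionOptions  # inside the package this is a relative import (.fusion_options)

opts = FusionOptions("omdet")
assert opts.enable_omdet
assert not opts.enable_vit and not opts.enable_format_roformer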
@@ -7,10 +23,11 @@ from logging import getLogger from typing import Tuple import numpy as np +from onnx import NodeProto, helper + from .fusion_attention import AttentionMask from .fusion_base import Fusion from .fusion_utils import FusionUtils, NumpyHelper -from onnx import NodeProto, helper from .onnx_model import OnnxModel logger = getLogger(__name__) @@ -48,19 +65,27 @@ class FusionQOrderedAttention(Fusion): constant_node = self.model.match_parent_path(reshape_q, ["Constant"], [1]) if constant_node is None: - return self.num_heads, self.hidden_size # Fall back to user specified value + return ( + self.num_heads, + self.hidden_size, + ) # Fall back to user specified value else: constant_node = constant_node[0] if len(constant_node.attribute) != 1: - return self.num_heads, self.hidden_size # Fall back to user specified value + return ( + self.num_heads, + self.hidden_size, + ) # Fall back to user specified value # This is assuming it is a Tensor attribute (this is a safe assumption) q_shape = constant_node.attribute[0].t q_shape_value = NumpyHelper.to_array(q_shape) if len(q_shape_value) != 4 or (q_shape_value[2] <= 0 or q_shape_value[3] <= 0): - logger.debug(f"q_shape_value={q_shape_value}. Expected value are like [0, 0, num_heads, head_size].") + logger.debug( + f"q_shape_value={q_shape_value}. Expected value are like [0, 0, num_heads, head_size]." + ) return self.num_heads, self.hidden_size # Fall back to user specified value num_heads = q_shape_value[2] @@ -69,7 +94,9 @@ class FusionQOrderedAttention(Fusion): if self.num_heads > 0 and num_heads != self.num_heads: if self.num_heads_warning: - logger.warning(f"--num_heads is {self.num_heads}. Detected value is {num_heads}. Using detected value.") + logger.warning( + f"--num_heads is {self.num_heads}. Detected value is {num_heads}. Using detected value." + ) self.num_heads_warning = False # Do not show the warning more than once if self.hidden_size > 0 and hidden_size != self.hidden_size: @@ -77,7 +104,9 @@ class FusionQOrderedAttention(Fusion): logger.warning( f"--hidden_size is {self.hidden_size}. Detected value is {hidden_size}. Using detected value." 
) - self.hidden_size_warning = False # Do not show the warning more than once + self.hidden_size_warning = ( + False # Do not show the warning more than once + ) return num_heads, hidden_size @@ -101,7 +130,9 @@ class FusionQOrderedAttention(Fusion): ) if dequantize_input is None: - logger.debug("fuse_qordered_attention: failed to match input qdq nodes path") + logger.debug( + "fuse_qordered_attention: failed to match input qdq nodes path" + ) return dequantize_input = dequantize_input[-1] @@ -109,7 +140,15 @@ class FusionQOrderedAttention(Fusion): # QKV nodes qkv_nodes = self.model.match_parent_path( start_node, - ["Add", "MatMul", "Reshape", "Transpose", "DequantizeLinear", "QuantizeLinear", "MatMul"], + [ + "Add", + "MatMul", + "Reshape", + "Transpose", + "DequantizeLinear", + "QuantizeLinear", + "MatMul", + ], [None, None, 0, 0, 0, 0, 0], ) @@ -117,7 +156,15 @@ class FusionQOrderedAttention(Fusion): logger.debug("fuse_qordered_attention: failed to match qkv path") return - (_, projection_matmul, reshape_qkv, transpose_qkv, dequantize_qkv, quantize_qkv, matmul_qkv) = qkv_nodes + ( + _, + projection_matmul, + reshape_qkv, + transpose_qkv, + dequantize_qkv, + quantize_qkv, + matmul_qkv, + ) = qkv_nodes # Make sure the Q/DQ has the proper zero points and constant per-tensor scales if not FusionUtils.check_qdq_node_for_fusion(quantize_qkv, self.model): @@ -145,7 +192,14 @@ class FusionQOrderedAttention(Fusion): # V nodes v_nodes = self.model.match_parent_path( matmul_qkv, - ["Transpose", "Reshape", "DequantizeLinear", "QuantizeLinear", "Add", "MatMul"], + [ + "Transpose", + "Reshape", + "DequantizeLinear", + "QuantizeLinear", + "Add", + "MatMul", + ], [1, 0, 0, 0, 0, None], ) @@ -163,7 +217,9 @@ class FusionQOrderedAttention(Fusion): return # V MatMul weight - dequantize_v_matmul_weight = self.model.match_parent_path(matmul_v, ["DequantizeLinear"], [1]) + dequantize_v_matmul_weight = self.model.match_parent_path( + matmul_v, ["DequantizeLinear"], [1] + ) if dequantize_v_matmul_weight is None: logger.debug("fuse_qordered_attention: failed to match v path") @@ -176,7 +232,9 @@ class FusionQOrderedAttention(Fusion): # Make sure the upstream DequantizeLinear-1 has the proper zero points and scales # Per-channel scales are supported for weights alone - if not FusionUtils.check_qdq_node_for_fusion(dequantize_v_matmul_weight, self.model, False): + if not FusionUtils.check_qdq_node_for_fusion( + dequantize_v_matmul_weight, self.model, False + ): return # QK nodes @@ -226,7 +284,14 @@ class FusionQOrderedAttention(Fusion): # Q nodes q_nodes = self.model.match_parent_path( matmul_qk, - ["Transpose", "Reshape", "DequantizeLinear", "QuantizeLinear", "Add", "MatMul"], + [ + "Transpose", + "Reshape", + "DequantizeLinear", + "QuantizeLinear", + "Add", + "MatMul", + ], [0, 0, 0, 0, 0, None], ) @@ -244,7 +309,9 @@ class FusionQOrderedAttention(Fusion): return # Q MatMul weight - dequantize_q_matmul_weight = self.model.match_parent_path(matmul_q, ["DequantizeLinear"], [1]) + dequantize_q_matmul_weight = self.model.match_parent_path( + matmul_q, ["DequantizeLinear"], [1] + ) if dequantize_q_matmul_weight is None: logger.debug("fuse_qordered_attention: failed to match q path") @@ -257,13 +324,22 @@ class FusionQOrderedAttention(Fusion): # Make sure the upstream DequantizeLinear-1 has the proper zero points and scales # Per-channel scales are supported for weights alone - if not FusionUtils.check_qdq_node_for_fusion(dequantize_q_matmul_weight, self.model, False): + if not FusionUtils.check_qdq_node_for_fusion( 
+ dequantize_q_matmul_weight, self.model, False + ): return # K nodes k_nodes = self.model.match_parent_path( matmul_qk, - ["Transpose", "Reshape", "DequantizeLinear", "QuantizeLinear", "Add", "MatMul"], + [ + "Transpose", + "Reshape", + "DequantizeLinear", + "QuantizeLinear", + "Add", + "MatMul", + ], [1, 0, 0, 0, 0, None], ) @@ -281,7 +357,9 @@ class FusionQOrderedAttention(Fusion): return # K MatMul weight - dequantize_k_matmul_weight = self.model.match_parent_path(matmul_k, ["DequantizeLinear"], [1]) + dequantize_k_matmul_weight = self.model.match_parent_path( + matmul_k, ["DequantizeLinear"], [1] + ) if dequantize_k_matmul_weight is None: logger.debug("fuse_qordered_attention: failed to match k path") @@ -294,7 +372,9 @@ class FusionQOrderedAttention(Fusion): # Make sure the upstream DequantizeLinear-1 has the proper zero points and scales # Per-channel scales are supported for weights alone - if not FusionUtils.check_qdq_node_for_fusion(dequantize_k_matmul_weight, self.model, False): + if not FusionUtils.check_qdq_node_for_fusion( + dequantize_k_matmul_weight, self.model, False + ): return # Mask nodes @@ -320,7 +400,11 @@ class FusionQOrderedAttention(Fusion): vw_out_size = np.prod(vw.shape[1:]) # Form QOrderedAttention node - if matmul_v.input[0] == root_input and matmul_q.input[0] == root_input and matmul_k.input[0] == root_input: + if ( + matmul_v.input[0] == root_input + and matmul_q.input[0] == root_input + and matmul_k.input[0] == root_input + ): mask_index = self.attention_mask.process_mask(mask_nodes[-1].input[0]) # Ascertain `num_heads` and `hidden_size` @@ -372,13 +456,19 @@ class FusionQOrderedAttention(Fusion): # Transpose weight 'B' from order ROW to order COL # This offline transpose is needed only while using the CUDA EP # TODO: Make this fusion logic EP-agnostic ? 
- q_weight_tensor = self.model.get_initializer(dequantize_q_matmul_weight.input[0]) + q_weight_tensor = self.model.get_initializer( + dequantize_q_matmul_weight.input[0] + ) FusionUtils.transpose_2d_int8_tensor(q_weight_tensor) - k_weight_tensor = self.model.get_initializer(dequantize_k_matmul_weight.input[0]) + k_weight_tensor = self.model.get_initializer( + dequantize_k_matmul_weight.input[0] + ) FusionUtils.transpose_2d_int8_tensor(k_weight_tensor) - v_weight_tensor = self.model.get_initializer(dequantize_v_matmul_weight.input[0]) + v_weight_tensor = self.model.get_initializer( + dequantize_v_matmul_weight.input[0] + ) FusionUtils.transpose_2d_int8_tensor(v_weight_tensor) # Name and create Attention node @@ -391,15 +481,25 @@ class FusionQOrderedAttention(Fusion): name=attention_node_name, ) - self.model.replace_node_input(dequantize_qkv, dequantize_qkv.input[0], attention_node.output[0]) - self.model.replace_node_input(projection_matmul, projection_matmul.input[0], dequantize_qkv.output[0]) + self.model.replace_node_input( + dequantize_qkv, dequantize_qkv.input[0], attention_node.output[0] + ) + self.model.replace_node_input( + projection_matmul, projection_matmul.input[0], dequantize_qkv.output[0] + ) - attention_node.attribute.extend([helper.make_attribute("num_heads", num_heads)]) + attention_node.attribute.extend( + [helper.make_attribute("num_heads", num_heads)] + ) attention_node.attribute.extend([helper.make_attribute("order_input", 1)]) attention_node.attribute.extend([helper.make_attribute("order_weight", 0)]) attention_node.attribute.extend([helper.make_attribute("order_output", 1)]) attention_node.attribute.extend( - [helper.make_attribute("qkv_hidden_sizes", [qw_out_size, kw_out_size, vw_out_size])] + [ + helper.make_attribute( + "qkv_hidden_sizes", [qw_out_size, kw_out_size, vw_out_size] + ) + ] ) attention_node.domain = "com.microsoft" @@ -407,13 +507,19 @@ class FusionQOrderedAttention(Fusion): self.nodes_to_add.append(attention_node) self.node_name_to_graph_name[attention_node.name] = self.this_graph_name - self.nodes_to_remove.extend([reshape_qkv, transpose_qkv, quantize_qkv, matmul_qkv]) + self.nodes_to_remove.extend( + [reshape_qkv, transpose_qkv, quantize_qkv, matmul_qkv] + ) self.nodes_to_remove.extend(qk_nodes) self.nodes_to_remove.extend(q_nodes) self.nodes_to_remove.extend(k_nodes) self.nodes_to_remove.extend(v_nodes) self.nodes_to_remove.extend( - [dequantize_q_matmul_weight, dequantize_k_matmul_weight, dequantize_v_matmul_weight] + [ + dequantize_q_matmul_weight, + dequantize_k_matmul_weight, + dequantize_v_matmul_weight, + ] ) # Use prune graph to remove mask nodes since they are shared by all attention nodes. diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_qordered_gelu.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_qordered_gelu.py index 3ce59f78..ebd165c4 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_qordered_gelu.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_qordered_gelu.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. @@ -6,9 +22,10 @@ from logging import getLogger from typing import Dict +from onnx import helper + from .fusion_base import Fusion from .fusion_utils import FusionUtils -from onnx import helper from .onnx_model import OnnxModel logger = getLogger(__name__) @@ -51,7 +68,9 @@ class FusionQOrderedGelu(Fusion): if len(gelu_children) == 2: downstream_shape_node = gelu_children[1] - if not FusionUtils.check_qdq_node_for_fusion(downstream_quantize_node, self.model): + if not FusionUtils.check_qdq_node_for_fusion( + downstream_quantize_node, self.model + ): return # The first input to Gelu should flow through a DequantizeLinear node @@ -66,12 +85,16 @@ class FusionQOrderedGelu(Fusion): upstream_dequantize_node = first_input_parent_nodes[0] - if not FusionUtils.check_qdq_node_for_fusion(upstream_dequantize_node, self.model): + if not FusionUtils.check_qdq_node_for_fusion( + upstream_dequantize_node, self.model + ): return # Fusion logic subgraph_nodes = [node] # Gelu/FastGelu - subgraph_nodes.extend([downstream_quantize_node, upstream_dequantize_node]) # Relevant Q, DQ nodes + subgraph_nodes.extend( + [downstream_quantize_node, upstream_dequantize_node] + ) # Relevant Q, DQ nodes if not self.model.is_safe_to_fuse_nodes( subgraph_nodes, @@ -94,7 +117,9 @@ class FusionQOrderedGelu(Fusion): downstream_quantize_node.input[1], ], outputs=[downstream_quantize_node.output[0]], - name=self.model.create_node_name("QOrderedGelu", name_prefix="QOrderedGelu"), + name=self.model.create_node_name( + "QOrderedGelu", name_prefix="QOrderedGelu" + ), ) # Arrange the downstream Shape's input to be fed from the @@ -102,7 +127,9 @@ class FusionQOrderedGelu(Fusion): # be deemed safe if downstream_shape_node is not None: self.model.replace_node_input( - downstream_shape_node, downstream_shape_node.input[0], downstream_quantize_node.output[0] + downstream_shape_node, + downstream_shape_node.input[0], + downstream_quantize_node.output[0], ) # TODO: We only support CuBlasLt order ORDER_ROW for now. diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_qordered_layernorm.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_qordered_layernorm.py index 08def4a2..94e38a0f 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_qordered_layernorm.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_qordered_layernorm.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. @@ -5,9 +21,10 @@ from logging import getLogger from typing import Dict +from onnx import helper + from .fusion_base import Fusion from .fusion_utils import FusionUtils -from onnx import helper from .onnx_model import OnnxModel logger = getLogger(__name__) @@ -36,7 +53,11 @@ class FusionQOrderedLayerNormalization(Fusion): # Should have 2 children - QuantizeLinear + Shape if not ( (len(children) == 1 and children[0].op_type == "QuantizeLinear") - or (len(children) == 2 and children[0].op_type == "QuantizeLinear" and children[1].op_type == "Shape") + or ( + len(children) == 2 + and children[0].op_type == "QuantizeLinear" + and children[1].op_type == "Shape" + ) ): return @@ -46,7 +67,9 @@ class FusionQOrderedLayerNormalization(Fusion): if len(children) == 2: downstream_shape_node = children[1] - if not FusionUtils.check_qdq_node_for_fusion(downstream_quantize_node, self.model): + if not FusionUtils.check_qdq_node_for_fusion( + downstream_quantize_node, self.model + ): return # The first input to LayerNormalization should flow through a DequantizeLinear node @@ -61,19 +84,27 @@ class FusionQOrderedLayerNormalization(Fusion): upstream_dequantize_node = first_input_parent_nodes[0] - if not FusionUtils.check_qdq_node_for_fusion(upstream_dequantize_node, self.model): + if not FusionUtils.check_qdq_node_for_fusion( + upstream_dequantize_node, self.model + ): return # Fusion logic subgraph_nodes = [node] # LayerNormalization - subgraph_nodes.extend([downstream_quantize_node]) # Q node after LayerNormalization + subgraph_nodes.extend( + [downstream_quantize_node] + ) # Q node after LayerNormalization - upstream_dequantize_node_children = self.model.get_children(upstream_dequantize_node, input_name_to_nodes) + upstream_dequantize_node_children = self.model.get_children( + upstream_dequantize_node, input_name_to_nodes + ) # In GPT2, the DQ node will be feeding a residual downstream Add and hence, # we do not want to remove it if len(upstream_dequantize_node_children) == 1: - subgraph_nodes.extend([upstream_dequantize_node]) # DQ node before LayerNormalization + subgraph_nodes.extend( + [upstream_dequantize_node] + ) # DQ node before LayerNormalization if not self.model.is_safe_to_fuse_nodes( subgraph_nodes, @@ -83,7 +114,9 @@ class FusionQOrderedLayerNormalization(Fusion): input_name_to_nodes, output_name_to_node, ): - logger.debug(f"It is not safe to fuse QOrderedLayerNormalization node. Skip") + logger.debug( + f"It is not safe to fuse QOrderedLayerNormalization node. 
Skip" + ) return self.nodes_to_remove.extend(subgraph_nodes) @@ -98,7 +131,9 @@ class FusionQOrderedLayerNormalization(Fusion): downstream_quantize_node.input[1], ], outputs=[downstream_quantize_node.output[0]], - name=self.model.create_node_name("QOrderedLayerNormalization", name_prefix="QOrderedLayerNormalization"), + name=self.model.create_node_name( + "QOrderedLayerNormalization", name_prefix="QOrderedLayerNormalization" + ), ) # Arrange the downstream Shape's input to be fed from the @@ -106,7 +141,9 @@ class FusionQOrderedLayerNormalization(Fusion): # be deemed safe if downstream_shape_node is not None: self.model.replace_node_input( - downstream_shape_node, downstream_shape_node.input[0], downstream_quantize_node.output[0] + downstream_shape_node, + downstream_shape_node.input[0], + downstream_quantize_node.output[0], ) # TODO: We only support CuBlasLt order ORDER_ROW for now. diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_qordered_matmul.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_qordered_matmul.py index de0196c5..8c8050e1 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_qordered_matmul.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_qordered_matmul.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
@@ -6,9 +22,10 @@ from logging import getLogger from typing import Dict +from onnx import helper + from .fusion_base import Fusion from .fusion_utils import FusionUtils -from onnx import helper from .onnx_model import OnnxModel logger = getLogger(__name__) @@ -53,9 +70,14 @@ class FusionQOrderedMatMul(Fusion): if bias_add_child.op_type == "Add": residual_add_node = bias_add_child - residual_add_children = self.model.get_children(residual_add_node, input_name_to_nodes) + residual_add_children = self.model.get_children( + residual_add_node, input_name_to_nodes + ) - if len(residual_add_children) != 1 or residual_add_children[0].op_type != "QuantizeLinear": + if ( + len(residual_add_children) != 1 + or residual_add_children[0].op_type != "QuantizeLinear" + ): return downstream_quantize_node = residual_add_children[0] @@ -67,7 +89,9 @@ class FusionQOrderedMatMul(Fusion): return # Make sure the downstream QuantizeLinear has the proper zero points and scales - if not FusionUtils.check_qdq_node_for_fusion(downstream_quantize_node, self.model): + if not FusionUtils.check_qdq_node_for_fusion( + downstream_quantize_node, self.model + ): return # The first input to MatMul should flow through a DequantizeLinear node @@ -84,7 +108,12 @@ class FusionQOrderedMatMul(Fusion): if first_path_id < 0: first_path_id, first_input_parent_nodes, _ = self.model.match_parent_paths( node, - [(["Reshape", "Transpose", "DequantizeLinear", "QuantizeLinear"], [0, 0, 0, 0])], + [ + ( + ["Reshape", "Transpose", "DequantizeLinear", "QuantizeLinear"], + [0, 0, 0, 0], + ) + ], output_name_to_node, ) @@ -107,7 +136,17 @@ class FusionQOrderedMatMul(Fusion): weight_path_id, weight_nodes, _ = self.model.match_parent_paths( node, - [(["DequantizeLinear", "QuantizeLinear", "Transpose", "DequantizeLinear"], [1, 0, 0, 0])], + [ + ( + [ + "DequantizeLinear", + "QuantizeLinear", + "Transpose", + "DequantizeLinear", + ], + [1, 0, 0, 0], + ) + ], output_name_to_node, ) @@ -132,14 +171,20 @@ class FusionQOrderedMatMul(Fusion): # Make sure the upstream DequantizeLinear-1 has the proper zero points and scales # Per-channel scales are supported for weights alone - if not FusionUtils.check_qdq_node_for_fusion(dequantize_node_1, self.model, False): + if not FusionUtils.check_qdq_node_for_fusion( + dequantize_node_1, self.model, False + ): return # Make sure the upstream flow into the Residual Add node flows through a DQ node residual_add_dequantize_node = None if residual_add_node is not None: - residual_path_id, residual_input_parent_nodes, _ = self.model.match_parent_paths( + ( + residual_path_id, + residual_input_parent_nodes, + _, + ) = self.model.match_parent_paths( residual_add_node, [ (["DequantizeLinear"], [1]), @@ -153,8 +198,11 @@ class FusionQOrderedMatMul(Fusion): residual_add_dequantize_node = residual_input_parent_nodes[0] # Make sure the upstream DequantizeLinear to the Residual Add has the proper zero points and scales - if residual_add_dequantize_node is not None and not FusionUtils.check_qdq_node_for_fusion( - residual_add_dequantize_node, self.model + if ( + residual_add_dequantize_node is not None + and not FusionUtils.check_qdq_node_for_fusion( + residual_add_dequantize_node, self.model + ) ): return @@ -168,18 +216,25 @@ class FusionQOrderedMatMul(Fusion): subgraph_nodes.extend([downstream_quantize_node]) # Downstream Q node if not self.model.is_safe_to_fuse_nodes( - subgraph_nodes, downstream_quantize_node.output, input_name_to_nodes, output_name_to_node + subgraph_nodes, + downstream_quantize_node.output, + 
input_name_to_nodes, + output_name_to_node, ): logger.debug(f"It is not safe to fuse QOrderedMatMul node. Skip") return # Deal with the case where-in the Attention subgraph is not fused if transpose_node_0 is not None: - self.model.replace_node_input(transpose_node_0, transpose_node_0.input[0], dequantize_node_0.input[0]) + self.model.replace_node_input( + transpose_node_0, transpose_node_0.input[0], dequantize_node_0.input[0] + ) # Make inputs fused_node_inputs = [ - reshape_node_0.output[0] if reshape_node_0 is not None else dequantize_node_0.input[0], + reshape_node_0.output[0] + if reshape_node_0 is not None + else dequantize_node_0.input[0], dequantize_node_0.input[1], dequantize_node_1.input[0], dequantize_node_1.input[1], @@ -203,7 +258,9 @@ class FusionQOrderedMatMul(Fusion): "QOrderedMatMul", inputs=fused_node_inputs, outputs=[downstream_quantize_node.output[0]], - name=self.model.create_node_name("QOrderedMatMul", name_prefix="QOrderedMatMul"), + name=self.model.create_node_name( + "QOrderedMatMul", name_prefix="QOrderedMatMul" + ), ) fused_node.attribute.extend([helper.make_attribute("order_A", 1)]) diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_reshape.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_reshape.py index d2b46c16..2a5bf73f 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_reshape.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_reshape.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
@@ -6,8 +22,9 @@ from logging import getLogger import numpy as np -from .fusion_base import Fusion from onnx import TensorProto, helper, numpy_helper + +from .fusion_base import Fusion from .onnx_model import OnnxModel logger = getLogger(__name__) @@ -19,7 +36,7 @@ class FusionReshape(Fusion): self.prune_graph: bool = False def replace_reshape_node(self, shape, reshape_node, concat_node): - shape_value = np.asarray(shape, dtype=np.int64) + shape_value = np.asarray([int(x) if isinstance(x, np.ndarray) else x for x in shape], dtype=np.int64) constant_shape_name = self.model.create_node_name("Constant", "constant_shape") new_node = helper.make_node( "Constant", @@ -44,7 +61,11 @@ class FusionReshape(Fusion): return concat_node = output_name_to_node[reshape_node.input[1]] - if concat_node.op_type != "Concat" or len(concat_node.input) < 3 or len(concat_node.input) > 4: + if ( + concat_node.op_type != "Concat" + or len(concat_node.input) < 3 + or len(concat_node.input) > 4 + ): return path0 = self.model.match_parent_path( @@ -83,7 +104,10 @@ class FusionReshape(Fusion): path2 = [] path3 = [] shape_nodes = [shape_0, shape_1] - if len(concat_node.input) == 3 and self.model.get_initializer(concat_node.input[2]) is None: + if ( + len(concat_node.input) == 3 + and self.model.get_initializer(concat_node.input[2]) is None + ): path2 = self.model.match_parent_path( concat_node, ["Unsqueeze", "Mul", "Gather", "Shape"], @@ -128,7 +152,10 @@ class FusionReshape(Fusion): else: shape.append(concat_value) - if len(concat_node.input) == 4 and self.model.get_initializer(concat_node.input[3]) is None: + if ( + len(concat_node.input) == 4 + and self.model.get_initializer(concat_node.input[3]) is None + ): if -1 in shape: return diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_rms_norm.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_rms_norm.py index c831f15c..b3ec51a5 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_rms_norm.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_rms_norm.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+# + import logging from typing import Dict diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_roformer_attention.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_roformer_attention.py index a5079c2d..1d99595e 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_roformer_attention.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_roformer_attention.py @@ -115,9 +115,12 @@ class FusionRoformerCrossAttention(Fusion): attention_node.domain = "com.iluvatar" attention_node.attribute.extend([helper.make_attribute("type_id", 2)]) attention_node.attribute.extend([helper.make_attribute("has_mask", 1)]) + attention_node.attribute.extend([helper.make_attribute("type_mask", 4)]) #3:float mask 4:int32 mask + attention_node.attribute.extend([helper.make_attribute("scale", 1.0 / 8)]) #1 /sqrt(num_heads) + attention_node.attribute.extend([helper.make_attribute("plugin_namespace", "")]) attention_node.attribute.extend([helper.make_attribute("plugin_version", "1")]) - + return attention_node def get_shape(self, edge_name): diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_rope.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_rope.py index 2ca376c3..dfa14d0e 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_rope.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_rope.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_shape.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_shape.py index b47be680..727d4b82 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_shape.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_shape.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. @@ -6,9 +22,10 @@ from logging import getLogger from typing import Dict, List, Union +from onnx import NodeProto, TensorProto + from .fusion_base import Fusion from .fusion_utils import FusionUtils -from onnx import NodeProto, TensorProto from .onnx_model import OnnxModel logger = getLogger(__name__) @@ -21,7 +38,9 @@ class FusionShape(Fusion): self.shape_infer = None self.shape_infer_done = False - def get_dimensions_from_tensor_proto(self, tensor_proto: TensorProto) -> Union[int, None]: + def get_dimensions_from_tensor_proto( + self, tensor_proto: TensorProto + ) -> Union[int, None]: if tensor_proto.type.tensor_type.HasField("shape"): return len(tensor_proto.type.tensor_type.shape.dim) else: @@ -37,7 +56,9 @@ class FusionShape(Fusion): self.shape_infer_done = True if self.shape_infer is not None: - return self.get_dimensions_from_tensor_proto(self.shape_infer.known_vi_[input_name]) + return self.get_dimensions_from_tensor_proto( + self.shape_infer.known_vi_[input_name] + ) return None @@ -58,7 +79,7 @@ class FusionShape(Fusion): | | Unsqueeze(axes=0) Unsqueeze(axes=0) \ / - Concat + Concat | into (2d_input) --> Shape --> @@ -88,7 +109,9 @@ class FusionShape(Fusion): elif shape.input[0] != root: return - if not FusionUtils.check_node_attribute(unsqueeze, "axis", 0, default_value=0): + if not FusionUtils.check_node_attribute( + unsqueeze, "axis", 0, default_value=0 + ): return if opset_version < 13: @@ -101,7 +124,9 @@ class FusionShape(Fusion): value = self.model.get_constant_value(gather.input[1]) from numpy import array_equal, ndarray - if not (isinstance(value, ndarray) and value.size == 1 and value.item() == i): + if not ( + isinstance(value, ndarray) and value.size == 1 and value.item() == i + ): return if self.model.find_graph_output(concat_node.output[0]) is None: diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_skiplayernorm.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_skiplayernorm.py index 58689644..d0797b26 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_skiplayernorm.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_skiplayernorm.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
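The diagram in the FusionShape hunk above is easier to follow with the target subgraph spelled out. A minimal sketch of the pattern the pass collapses into a single Shape node (illustrative names, opset >= 13 Unsqueeze with axes as an input):

import numpy as np
from onnx import TensorProto, helper, numpy_helper

root = "input_2d"  # the (2d_input) in the diagram above

nodes = [
    helper.make_node("Shape", [root], ["shape_0"]),
    helper.make_node("Gather", ["shape_0", "idx_0"], ["dim_0"]),      # indices = 0
    helper.make_node("Unsqueeze", ["dim_0", "axes_0"], ["dim_0_u"]),  # axes = [0]
    helper.make_node("Shape", [root], ["shape_1"]),
    helper.make_node("Gather", ["shape_1", "idx_1"], ["dim_1"]),      # indices = 1
    helper.make_node("Unsqueeze", ["dim_1", "axes_1"], ["dim_1_u"]),
    helper.make_node("Concat", ["dim_0_u", "dim_1_u"], ["full_shape"], axis=0),
]
initializers = [
    numpy_helper.from_array(np.array(0, dtype=np.int64), "idx_0"),
    numpy_helper.from_array(np.array(1, dtype=np.int64), "idx_1"),
    numpy_helper.from_array(np.array([0], dtype=np.int64), "axes_0"),
    numpy_helper.from_array(np.array([0], dtype=np.int64), "axes_1"),
]
graph = helper.make_graph(
    nodes,
    "shape_fusion_target",
    [helper.make_tensor_value_info(root, TensorProto.FLOAT, ["batch", "seq"])],
    [helper.make_tensor_value_info("full_shape", TensorProto.INT64, [2])],
    initializer=initializers,
)
# After the pass runs, this whole subgraph is equivalent to Shape(input_2d) -> full_shape.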
diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_splitQKV.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_splitQKV.py index a74fe9ee..436257c3 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_splitQKV.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_splitQKV.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_splitQKV_update_KVcache.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_splitQKV_update_KVcache.py new file mode 100644 index 00000000..4152eef6 --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_splitQKV_update_KVcache.py @@ -0,0 +1,128 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- + +from logging import getLogger +from typing import Tuple, Union + +from onnx import NodeProto, TensorProto, helper, numpy_helper + +from .fusion_base import Fusion +from .fusion_utils import NumpyHelper +from .onnx_model import OnnxModel + +logger = getLogger(__name__) + + +class FusionSplitQKVUpdateKVCache(Fusion): + """ + Fuse FusionSplitQKVUpdateKVCache + """ + + def __init__(self, model: OnnxModel, hidden_size: int, num_heads: int): + super().__init__( + model, "SplitQKVUpdateKVCache_IxRT", "CustomQkvCrossToContext_IxRT" + ) + + self.hidden_size = hidden_size + self.num_heads = num_heads + + def create_node( + self, + inputs: list, + outputs: list, + ) -> Union[NodeProto, None]: + """Create an XSoftmax node. + + Args: + data_input (str): data input name + mask_input (str): max input name + output (str): output name + + Returns: + Union[NodeProto, None]: the node created or None if failed. 
+ """ + node_name = self.model.create_node_name("SplitQKVUpdateKVCache_IxRT") + + new_node = helper.make_node( + "SplitQKVUpdateKVCache_IxRT", + inputs=inputs, + outputs=outputs, + name=node_name, + ) + new_node.domain = "com.iluvatar" + new_node.attribute.extend([helper.make_attribute("plugin_namespace", "")]) + new_node.attribute.extend([helper.make_attribute("plugin_version", "1")]) + new_node.attribute.extend([helper.make_attribute("num_head", self.num_heads)]) + new_node.attribute.extend( + [helper.make_attribute("head_dim", self.hidden_size // self.num_heads)] + ) + + return new_node + + def fuse(self, node, input_name_to_nodes, output_name_to_node): + + query_paths = { + "query_path": ( + ["Transpose", "Reshape", "Split"], + [0, 0, None], + ), + } + + key_paths = { + "key_path": ( + ["Concat", "Transpose", "Reshape", "Split"], + [1, None, 0, None], + ), + } + + value_paths = { + "value_path": ( + ["Concat", "Transpose", "Reshape", "Split"], + [2, None, 0, None], + ), + } + + q_nodes, q_path = self.match_parent_path_from_dict(node, query_paths) + + k_nodes, k_path = self.match_parent_path_from_dict(node, key_paths) + + v_nodes, v_path = self.match_parent_path_from_dict(node, value_paths) + + if (q_nodes is not None) and (k_nodes is not None) and (v_nodes is not None): + (q_transpose_node, q_reshape_node, q_split_node) = q_nodes + (k_concat_node, k_transpose_node, k_reshape_node, k_split_node) = k_nodes + + (v_concat_node, v_transpose_node, v_reshape_node, v_split_node) = v_nodes + + inputs = [ + q_split_node.input[0], + k_concat_node.input[0], + v_concat_node.input[0], + ] + + outputs = [ + q_transpose_node.output[0], + k_concat_node.output[0], + v_concat_node.output[0], + ] + + new_node = self.create_node(inputs, outputs) + + self.nodes_to_add.append(new_node) + self.node_name_to_graph_name[new_node.name] = self.this_graph_name + self.nodes_to_remove.append(q_transpose_node) + self.nodes_to_remove.append(q_reshape_node) + self.nodes_to_remove.append(q_split_node) + + self.nodes_to_remove.append(k_concat_node) + self.nodes_to_remove.append(k_transpose_node) + self.nodes_to_remove.append(k_reshape_node) + + self.nodes_to_remove.append(v_concat_node) + self.nodes_to_remove.append(v_transpose_node) + self.nodes_to_remove.append(v_reshape_node) + + else: + return \ No newline at end of file diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_swinl_attention.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_swinl_attention.py index 8edb9a5a..e446a69a 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_swinl_attention.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_swinl_attention.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. @@ -6,16 +22,17 @@ from enum import Enum from logging import getLogger from os import name from sys import path -from typing import Tuple, Union, List +from typing import List, Tuple, Union import numpy as np +import onnx +from onnx import NodeProto, TensorProto, helper, numpy_helper + from .fusion_base import Fusion from .fusion_options import AttentionMaskFormat from .fusion_utils import FusionUtils, NumpyHelper -from onnx import NodeProto, TensorProto, helper, numpy_helper from .onnx_model import OnnxModel from .shape_infer_helper import SymbolicShapeInferenceHelper, get_shape_from_type_proto -import onnx logger = getLogger(__name__) @@ -34,10 +51,14 @@ class FusionSwinLAttention(Fusion): """ def __init__( - self, - model: OnnxModel, + self, + model: OnnxModel, ): - super().__init__(model, "CustomQKVToContextPluginDynamic_IxRT", ["CustomFCPluginDynamic_IxRT"]) + super().__init__( + model, + "CustomQKVToContextPluginDynamic_IxRT", + ["CustomFCPluginDynamic_IxRT"], + ) # Flags to show warning only once self.num_heads_warning = True @@ -61,7 +82,9 @@ class FusionSwinLAttention(Fusion): v_shape_value = NumpyHelper.to_array(v_shape) if len(v_shape_value) != 3 or (v_shape_value[1] <= 0 or v_shape_value[2] <= 0): - logger.debug(f"v_shape_value={v_shape_value}. Expected value are like [0, 0, num_heads, head_size].") + logger.debug( + f"v_shape_value={v_shape_value}. Expected value are like [0, 0, num_heads, head_size]." + ) return self.num_heads, self.hidden_size # Fall back to user specified value num_heads = 1 @@ -74,11 +97,11 @@ class FusionSwinLAttention(Fusion): return num_heads, hidden_size def create_attention_node( - self, - num_heads: int, - hidden_size: int, - inputs: List[str], - output: str, + self, + num_heads: int, + hidden_size: int, + inputs: List[str], + output: str, ) -> Union[NodeProto, None]: """Create an Attention node. @@ -94,7 +117,9 @@ class FusionSwinLAttention(Fusion): assert num_heads > 0 if hidden_size > 0 and (hidden_size % num_heads) != 0: - logger.debug(f"input hidden size {hidden_size} is not a multiple of num of heads {num_heads}") + logger.debug( + f"input hidden size {hidden_size} is not a multiple of num of heads {num_heads}" + ) return None attention_node_name = self.model.create_node_name("Attention") @@ -108,7 +133,9 @@ class FusionSwinLAttention(Fusion): attention_node.domain = "com.iluvatar" attention_node.attribute.extend([helper.make_attribute("type_id", 2)]) attention_node.attribute.extend([helper.make_attribute("num_heads", num_heads)]) - attention_node.attribute.extend([helper.make_attribute("hidden_size", hidden_size)]) + attention_node.attribute.extend( + [helper.make_attribute("hidden_size", hidden_size)] + ) attention_node.attribute.extend([helper.make_attribute("has_mask", 1)]) attention_node.attribute.extend([helper.make_attribute("plugin_namespace", "")]) attention_node.attribute.extend([helper.make_attribute("plugin_version", "1")]) @@ -120,8 +147,7 @@ class FusionSwinLAttention(Fusion): self.fuse_pattern2(normalize_node, input_name_to_nodes, output_name_to_node) def fuse_pattern2(self, normalize_node, input_name_to_nodes, output_name_to_node): - """ match Swin-L pattern and fuse them to CustomFC --> Attention --> CustomFC - """ + """match Swin-L pattern and fuse them to CustomFC --> Attention --> CustomFC""" logger.debug("fuse swin-L attention pass") # 1. 
CustomFCPluginDynamic_IxRT node as start, go up to find a pattern for swin-L pattern start_node = normalize_node @@ -132,49 +158,76 @@ class FusionSwinLAttention(Fusion): if qkv_nodes is None: logger.debug("fuse_attention: failed to match qkv path") return - assert qkv_path == 'path1', 'abnormal qkv path' + assert qkv_path == "path1", "abnormal qkv path" reshape_qkv, transpose_qkv, matmul_qkv = qkv_nodes # 2. MatMul as start, go up to find v path v_paths = { - "path1": (["Transpose", "Reshape", "CustomFCPluginDynamic_IxRT"], [None, 0, 0]) + "path1": ( + ["Transpose", "Reshape", "CustomFCPluginDynamic_IxRT"], + [None, 0, 0], + ) } v_nodes, v_path = self.match_parent_path_from_dict(matmul_qkv, v_paths) if not v_nodes: logger.debug("fuse_attention: failed to match v path") return - assert v_path == 'path1', 'abnormal v path' + assert v_path == "path1", "abnormal v path" # 3. MatMul as start, go up to find q,k paths # q path q_paths = { - "path1": (["Softmax", "Add", "Div", "MatMul", "Transpose", "Reshape", "CustomFCPluginDynamic_IxRT"], - [None, 0, 0, 0, 0, 0, 0]), + "path1": ( + [ + "Softmax", + "Add", + "Div", + "MatMul", + "Transpose", + "Reshape", + "CustomFCPluginDynamic_IxRT", + ], + [None, 0, 0, 0, 0, 0, 0], + ), } q_nodes, q_path = self.match_parent_path_from_dict(matmul_qkv, q_paths) if not q_nodes: logger.debug("fuse_attention: failed to match q path") return - assert q_path == 'path1', 'abnormal q paths found' + assert q_path == "path1", "abnormal q paths found" # get Add(bias) input name as fused Attention inputs add_op, div_op = q_nodes[1], q_nodes[2] - relative_position_bias_name = add_op.input[1] if add_op.input[0] == div_op.output[0] else add_op.input[0] + relative_position_bias_name = ( + add_op.input[1] if add_op.input[0] == div_op.output[0] else add_op.input[0] + ) # k path k_paths = { - "path2": (["Softmax", "Add", "Div", "MatMul", "Transpose", "Reshape", "CustomFCPluginDynamic_IxRT"], - [None, 0, 0, 0, 1, 0, 0]) + "path2": ( + [ + "Softmax", + "Add", + "Div", + "MatMul", + "Transpose", + "Reshape", + "CustomFCPluginDynamic_IxRT", + ], + [None, 0, 0, 0, 1, 0, 0], + ) } k_nodes, k_path = self.match_parent_path_from_dict(matmul_qkv, k_paths) if not k_nodes: logger.debug("fuse_attention: failed to match k path") return - assert k_path == 'path2', 'abnormal k paths found' + assert k_path == "path2", "abnormal k paths found" # 4. 
Fuse 3 CustomFC into one, and fuse attention # Fuse FCs fc_nodes = [q_nodes[-1], k_nodes[-1], v_nodes[-1]] - weight = self.fuse_tensor_in_node_attrs(fc_nodes, "W", q_nodes[-1].name + "_Weight") + weight = self.fuse_tensor_in_node_attrs( + fc_nodes, "W", q_nodes[-1].name + "_Weight" + ) bias = self.fuse_tensor_in_node_attrs(fc_nodes, "B", q_nodes[-1].name + "_Bias") fused_node = helper.make_node( "CustomFCPluginDynamic_IxRT", @@ -183,7 +236,9 @@ class FusionSwinLAttention(Fusion): name=self.model.create_node_name("CustomFC", "MatMul_AddBias_"), ) fused_node.domain = "com.iluvatar" - fused_node.attribute.extend([helper.make_attribute("out_dims", numpy_helper.to_array(bias).shape[0])]) + fused_node.attribute.extend( + [helper.make_attribute("out_dims", numpy_helper.to_array(bias).shape[0])] + ) fused_node.attribute.extend([helper.make_attribute("type_id", 2)]) fused_node.attribute.extend([helper.make_attribute("W", weight)]) fused_node.attribute.extend([helper.make_attribute("B", bias)]) @@ -205,12 +260,13 @@ class FusionSwinLAttention(Fusion): return self.nodes_to_add.append(attention_node) self.node_name_to_graph_name[attention_node.name] = self.this_graph_name - self.nodes_to_remove.extend([*qkv_nodes, *q_nodes[:-2], *k_nodes[:-2], *v_nodes]) + self.nodes_to_remove.extend( + [*qkv_nodes, *q_nodes[:-2], *k_nodes[:-2], *v_nodes] + ) self.prune_graph = True def fuse_pattern1(self, normalize_node, input_name_to_nodes, output_name_to_node): - """ match Swin-L pattern and fuse them to CustomFC --> Attention --> CustomFC - """ + """match Swin-L pattern and fuse them to CustomFC --> Attention --> CustomFC""" logger.debug("fuse swin-L attention pass") # 1. CustomFCPluginDynamic_IxRT node as start, go up to find a pattern for swin-L pattern start_node = normalize_node @@ -221,45 +277,74 @@ class FusionSwinLAttention(Fusion): if qkv_nodes is None: logger.debug("fuse_attention: failed to match qkv path") return - assert qkv_path == 'path1', 'abnormal qkv path' + assert qkv_path == "path1", "abnormal qkv path" reshape_qkv, transpose_qkv, matmul_qkv = qkv_nodes # 2. MatMul as start, go up to find v path v_paths = { - "path1": (["Transpose", "Reshape", "Add", "Split", "MatMul"], [None, 0, 0, None, 0]) + "path1": ( + ["Transpose", "Reshape", "Add", "Split", "MatMul"], + [None, 0, 0, None, 0], + ) } v_nodes, v_path = self.match_parent_path_from_dict(matmul_qkv, v_paths) if not v_nodes: logger.debug("fuse_attention: failed to match v path") return - assert v_path == 'path1', 'abnormal v path' + assert v_path == "path1", "abnormal v path" # 3. 
MatMul as start, go up to find q,k paths # q path q_paths = { - "path1": (["Softmax", "Add", "Div", "MatMul", "Transpose", "Reshape", "Add", "Split", "MatMul"], - [None, 0, 0, 0, 0, 0, 0, None, 0]), + "path1": ( + [ + "Softmax", + "Add", + "Div", + "MatMul", + "Transpose", + "Reshape", + "Add", + "Split", + "MatMul", + ], + [None, 0, 0, 0, 0, 0, 0, None, 0], + ), } q_nodes, q_path = self.match_parent_path_from_dict(matmul_qkv, q_paths) if not q_nodes: logger.debug("fuse_attention: failed to match q path") return - assert q_path == 'path1', 'abnormal q paths found' + assert q_path == "path1", "abnormal q paths found" # get Add(bias) input name as fused Attention inputs add_op, div_op = q_nodes[1], q_nodes[2] - relative_position_bias_name = add_op.input[1] if add_op.input[0] == div_op.output[0] else add_op.input[0] + relative_position_bias_name = ( + add_op.input[1] if add_op.input[0] == div_op.output[0] else add_op.input[0] + ) # k path k_paths = { - "path2": (["Softmax", "Add", "Div", "MatMul", "Transpose", "Reshape", "Add", "Split", "MatMul"], - [None, 0, 0, 0, 1, 0, 0, None, 0]) + "path2": ( + [ + "Softmax", + "Add", + "Div", + "MatMul", + "Transpose", + "Reshape", + "Add", + "Split", + "MatMul", + ], + [None, 0, 0, 0, 1, 0, 0, None, 0], + ) } k_nodes, k_path = self.match_parent_path_from_dict(matmul_qkv, k_paths) if not k_nodes: logger.debug("fuse_attention: failed to match k path") return - assert k_path == 'path2', 'abnormal k paths found' + assert k_path == "path2", "abnormal k paths found" # 4. Attention and CustomFC have been found, now transform the found nodes to two plugin nodes # Test 3 paths have the same origin is_same_origin = q_nodes[-1] is k_nodes[-1] is v_nodes[-1] @@ -279,9 +364,11 @@ class FusionSwinLAttention(Fusion): if not weight or not all(biases): print("swin-L: couldn't find weights") return - weight_arr = onnx.numpy_helper.to_array(weight).transpose(1,0) + weight_arr = onnx.numpy_helper.to_array(weight).transpose(1, 0) weight.CopyFrom(numpy_helper.from_array(weight_arr)) - bias_arr = np.concatenate([onnx.numpy_helper.to_array(i) for i in biases], axis=0) + bias_arr = np.concatenate( + [onnx.numpy_helper.to_array(i) for i in biases], axis=0 + ) fused_node = helper.make_node( "CustomFCPluginDynamic_IxRT", @@ -290,10 +377,14 @@ class FusionSwinLAttention(Fusion): name=self.model.create_node_name("CustomFC", "MatMul_AddBias_"), ) fused_node.domain = "com.iluvatar" - fused_node.attribute.extend([helper.make_attribute("out_dims", bias_arr.shape[0])]) + fused_node.attribute.extend( + [helper.make_attribute("out_dims", bias_arr.shape[0])] + ) fused_node.attribute.extend([helper.make_attribute("type_id", 2)]) fused_node.attribute.extend([helper.make_attribute("W", weight)]) - fused_node.attribute.extend([helper.make_attribute("B", numpy_helper.from_array(bias_arr))]) + fused_node.attribute.extend( + [helper.make_attribute("B", numpy_helper.from_array(bias_arr))] + ) fused_node.attribute.extend([helper.make_attribute("plugin_namespace", "")]) fused_node.attribute.extend([helper.make_attribute("plugin_version", "1")]) fused_node.attribute.extend([helper.make_attribute("act_type", -1)]) @@ -305,13 +396,14 @@ class FusionSwinLAttention(Fusion): hidden_size, [fused_node.output[0], relative_position_bias_name], reshape_qkv.output[0], - ) if not attention_node: return self.nodes_to_add.append(attention_node) self.node_name_to_graph_name[attention_node.name] = self.this_graph_name - self.nodes_to_remove.extend([*qkv_nodes, *q_nodes[:-2], *k_nodes[:-2], *v_nodes]) + 
self.nodes_to_remove.extend( + [*qkv_nodes, *q_nodes[:-2], *k_nodes[:-2], *v_nodes] + ) self.prune_graph = True def fuse_tensor_in_node_attrs(self, fc_nodes, attr_name, tensor_name): diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_t5_attention.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_t5_attention.py index 661e8375..bce0ab17 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_t5_attention.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_t5_attention.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. @@ -22,7 +38,7 @@ from .shape_infer_helper import SymbolicShapeInferenceHelper, get_shape_from_typ logger = getLogger(__name__) -class FusionT5Attention(Fusion): +class FusionT5EncoderAttention(Fusion): """ Fuse T5Attention subgraph into one Attention node. """ @@ -310,3 +326,170 @@ class FusionT5Attention(Fusion): self.nodes_to_remove.extend(q_nodes) self.nodes_to_remove.extend(k_nodes) self.nodes_to_remove.extend(v_nodes[:-2]) + + +class FusionT5DecoderAttention(Fusion): + """ + Fuse T5Attention subgraph into one Attention node. + """ + + def __init__( + self, + model: OnnxModel, + ): + super().__init__( + model, + "CustomQkvCrossToContext_IxRT", + ["Softmax"], + ) + + # Flags to show warning only once + self.num_heads_warning = True + self.hidden_size_warning = True + + def get_num_heads_and_hidden_size(self, reshape_q: NodeProto) -> Tuple[int, int]: + """Detect num_heads and hidden_size from a reshape node. + + Args: + reshape_q (NodeProto): reshape node for Q + + Returns: + Tuple[int, int]: num_heads and hidden_size + """ + + # we assume that reshape fusion has done, so the shape is a tensor like [0, 0, num_heads, head_size] + q_shape = self.model.get_initializer(reshape_q.input[1]) + if q_shape is None: + logger.debug(f"{reshape_q.input[1]} is not initializer.") + return [0, 0] + + q_shape_value = NumpyHelper.to_array(q_shape) + if len(q_shape_value) != 4 or (q_shape_value[2] <= 0 or q_shape_value[3] <= 0): + logger.debug( + f"q_shape_value={q_shape_value}. Expected value are like [0, 0, num_heads, head_size]." + ) + return [0, 0] + + num_heads = q_shape_value[2] + head_size = q_shape_value[3] + hidden_size = num_heads * head_size + + return num_heads, hidden_size + + def create_decoder_attention_node( + self, inputs: str, outputs: str, type_mask: int, has_mask: int + ) -> Union[NodeProto, None]: + """Create an Attention node. + + Args: + input (str): input name + output (str): output name + + Returns: + Union[NodeProto, None]: the node created or None if failed. 
+ """ + + attention_node_name = self.model.create_node_name("decoder_Attention") + attention_node = helper.make_node( + "CustomQkvCrossToContext_IxRT", + inputs=inputs, + outputs=outputs, + name=attention_node_name, + ) + attention_node.domain = "com.iluvatar" + attention_node.attribute.extend([helper.make_attribute("type_id", 2)]) + attention_node.attribute.extend([helper.make_attribute("scale", 1.0)]) + attention_node.attribute.extend([helper.make_attribute("has_mask", has_mask)]) + attention_node.attribute.extend([helper.make_attribute("plugin_namespace", "")]) + attention_node.attribute.extend([helper.make_attribute("plugin_version", "1")]) + attention_node.attribute.extend([helper.make_attribute("type_mask", type_mask)]) + + return attention_node + + def fuse(self, node, input_name_to_nodes, output_name_to_node): + + """ + path1: + + (query) ---------------->MatMul --> add -->softmax --->MatMul---> + / / / + (key) ---->Transpose --> / / + / / + (mask) ------------------------> / + / + (value)---------------------------------------------> + + + + path2: + + (query) ---------------->MatMul ---------->softmax --->MatMul---> + / / + (key) ---->Transpose --> / + / + / + / + (value)---------------------------------------------> + + """ + + start_node = node + qkv_paths = { + "path1": ( + ["Add", "MatMul", "Transpose"], + [0, 0, 0], + ), # float mask self attention,self attention key pass + "path2": (["MatMul", "Transpose"], [0, 0]), # cross attention qery pass + } + + qkv_nodes, qkv_path = self.match_parent_path_from_dict(start_node, qkv_paths) + if qkv_nodes is None: + logger.debug("fuse_attention: failed to match qkv path") + return + next_nodes = self.model.get_children(node) + if len(next_nodes) == 0: + return + + if next_nodes[0].op_type != "MatMul": + return + + second_matmul_node = next_nodes[0] + attention_inputs = None + attention_outputs = second_matmul_node.output + remove_nodes = [second_matmul_node, node] + if qkv_path == "path1": + (add_node, first_matmul_node, transpose_node) = qkv_nodes + transpose_nodes = self.model.get_parents(first_matmul_node) + q_input = transpose_nodes[0].output[0] + k_input = transpose_nodes[1].input[0] + v_input = second_matmul_node.input[1] + attention_inputs = [q_input, k_input, v_input] + remove_nodes.extend([add_node, first_matmul_node, transpose_nodes[1]]) + + if qkv_path == "path2": + (first_matmul_node, transpose_node) = qkv_nodes + transpose_nodes = self.model.get_parents(first_matmul_node) + q_input = transpose_nodes[0].output[0] + k_input = transpose_nodes[1].input[0] + v_input = second_matmul_node.input[1] + attention_inputs = [q_input, k_input, v_input] + remove_nodes.extend([first_matmul_node, transpose_nodes[1]]) + + has_mask = 0 + type_mask = 4 # int32 mask + + if qkv_path == "path1": + mask_input = add_node.input[0] + score_out = first_matmul_node.output[0] + if add_node.input[0] == score_out: + mask_input = add_node.input[1] + attention_inputs.append(mask_input) + has_mask = 1 + type_mask = 3 # float mask + + atten_node = self.create_decoder_attention_node( + attention_inputs, attention_outputs, type_mask, has_mask + ) + self.nodes_to_add.append(atten_node) + self.node_name_to_graph_name[atten_node.name] = self.this_graph_name + self.nodes_to_remove.extend(remove_nodes) \ No newline at end of file diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_utils.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_utils.py index 5207f28f..4765c8f5 
100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_utils.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_utils.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. @@ -9,6 +25,7 @@ import numpy from numpy import array_equal, ndarray from onnx import NodeProto, TensorProto, helper, numpy_helper from onnx import onnx_pb as onnx_proto + from .onnx_model import OnnxModel logger = getLogger(__name__) @@ -20,12 +37,17 @@ class FusionUtils: def cast_graph_input_to_int32(self, input_name: str) -> Tuple[bool, str]: graph_input = self.model.find_graph_input(input_name) - if graph_input is not None and graph_input.type.tensor_type.elem_type != TensorProto.INT32: + if ( + graph_input is not None + and graph_input.type.tensor_type.elem_type != TensorProto.INT32 + ): cast_output, cast_node = self.cast_input_to_int32(input_name) logger.debug(f"Casted graph input {input_name} to int32") return True, cast_output - logger.debug(f"Did not cast graph input {input_name} to int32: found {graph_input is not None}") + logger.debug( + f"Did not cast graph input {input_name} to int32: found {graph_input is not None}" + ) return False, input_name def cast_input_to_int32(self, input_name: str): @@ -40,7 +62,9 @@ class FusionUtils: inputs = [parent_node.input[0]] cast_node = helper.make_node("Cast", inputs=inputs, outputs=[cast_output]) - cast_node.attribute.extend([helper.make_attribute("to", int(TensorProto.INT32))]) + cast_node.attribute.extend( + [helper.make_attribute("to", int(TensorProto.INT32))] + ) self.model.add_node(cast_node) return cast_output, cast_node @@ -61,7 +85,9 @@ class FusionUtils: self.model.replace_input_of_all_nodes(output_name, input_name) @staticmethod - def check_node_attribute(node, attribute_name: str, expected_value, default_value=None): + def check_node_attribute( + node, attribute_name: str, expected_value, default_value=None + ): """Verify that a node has expected value for an attribute. 
Args: @@ -79,9 +105,9 @@ class FusionUtils: value = helper.get_attribute_value(attr) if isinstance(expected_value, list): - return (isinstance(value, ndarray) or isinstance(value, list)) and array_equal( - expected_value, value, equal_nan=False - ) + return ( + isinstance(value, ndarray) or isinstance(value, list) + ) and array_equal(expected_value, value, equal_nan=False) else: return value == expected_value @@ -94,13 +120,17 @@ class FusionUtils: tensor (TensorProto): transposed tensor """ if not isinstance(tensor, onnx_proto.TensorProto): - raise ValueError("Expected input type is an ONNX TensorProto but got %s" % type(tensor)) + raise ValueError( + "Expected input type is an ONNX TensorProto but got %s" % type(tensor) + ) if len(tensor.dims) != 2 or tensor.data_type != onnx_proto.TensorProto.INT8: raise ValueError("Only INT8 2-D tensors can be transposed") if tensor.raw_data: - int32_data = numpy.reshape(numpy.frombuffer(tensor.raw_data, dtype="int8"), tensor.dims) + int32_data = numpy.reshape( + numpy.frombuffer(tensor.raw_data, dtype="int8"), tensor.dims + ) int32_transposed_data = numpy.transpose(int32_data, [1, 0]) tensor.raw_data = int32_transposed_data.tobytes() @@ -110,7 +140,9 @@ class FusionUtils: return tensor @staticmethod - def check_qdq_node_for_fusion(node: NodeProto, model: OnnxModel, allow_per_tensor_quantization_only=True): + def check_qdq_node_for_fusion( + node: NodeProto, model: OnnxModel, allow_per_tensor_quantization_only=True + ): """Verify if a provided QuantizeLinear (Q) / DequantizeLinear (DQ) node is a good candidate for fusion. It is a good candidate for fusion if: (1) The Q/DQ node is for per-tensor quantization if allow_per_tensor_quantization_only is `True` @@ -131,7 +163,9 @@ class FusionUtils: return False # Not per-tensor quantization - scale_has_single_element = scale.ndim == 0 or (scale.ndim == 1 and scale.shape[0] == 1) + scale_has_single_element = scale.ndim == 0 or ( + scale.ndim == 1 and scale.shape[0] == 1 + ) if allow_per_tensor_quantization_only and not scale_has_single_element: return False @@ -168,9 +202,9 @@ class FusionUtils: value = self.model.get_constant_value(node.input[input_index]) if isinstance(expected_value, list): - return (isinstance(value, ndarray) or isinstance(value, list)) and array_equal( - expected_value, value, equal_nan=False - ) + return ( + isinstance(value, ndarray) or isinstance(value, list) + ) and array_equal(expected_value, value, equal_nan=False) else: return value == expected_value @@ -216,7 +250,9 @@ class FusionUtils: for node in nodes_to_remove: if bool(set(node.output) & graph_output_names): if not bool(set(node.input) & graph_input_names): - self.model.replace_output_of_all_nodes(node.input[0], node.output[0]) + self.model.replace_output_of_all_nodes( + node.input[0], node.output[0] + ) else: continue else: diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_videobert_attention.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_videobert_attention.py index 1133877b..d3244b7a 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_videobert_attention.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_videobert_attention.py @@ -1,7 +1,24 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- +import math from enum import Enum from logging import getLogger from os import name @@ -9,17 +26,18 @@ from sys import path from typing import Tuple, Union import numpy as np +import onnx +from onnx import NodeProto, TensorProto, helper, numpy_helper + from .fusion_base import Fusion from .fusion_options import AttentionMaskFormat from .fusion_utils import FusionUtils, NumpyHelper -from onnx import NodeProto, TensorProto, helper, numpy_helper from .onnx_model import OnnxModel from .shape_infer_helper import SymbolicShapeInferenceHelper, get_shape_from_type_proto -import onnx -import math logger = getLogger(__name__) + class FusionVideoBertAttention(Fusion): """ Fuse VideoBertAttention subgraph into one Attention node. @@ -29,13 +47,19 @@ class FusionVideoBertAttention(Fusion): self, model: OnnxModel, ): - super().__init__(model, "CustomQKVToContextPluginDynamic_IxRT", ["CustomSkipLayerNormPluginDynamic_IxRT", "LayerNormalization"]) + super().__init__( + model, + "CustomQKVToContextPluginDynamic_IxRT", + ["CustomSkipLayerNormPluginDynamic_IxRT", "LayerNormalization"], + ) # Flags to show warning only once self.num_heads_warning = True self.hidden_size_warning = True - def get_num_heads_and_hidden_size(self, atten_matmul: NodeProto, div: NodeProto) -> Tuple[int, int]: + def get_num_heads_and_hidden_size( + self, atten_matmul: NodeProto, div: NodeProto + ) -> Tuple[int, int]: """Detect num_heads and hidden_size from a reshape node. Args: @@ -48,7 +72,7 @@ class FusionVideoBertAttention(Fusion): # we assume that reshape fusion has done, so the shape is a tensor like [0, 0, num_heads, head_size] atten_matul_initializer = self.model.get_initializer(atten_matmul.input[1]) div_initializer = self.model.get_initializer(div.input[1]) - + # 检查float_data是否为空 if len(div_initializer.float_data) > 0: div_value = div_initializer.float_data[0] @@ -60,13 +84,13 @@ class FusionVideoBertAttention(Fusion): div_value = np.frombuffer(div_initializer.raw_data, dtype=dtype)[0] else: raise ValueError("Data not found in the div_initializer") - + atten_matul_shape_value = NumpyHelper.to_array(atten_matul_initializer).shape - head_dim = math.ceil(div_value*div_value) + head_dim = math.ceil(div_value * div_value) hidden_size = atten_matul_shape_value[0] num_heads = hidden_size // head_dim - return num_heads, hidden_size + return num_heads, hidden_size def create_attention_node( self, @@ -74,7 +98,7 @@ class FusionVideoBertAttention(Fusion): hidden_size: int, input: str, output: str, - matmul_qk_add: NodeProto + matmul_qk_add: NodeProto, ) -> Union[NodeProto, None]: """Create an Attention node. 
@@ -90,11 +114,13 @@ class FusionVideoBertAttention(Fusion): assert num_heads > 0 if hidden_size > 0 and (hidden_size % num_heads) != 0: - logger.debug(f"input hidden size {hidden_size} is not a multiple of num of heads {num_heads}") + logger.debug( + f"input hidden size {hidden_size} is not a multiple of num of heads {num_heads}" + ) return None attention_node_name = self.model.create_node_name("Attention") - + qk_bias = None has_mask = 0 has_qk_bias = 0 @@ -106,13 +132,13 @@ class FusionVideoBertAttention(Fusion): qk_bias_arr = qk_bias_arr.squeeze(0) has_neg_inf = np.isinf(qk_bias_arr) & (qk_bias_arr < 0) if np.any(has_neg_inf): - qk_bias_arr = np.where(qk_bias_arr == -np.inf, -100, 0.0).astype(np.float32) + qk_bias_arr = np.where(qk_bias_arr == -np.inf, -100, 0.0).astype( + np.float32 + ) qk_bias.CopyFrom(numpy_helper.from_array(qk_bias_arr, qk_bias.name)) - - attention_inputs = [ - input - ] - + + attention_inputs = [input] + if qk_bias is not None: has_mask = 1 attention_inputs.append(qk_bias.name) @@ -126,12 +152,16 @@ class FusionVideoBertAttention(Fusion): attention_node.domain = "com.iluvatar" attention_node.attribute.extend([helper.make_attribute("type_id", 2)]) attention_node.attribute.extend([helper.make_attribute("num_heads", num_heads)]) - attention_node.attribute.extend([helper.make_attribute("hidden_size", hidden_size)]) + attention_node.attribute.extend( + [helper.make_attribute("hidden_size", hidden_size)] + ) attention_node.attribute.extend([helper.make_attribute("has_mask", has_mask)]) attention_node.attribute.extend([helper.make_attribute("plugin_namespace", "")]) attention_node.attribute.extend([helper.make_attribute("plugin_version", "1")]) - attention_node.attribute.extend([helper.make_attribute("has_qk_bias", has_qk_bias)]) - + attention_node.attribute.extend( + [helper.make_attribute("has_qk_bias", has_qk_bias)] + ) + return attention_node def fuse(self, normalize_node, input_name_to_nodes, output_name_to_node): @@ -145,8 +175,14 @@ class FusionVideoBertAttention(Fusion): # SkipLayerNormalization has two inputs, and one of them is the root input for attention. 
qkv_paths = { - "path1" : (["Add", "MatMul", "Reshape", "Transpose", "MatMul"], [0, None, 0, 0, 0]), - "path2" : (["Add", "MatMul", "Reshape", "Transpose", "MatMul"], [1, None, 0, 0, 0]), + "path1": ( + ["Add", "MatMul", "Reshape", "Transpose", "MatMul"], + [0, None, 0, 0, 0], + ), + "path2": ( + ["Add", "MatMul", "Reshape", "Transpose", "MatMul"], + [1, None, 0, 0, 0], + ), } qkv_nodes, qkv_path = self.match_parent_path_from_dict(start_node, qkv_paths) @@ -155,7 +191,7 @@ class FusionVideoBertAttention(Fusion): logger.debug("fuse_attention: failed to match qkv path") return - if qkv_path in ['path1', 'path2']: + if qkv_path in ["path1", "path2"]: (_, atten_matmul, reshape_qkv, transpose_qkv, matmul_qkv) = qkv_nodes other_inputs = [] @@ -171,7 +207,7 @@ class FusionVideoBertAttention(Fusion): root_input = other_inputs[0] """ - Match videobert + Match videobert transpose/Add --> LayerNormalization --> Attention --> Add --> LayerNormalization | | | | @@ -181,39 +217,42 @@ class FusionVideoBertAttention(Fusion): if transpose_before_layernorm is not None: node_children = input_name_to_nodes[transpose_before_layernorm.output[0]] for child in node_children: - if child is not None and child.op_type == 'LayerNormalization': + if child is not None and child.op_type == "LayerNormalization": root_input = child.output[0] add_before_layernorm = self.model.match_parent(start_node, "Add", None) if add_before_layernorm is not None: node_children = input_name_to_nodes[add_before_layernorm.output[0]] for child in node_children: - if child is not None and child.op_type == 'LayerNormalization': + if child is not None and child.op_type == "LayerNormalization": root_input = child.output[0] v_paths = { - "path1" : (["Transpose", "Reshape", "Slice", "Add", "MatMul"], [1, 0, 0, 0, None]) # videobert + "path1": ( + ["Transpose", "Reshape", "Slice", "Add", "MatMul"], + [1, 0, 0, 0, None], + ) # videobert } v_nodes, v_path = self.match_parent_path_from_dict(matmul_qkv, v_paths) - if v_path == 'path1': + if v_path == "path1": (_, _, _, add_in_qkv, matmul_in_qkv) = v_nodes if v_nodes is None: logger.debug("fuse_attention: failed to match v path") return - + qk_paths = { "path1": (["Softmax", "MatMul"], [0, 0]), - "path2": (["Softmax", "Add", "MatMul"], [0, 0, None]) + "path2": (["Softmax", "Add", "MatMul"], [0, 0, None]), } qk_nodes, qk_path = self.match_parent_path_from_dict(matmul_qkv, qk_paths) - + if qk_nodes is None: logger.debug("fuse_attention: failed to match qk path") return - + matmul_qk_add = None if qk_path == "path1": (_, matmul_qk) = qk_nodes @@ -221,45 +260,51 @@ class FusionVideoBertAttention(Fusion): (_, matmul_qk_add, matmul_qk) = qk_nodes q_paths = { - "path1" : (["Transpose", "Reshape", "Slice"], [0, 0, 0]), - "path2" : (["Div", "Transpose", "Reshape", "Slice"], [0, 0, 0, 0]) + "path1": (["Transpose", "Reshape", "Slice"], [0, 0, 0]), + "path2": (["Div", "Transpose", "Reshape", "Slice"], [0, 0, 0, 0]), } q_nodes, q_path = self.match_parent_path_from_dict(matmul_qk, q_paths) if q_nodes is None: logger.debug("fuse_attention: failed to match q path") return - - if q_path == 'path1': + + if q_path == "path1": (_, _, slice_q) = q_nodes else: (div, _, _, slice_q) = q_nodes k_paths = { - "path1" : (["Transpose", "Reshape", "Slice"], [1, 0, 0]), - "path2" : (["Div", "Transpose", "Reshape", "Slice"], [1, 0, 0, 0]) + "path1": (["Transpose", "Reshape", "Slice"], [1, 0, 0]), + "path2": (["Div", "Transpose", "Reshape", "Slice"], [1, 0, 0, 0]), } k_nodes, k_path = self.match_parent_path_from_dict(matmul_qk, 
k_paths) if k_nodes is None: logger.debug("fuse_attention: failed to match k path") return - - if k_path == 'path1': + + if k_path == "path1": (_, _, slice_k) = k_nodes else: (div, _, _, slice_k) = k_nodes - - if matmul_in_qkv.input[0] == root_input and slice_q.input[0] == add_in_qkv.output[0] and slice_k.input[0] == add_in_qkv.output[0]: + + if ( + matmul_in_qkv.input[0] == root_input + and slice_q.input[0] == add_in_qkv.output[0] + and slice_k.input[0] == add_in_qkv.output[0] + ): attention_last_node = reshape_qkv - num_heads, hidden_size = self.get_num_heads_and_hidden_size(atten_matmul, div) - + num_heads, hidden_size = self.get_num_heads_and_hidden_size( + atten_matmul, div + ) + new_node = self.create_attention_node( num_heads, hidden_size, add_in_qkv.output[0], attention_last_node.output[0], - matmul_qk_add + matmul_qk_add, ) if new_node is None: return @@ -267,34 +312,41 @@ class FusionVideoBertAttention(Fusion): self.nodes_to_add.append(new_node) self.node_name_to_graph_name[new_node.name] = self.this_graph_name - self.nodes_to_remove.extend([attention_last_node, transpose_qkv, matmul_qkv]) + self.nodes_to_remove.extend( + [attention_last_node, transpose_qkv, matmul_qkv] + ) self.nodes_to_remove.extend(qk_nodes) self.nodes_to_remove.extend(q_nodes) self.nodes_to_remove.extend(k_nodes) self.nodes_to_remove.extend(v_nodes[:-2]) - + # fuse head and tail transpose if transpose_before_layernorm is not None: - node_children = input_name_to_nodes[transpose_before_layernorm.output[0]] + node_children = input_name_to_nodes[ + transpose_before_layernorm.output[0] + ] for child in node_children: for i, input in enumerate(child.input): if child.input[i] == transpose_before_layernorm.output[0]: child.input[i] = transpose_before_layernorm.input[0] self.nodes_to_remove.extend([transpose_before_layernorm]) - + node = transpose_before_layernorm while True: found = False node_children = input_name_to_nodes[node.output[0]] for child in node_children: - if child is not None and child.op_type in ['SkipLayerNorm', "Add"]: + if child is not None and child.op_type in [ + "SkipLayerNorm", + "Add", + ]: node = child found = True break if not found: break node_children = input_name_to_nodes[node.output[0]] - if len(node_children) == 1 and node_children[0].op_type == 'Transpose': + if len(node_children) == 1 and node_children[0].op_type == "Transpose": transpose_node = node_children[0] transpose_children = input_name_to_nodes[transpose_node.output[0]] for i, input in enumerate(transpose_children[0].input): diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_vit_attention.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_vit_attention.py index e6e16f17..f1a5410b 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_vit_attention.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_vit_attention.py @@ -1,8 +1,25 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- import math +from typing import Dict from enum import Enum from logging import getLogger from os import name @@ -352,3 +369,101 @@ class FusionVITAttention(Fusion): # Use prune graph to remove mask nodes since they are shared by all attention nodes. # self.nodes_to_remove.extend(mask_nodes) # self.prune_graph = True + + +class FusionTorchvisionVITAttention(Fusion): + """ + Fuse VITAttention subgraph into one Attention node. + """ + + def __init__(self, model: OnnxModel): + super().__init__( + model, "CustomQKVToContextPluginDynamic_IxRT", "CustomFCPluginDynamic_IxRT" + ) + + def fuse(self, node, input_name_to_nodes: Dict, output_name_to_node: Dict): + """ + [Root] --> CustomFCPluginDynamic_IxRT--> CustomQKVToContextPluginDynamic_IxRT --> CustomFCPluginDynamic_IxRT + """ + children = self.model.get_children(node, input_name_to_nodes) + parent = self.model.get_parents(node, output_name_to_node) + + if len(children) != 1: + return + if len(parent) != 1: + return + + fc_first_node = None + for par in parent: + fc_first_node = self.model.find_first_parent_by_type( + par, "CustomFCPluginDynamic_IxRT", output_name_to_node, recursive=True + ) + if fc_first_node is not None: + break + if fc_first_node is None: + return + + start_node = node + + # v path + v_nodes = self.model.match_parent_path( + start_node, + ["Transpose", "MatMul", "Reshape", "Transpose", "Reshape", "Gather", "Squeeze", "Transpose", "Unsqueeze", "Reshape"], + [0, 0, 1, 0, 0, 0, 0, 0, 0, 0], + output_name_to_node, + ) + + # path1, q and k path + q_nodes = self.model.match_parent_path( + start_node, + ["Transpose", "MatMul", "Softmax", "MatMul", "Mul", "Transpose", "Reshape", "Transpose", "Reshape", "Gather", "Squeeze", "Transpose", "Unsqueeze", "Reshape"], + [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0], + output_name_to_node, + ) + + k_nodes = self.model.match_parent_path( + start_node, + ["Transpose", "MatMul", "Softmax", "MatMul", "Mul", "Reshape", "Transpose", "Reshape", "Gather", "Squeeze", "Transpose", "Unsqueeze", "Reshape"], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + output_name_to_node, + ) + + if v_nodes is None: + return + + if v_nodes and q_nodes and k_nodes: + subgraph_nodes = [] + subgraph_nodes.extend(q_nodes) + subgraph_nodes.extend(k_nodes) + subgraph_nodes.extend(v_nodes) + + subgraph_nodes_unique = [] + for item in subgraph_nodes: + if item not in subgraph_nodes_unique: + subgraph_nodes_unique.append(item) + + hidden_size = start_node.attribute[0].i + _, mul_val = self.model.get_constant_input(k_nodes[4]) + num_heads = hidden_size // (math.floor(1.0 / (mul_val * mul_val)) * math.floor(1.0 / (mul_val * mul_val))) + + attention_node = helper.make_node( + "CustomQKVToContextPluginDynamic_IxRT", + inputs=[fc_first_node.output[0]], + outputs=[start_node.input[0]], + name=self.model.create_node_name( + "TorchvisionVitAttention", 
name_prefix="TorchvisionVitAttention" + ), + ) + attention_node.domain = "com.iluvatar" + attention_node.attribute.extend([helper.make_attribute("type_id", 2)]) + attention_node.attribute.extend([helper.make_attribute("num_heads", num_heads)]) + attention_node.attribute.extend([helper.make_attribute("hidden_size", hidden_size)]) + attention_node.attribute.extend([helper.make_attribute("has_mask", 0)]) + attention_node.attribute.extend([helper.make_attribute("plugin_namespace", "")]) + attention_node.attribute.extend([helper.make_attribute("plugin_version", "1")]) + attention_node.attribute.extend([helper.make_attribute("has_qk_bias", 0)]) + + self.nodes_to_remove.extend(subgraph_nodes_unique) + + self.nodes_to_add.append(attention_node) + self.node_name_to_graph_name[attention_node.name] = self.this_graph_name \ No newline at end of file diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_xsoftmax.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_xsoftmax.py index 85d9cb2d..df55ba64 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_xsoftmax.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_xsoftmax.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_yolov5_decoder.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_yolov5_decoder.py index ba66693c..f2d07ce9 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_yolov5_decoder.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_yolov5_decoder.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
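Several passes in this patch (Swin-L, VideoBERT, ViT/Torchvision-ViT, T5) end by emitting a plugin node in the com.iluvatar domain whose configuration travels entirely as ONNX attributes. A condensed standalone sketch of that construction pattern, with illustrative tensor names and sizes rather than values from a specific model:

from onnx import helper

def make_iluvatar_attention(input_name, output_name, num_heads, hidden_size, node_name):
    # Plugin op resolved by the IxRT runtime; the attribute set mirrors the
    # ones extended onto the fused nodes in the passes above.
    node = helper.make_node(
        "CustomQKVToContextPluginDynamic_IxRT",
        inputs=[input_name],
        outputs=[output_name],
        name=node_name,
    )
    node.domain = "com.iluvatar"
    node.attribute.extend([
        helper.make_attribute("type_id", 2),
        helper.make_attribute("num_heads", num_heads),
        helper.make_attribute("hidden_size", hidden_size),
        helper.make_attribute("has_mask", 0),
        helper.make_attribute("has_qk_bias", 0),
        helper.make_attribute("plugin_namespace", ""),
        helper.make_attribute("plugin_version", "1"),
    ])
    return node

attn = make_iluvatar_attention("fc_qkv_out", "attn_out", num_heads=12, hidden_size=768, node_name="Attention_0")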
diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/onnx_model.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/onnx_model.py index b176058c..0b76f660 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/onnx_model.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/onnx_model.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/shape_infer_helper.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/shape_infer_helper.py index 11144402..a48b53db 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/shape_infer_helper.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/shape_infer_helper.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
@@ -15,13 +31,24 @@ if os.path.exists(os.path.join(file_path, "../tools/symbolic_shape_infer.py")): else: sys.path.append(os.path.join(file_path, "..")) -from .symbolic_shape_infer import SymbolicShapeInference, get_shape_from_type_proto, sympy +from .symbolic_shape_infer import ( + SymbolicShapeInference, + get_shape_from_type_proto, + sympy, +) logger = logging.getLogger(__name__) class SymbolicShapeInferenceHelper(SymbolicShapeInference): - def __init__(self, model, verbose=0, int_max=2**31 - 1, auto_merge=True, guess_output_rank=False): + def __init__( + self, + model, + verbose=0, + int_max=2**31 - 1, + auto_merge=True, + guess_output_rank=False, + ): super().__init__(int_max, auto_merge, guess_output_rank, verbose) self.model_ = model self.all_shapes_inferred_: bool = False diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/symbolic_shape_infer.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/symbolic_shape_infer.py index e5157f90..2311ad57 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/symbolic_shape_infer.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/symbolic_shape_infer.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
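
The hunks that follow in `symbolic_shape_infer.py` are largely mechanical line wrapping; the one visible behavioral change comes at the end of this file's diff, where an incomplete inference now logs a warning instead of raising. For orientation, a minimal sketch of how the reformatted entry point is typically driven (illustrative only; the import path and file names are placeholders, not taken from the patch):

```python
import onnx

# Placeholder import: in this repo the class lives under
# .../ILUVATAR/optimizer/passes/symbolic_shape_infer.py.
from symbolic_shape_infer import SymbolicShapeInference

model = onnx.load("model.onnx")  # placeholder model path
inferred = SymbolicShapeInference.infer_shapes(
    model, int_max=2**31 - 1, auto_merge=True, guess_output_rank=False, verbose=0
)
if inferred is not None:
    onnx.save(inferred, "model_with_shapes.onnx")
```
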
@@ -24,7 +40,11 @@ def get_attribute(node, attr_name, default_value=None): def get_dim_from_proto(dim): - return getattr(dim, dim.WhichOneof("value")) if type(dim.WhichOneof("value")) == str else None + return ( + getattr(dim, dim.WhichOneof("value")) + if type(dim.WhichOneof("value")) == str + else None + ) def is_sequence(type_proto): @@ -61,11 +81,16 @@ def make_named_value_info(name): def get_shape_from_sympy_shape(sympy_shape): - return [None if i is None else (int(i) if is_literal(i) else str(i)) for i in sympy_shape] + return [ + None if i is None else (int(i) if is_literal(i) else str(i)) + for i in sympy_shape + ] def is_literal(dim): - return type(dim) in [int, np.int64, np.int32, sympy.Integer] or (hasattr(dim, "is_number") and dim.is_number) + return type(dim) in [int, np.int64, np.int32, sympy.Integer] or ( + hasattr(dim, "is_number") and dim.is_number + ) def handle_negative_axis(axis, rank): @@ -217,7 +242,12 @@ class SymbolicShapeInference: self.prefix_ = prefix def _add_suggested_merge(self, symbols, apply=False): - assert all([(type(s) == str and s in self.symbolic_dims_) or is_literal(s) for s in symbols]) + assert all( + [ + (type(s) == str and s in self.symbolic_dims_) or is_literal(s) + for s in symbols + ] + ) symbols = set(symbols) for k, v in self.suggested_merge_.items(): if k in symbols: @@ -243,7 +273,11 @@ class SymbolicShapeInference: # when nothing to map to, use the shorter one if map_to is None: if self.verbose_ > 0: - logger.warning("Potential unsafe merge between symbolic expressions: ({})".format(",".join(symbols))) + logger.warning( + "Potential unsafe merge between symbolic expressions: ({})".format( + ",".join(symbols) + ) + ) symbols_list = list(symbols) lens = [len(s) for s in symbols_list] map_to = symbols_list[lens.index(min(lens))] @@ -264,7 +298,9 @@ class SymbolicShapeInference: def _apply_suggested_merge(self, graph_input_only=False): if not self.suggested_merge_: return - for i in list(self.out_mp_.graph.input) + ([] if graph_input_only else list(self.out_mp_.graph.value_info)): + for i in list(self.out_mp_.graph.input) + ( + [] if graph_input_only else list(self.out_mp_.graph.value_info) + ): for d in i.type.tensor_type.shape.dim: if d.dim_param in self.suggested_merge_: v = self.suggested_merge_[d.dim_param] @@ -284,7 +320,9 @@ class SymbolicShapeInference: [ ( i.name, - helper.make_tensor_value_info(i.name, i.data_type, list(i.dims)), + helper.make_tensor_value_info( + i.name, i.data_type, list(i.dims) + ), ) for i in self.out_mp_.graph.initializer ] @@ -296,7 +334,9 @@ class SymbolicShapeInference: if self.auto_merge_: unique_dims = list(set(dims)) is_int = [is_literal(d) for d in unique_dims] - assert sum(is_int) <= 1 # if there are more than 1 unique ints, something is wrong + assert ( + sum(is_int) <= 1 + ) # if there are more than 1 unique ints, something is wrong if sum(is_int) == 1: int_dim = is_int.index(1) if self.verbose_ > 0: @@ -310,13 +350,19 @@ class SymbolicShapeInference: return unique_dims[int_dim] else: if self.verbose_ > 0: - logger.debug("dim {} has been mergd with dim {}".format(unique_dims[1:], unique_dims[0])) + logger.debug( + "dim {} has been mergd with dim {}".format( + unique_dims[1:], unique_dims[0] + ) + ) return dims[0] else: return None if all([d == dims[0] for d in dims]): return dims[0] - merged = [self.suggested_merge_[d] if d in self.suggested_merge_ else d for d in dims] + merged = [ + self.suggested_merge_[d] if d in self.suggested_merge_ else d for d in dims + ] if all([d == merged[0] for d in 
merged]): assert merged[0] in self.symbolic_dims_ return merged[0] @@ -345,7 +391,12 @@ class SymbolicShapeInference: if self.auto_merge_: self._add_suggested_merge([dim1, dim2], apply=True) else: - logger.warning("unsupported broadcast between " + str(dim1) + " " + str(dim2)) + logger.warning( + "unsupported broadcast between " + + str(dim1) + + " " + + str(dim2) + ) new_shape = [new_dim] + new_shape return new_shape @@ -378,7 +429,11 @@ class SymbolicShapeInference: def _get_value(self, node, idx): name = node.input[idx] assert name in self.sympy_data_ or name in self.initializers_ - return self.sympy_data_[name] if name in self.sympy_data_ else numpy_helper.to_array(self.initializers_[name]) + return ( + self.sympy_data_[name] + if name in self.sympy_data_ + else numpy_helper.to_array(self.initializers_[name]) + ) def _try_get_value(self, node, idx): if idx >= len(node.input): @@ -395,7 +450,9 @@ class SymbolicShapeInference: if str_dim in self.suggested_merge_: if is_literal(self.suggested_merge_[str_dim]): continue # no need to create dim for literals - new_sympy_shape[i] = self.symbolic_dims_[self.suggested_merge_[str_dim]] + new_sympy_shape[i] = self.symbolic_dims_[ + self.suggested_merge_[str_dim] + ] else: # add new_dim if it's a computational expression if not str(new_dim) in self.symbolic_dims_: @@ -456,24 +513,38 @@ class SymbolicShapeInference: vi.name = o self.known_vi_[o] = vi - def _onnx_infer_subgraph(self, node, subgraph, use_node_input=True, inc_subgraph_id=True): + def _onnx_infer_subgraph( + self, node, subgraph, use_node_input=True, inc_subgraph_id=True + ): if self.verbose_ > 2: logger.debug( - "Inferencing subgraph of node {} with output({}...): {}".format(node.name, node.output[0], node.op_type) + "Inferencing subgraph of node {} with output({}...): {}".format( + node.name, node.output[0], node.op_type + ) ) # node inputs are not passed directly to the subgraph # it's up to the node dispatcher to prepare subgraph input # for example, with Scan/Loop, subgraph input shape would be trimmed from node input shape # besides, inputs in subgraph could shadow implicit inputs - subgraph_inputs = set([i.name for i in list(subgraph.initializer) + list(subgraph.input)]) - subgraph_implicit_input = set([name for name in self.known_vi_.keys() if not name in subgraph_inputs]) + subgraph_inputs = set( + [i.name for i in list(subgraph.initializer) + list(subgraph.input)] + ) + subgraph_implicit_input = set( + [name for name in self.known_vi_.keys() if not name in subgraph_inputs] + ) tmp_graph = helper.make_graph( list(subgraph.node), "tmp", list(subgraph.input) + [self.known_vi_[i] for i in subgraph_implicit_input], [make_named_value_info(i.name) for i in subgraph.output], ) - tmp_graph.initializer.extend([i for i in self.out_mp_.graph.initializer if i.name in subgraph_implicit_input]) + tmp_graph.initializer.extend( + [ + i + for i in self.out_mp_.graph.initializer + if i.name in subgraph_implicit_input + ] + ) tmp_graph.initializer.extend(subgraph.initializer) self.tmp_mp_.graph.CopyFrom(tmp_graph) @@ -491,12 +562,16 @@ class SymbolicShapeInference: symbolic_shape_inference._preprocess(self.tmp_mp_) symbolic_shape_inference.suggested_merge_ = self.suggested_merge_.copy() while symbolic_shape_inference.run_: - all_shapes_inferred = symbolic_shape_inference._infer_impl(self.sympy_data_.copy()) + all_shapes_inferred = symbolic_shape_inference._infer_impl( + self.sympy_data_.copy() + ) symbolic_shape_inference._update_output_from_vi() if use_node_input: # if subgraph uses node input, 
it needs to update to merged dims subgraph.ClearField("input") - subgraph.input.extend(symbolic_shape_inference.out_mp_.graph.input[: len(node.input)]) + subgraph.input.extend( + symbolic_shape_inference.out_mp_.graph.input[: len(node.input)] + ) subgraph.ClearField("output") subgraph.output.extend(symbolic_shape_inference.out_mp_.graph.output) subgraph.ClearField("value_info") @@ -504,9 +579,18 @@ class SymbolicShapeInference: subgraph.ClearField("node") subgraph.node.extend(symbolic_shape_inference.out_mp_.graph.node) # for new symbolic dims from subgraph output, add to main graph symbolic dims - subgraph_shapes = [get_shape_from_value_info(o) for o in symbolic_shape_inference.out_mp_.graph.output] + subgraph_shapes = [ + get_shape_from_value_info(o) + for o in symbolic_shape_inference.out_mp_.graph.output + ] subgraph_new_symbolic_dims = set( - [d for s in subgraph_shapes if s for d in s if type(d) == str and not d in self.symbolic_dims_] + [ + d + for s in subgraph_shapes + if s + for d in s + if type(d) == str and not d in self.symbolic_dims_ + ] ) new_dims = {} for d in subgraph_new_symbolic_dims: @@ -597,7 +681,9 @@ class SymbolicShapeInference: ) def _new_symbolic_shape(self, rank, node, out_idx=0): - return [self._new_symbolic_dim_from_output(node, out_idx, i) for i in range(rank)] + return [ + self._new_symbolic_dim_from_output(node, out_idx, i) for i in range(rank) + ] def _compute_conv_pool_shape(self, node): sympy_shape = self._get_sympy_shape(node, 0) @@ -625,14 +711,18 @@ class SymbolicShapeInference: dilations = get_attribute(node, "dilations", [1] * rank) strides = get_attribute(node, "strides", [1] * rank) - effective_kernel_shape = [(k - 1) * d + 1 for k, d in zip(kernel_shape, dilations)] + effective_kernel_shape = [ + (k - 1) * d + 1 for k, d in zip(kernel_shape, dilations) + ] pads = get_attribute(node, "pads") if pads is None: pads = [0] * (2 * rank) auto_pad = get_attribute(node, "auto_pad", b"NOTSET").decode("utf-8") if auto_pad != "VALID" and auto_pad != "NOTSET": try: - residual = [sympy.Mod(d, s) for d, s in zip(sympy_shape[-rank:], strides)] + residual = [ + sympy.Mod(d, s) for d, s in zip(sympy_shape[-rank:], strides) + ] total_pads = [ max(0, (k - s) if r == 0 else (k - r)) for k, s, r in zip(effective_kernel_shape, strides, residual) @@ -659,7 +749,9 @@ class SymbolicShapeInference: (effective_input_size - effective_kernel_shape[i]) / strides[i] ) else: - strided_kernel_positions = (effective_input_size - effective_kernel_shape[i]) // strides[i] + strided_kernel_positions = ( + effective_input_size - effective_kernel_shape[i] + ) // strides[i] sympy_shape[-rank + i] = strided_kernel_positions + 1 return sympy_shape @@ -688,7 +780,11 @@ class SymbolicShapeInference: else: lhs_reduce_dim = -1 rhs_reduce_dim = -2 - new_shape = self._broadcast_shapes(lhs_shape[:-2], rhs_shape[:-2]) + [lhs_shape[-2]] + [rhs_shape[-1]] + new_shape = ( + self._broadcast_shapes(lhs_shape[:-2], rhs_shape[:-2]) + + [lhs_shape[-2]] + + [rhs_shape[-1]] + ) # merge reduce dim self._check_merged_dims( [lhs_shape[lhs_reduce_dim], rhs_shape[rhs_reduce_dim]], @@ -698,17 +794,23 @@ class SymbolicShapeInference: # infer output_dtype from input type when not specified output_dtype = self.known_vi_[node.input[0]].type.tensor_type.elem_type vi = self.known_vi_[node.output[0]] - vi.CopyFrom(helper.make_tensor_value_info(node.output[0], output_dtype, new_shape)) + vi.CopyFrom( + helper.make_tensor_value_info(node.output[0], output_dtype, new_shape) + ) def _fuse_tensor_type(self, node, out_idx, 
dst_type, src_type): """ update dst_tensor_type to be compatible with src_tensor_type when dimension mismatches """ dst_tensor_type = ( - dst_type.sequence_type.elem_type.tensor_type if is_sequence(dst_type) else dst_type.tensor_type + dst_type.sequence_type.elem_type.tensor_type + if is_sequence(dst_type) + else dst_type.tensor_type ) src_tensor_type = ( - src_type.sequence_type.elem_type.tensor_type if is_sequence(src_type) else src_type.tensor_type + src_type.sequence_type.elem_type.tensor_type + if is_sequence(src_type) + else src_type.tensor_type ) if dst_tensor_type.elem_type != src_tensor_type.elem_type: node_id = node.name if node.name else node.op_type @@ -718,13 +820,17 @@ class SymbolicShapeInference: f"{onnx.onnx_pb.TensorProto.DataType.Name(src_tensor_type.elem_type)}" ) if dst_tensor_type.HasField("shape"): - for di, ds in enumerate(zip(dst_tensor_type.shape.dim, src_tensor_type.shape.dim)): + for di, ds in enumerate( + zip(dst_tensor_type.shape.dim, src_tensor_type.shape.dim) + ): if ds[0] != ds[1]: # create a new symbolic dimension for node/out_idx/mismatch dim id in dst_tensor_type for tensor_type # for sequence_type, clear the dimension new_dim = onnx.TensorShapeProto.Dimension() if not is_sequence(dst_type): - new_dim.dim_param = str(self._new_symbolic_dim_from_output(node, out_idx, di)) + new_dim.dim_param = str( + self._new_symbolic_dim_from_output(node, out_idx, di) + ) dst_tensor_type.shape.dim[di].CopyFrom(new_dim) else: dst_tensor_type.CopyFrom(src_tensor_type) @@ -749,10 +855,18 @@ class SymbolicShapeInference: "Floor": lambda l: sympy.floor(l[0]), "Max": lambda l: l[1] if is_literal(l[0]) and int(l[0]) < -self.int_max_ - else (l[0] if is_literal(l[1]) and int(l[1]) < -self.int_max_ else sympy.Max(l[0], l[1])), + else ( + l[0] + if is_literal(l[1]) and int(l[1]) < -self.int_max_ + else sympy.Max(l[0], l[1]) + ), "Min": lambda l: l[1] if is_literal(l[0]) and int(l[0]) > self.int_max_ - else (l[0] if is_literal(l[1]) and int(l[1]) > self.int_max_ else sympy.Min(l[0], l[1])), + else ( + l[0] + if is_literal(l[1]) and int(l[1]) > self.int_max_ + else sympy.Min(l[0], l[1]) + ), "Mul": lambda l: l[0] * l[1], "Sub": lambda l: l[0] - l[1], "Where": lambda l: l[1] if l[0] else l[2], @@ -771,7 +885,11 @@ class SymbolicShapeInference: else: output_type = onnx.TensorProto.STRING vi = self.known_vi_[node.output[0]] - vi.CopyFrom(helper.make_tensor_value_info(node.output[0], output_type, self._get_shape(node, 0))) + vi.CopyFrom( + helper.make_tensor_value_info( + node.output[0], output_type, self._get_shape(node, 0) + ) + ) def _infer_Compress(self, node): input_shape = self._get_shape(node, 0) @@ -817,7 +935,11 @@ class SymbolicShapeInference: for d in range(len(sympy_shape)): if d == axis: continue - dims = [self._get_shape(node, i_idx)[d] for i_idx in range(len(node.input)) if self._get_shape(node, i_idx)] + dims = [ + self._get_shape(node, i_idx)[d] + for i_idx in range(len(node.input)) + if self._get_shape(node, i_idx) + ] if all([d == dims[0] for d in dims]): continue merged = self._merge_symbols(dims) @@ -837,7 +959,9 @@ class SymbolicShapeInference: def _infer_ConcatFromSequence(self, node): seq_shape = self._get_shape(node, 0) new_axis = 1 if get_attribute(node, "new_axis") else 0 - axis = handle_negative_axis(get_attribute(node, "axis"), len(seq_shape) + new_axis) + axis = handle_negative_axis( + get_attribute(node, "axis"), len(seq_shape) + new_axis + ) concat_dim = str(self._new_symbolic_dim_from_output(node, 0, axis)) new_shape = seq_shape if new_axis: @@ -848,7 
+972,9 @@ class SymbolicShapeInference: vi.CopyFrom( helper.make_tensor_value_info( node.output[0], - self.known_vi_[node.input[0]].type.sequence_type.elem_type.tensor_type.elem_type, + self.known_vi_[ + node.input[0] + ].type.sequence_type.elem_type.tensor_type.elem_type, new_shape, ) ) @@ -865,7 +991,9 @@ class SymbolicShapeInference: sympy_shape = [sympy_shape] self._update_computed_dims(sympy_shape) # update sympy data if output type is int, and shape is known - if vi.type.tensor_type.elem_type == onnx.TensorProto.INT64 and all([is_literal(x) for x in sympy_shape]): + if vi.type.tensor_type.elem_type == onnx.TensorProto.INT64 and all( + [is_literal(x) for x in sympy_shape] + ): self.sympy_data_[node.output[0]] = np.ones( [int(x) for x in sympy_shape], dtype=np.int64 ) * numpy_helper.to_array(get_attribute(node, "value", 0)) @@ -954,7 +1082,9 @@ class SymbolicShapeInference: output_dtype = self.known_vi_[node.input[0]].type.tensor_type.elem_type vi = self.known_vi_[node.output[0]] - vi.CopyFrom(helper.make_tensor_value_info(node.output[0], output_dtype, new_sympy_shape)) + vi.CopyFrom( + helper.make_tensor_value_info(node.output[0], output_dtype, new_sympy_shape) + ) def _infer_Expand(self, node): expand_to_shape = as_list(self._try_get_value(node, 1), keep_none=True) @@ -962,7 +1092,9 @@ class SymbolicShapeInference: # new_shape's dim can come from shape value self._update_computed_dims(expand_to_shape) shape = self._get_shape(node, 0) - new_shape = self._broadcast_shapes(shape, get_shape_from_sympy_shape(expand_to_shape)) + new_shape = self._broadcast_shapes( + shape, get_shape_from_sympy_shape(expand_to_shape) + ) vi = self.known_vi_[node.output[0]] vi.CopyFrom( helper.make_tensor_value_info( @@ -985,7 +1117,11 @@ class SymbolicShapeInference: ) ) # for 1D input, do some sympy compute - if node.input[0] in self.sympy_data_ and len(data_shape) == 1 and 0 == get_attribute(node, "axis", 0): + if ( + node.input[0] in self.sympy_data_ + and len(data_shape) == 1 + and 0 == get_attribute(node, "axis", 0) + ): idx = self._try_get_value(node, 1) if idx is not None: data = self.sympy_data_[node.input[0]] @@ -1040,24 +1176,32 @@ class SymbolicShapeInference: subgraphs[0].CopyFrom(subgraphs[1]) for i_sub, subgraph in enumerate(subgraphs): - subgraph_infer = self._onnx_infer_subgraph(node, subgraph, use_node_input=False) + subgraph_infer = self._onnx_infer_subgraph( + node, subgraph, use_node_input=False + ) for i_out in range(len(node.output)): vi = self.known_vi_[node.output[i_out]] if i_sub == 0: vi.CopyFrom(subgraph.output[i_out]) vi.name = node.output[i_out] else: - self._fuse_tensor_type(node, i_out, vi.type, subgraph.output[i_out].type) + self._fuse_tensor_type( + node, i_out, vi.type, subgraph.output[i_out].type + ) # pass on sympy data from subgraph, if cond is constant if cond is not None and i_sub == (0 if as_scalar(cond) > 0 else 1): if subgraph.output[i_out].name in subgraph_infer.sympy_data_: - self.sympy_data_[vi.name] = subgraph_infer.sympy_data_[subgraph.output[i_out].name] + self.sympy_data_[vi.name] = subgraph_infer.sympy_data_[ + subgraph.output[i_out].name + ] def _infer_Loop(self, node): subgraph = get_attribute(node, "body") assert len(subgraph.input) == len(node.input) - num_loop_carried = len(node.input) - 2 # minus the length and initial loop condition + num_loop_carried = ( + len(node.input) - 2 + ) # minus the length and initial loop condition # when sequence_type is used as loop carried input # needs to run subgraph infer twice if the tensor shape in sequence contains 
None for i, si in enumerate(subgraph.input): @@ -1079,7 +1223,9 @@ class SymbolicShapeInference: # copy shape from output to input # note that loop input is [loop_len, cond, input_0, input_1, ...] # while loop output is [cond, output_0, output_1, ...] - subgraph.input[i_out + 1].type.sequence_type.elem_type.CopyFrom(so.type.sequence_type.elem_type) + subgraph.input[i_out + 1].type.sequence_type.elem_type.CopyFrom( + so.type.sequence_type.elem_type + ) need_second_infer = True else: si = subgraph.input[i_out + 1] @@ -1087,7 +1233,9 @@ class SymbolicShapeInference: for di, dims in enumerate(zip(si_shape, so_shape)): if dims[0] != dims[1]: new_dim = onnx.TensorShapeProto.Dimension() - new_dim.dim_param = str(self._new_symbolic_dim_from_output(node, i_out, di)) + new_dim.dim_param = str( + self._new_symbolic_dim_from_output(node, i_out, di) + ) si.type.tensor_type.shape.dim[di].CopyFrom(new_dim) so.type.tensor_type.shape.dim[di].CopyFrom(new_dim) need_second_infer = True @@ -1105,9 +1253,13 @@ class SymbolicShapeInference: loop_iter_dim = str(self._new_symbolic_dim_from_output(node)) for i in range(len(node.output)): vi = self.known_vi_[node.output[i]] - vi.CopyFrom(subgraph.output[i + 1]) # first subgraph output is condition, not in node output + vi.CopyFrom( + subgraph.output[i + 1] + ) # first subgraph output is condition, not in node output if i >= num_loop_carried: - assert not is_sequence(vi.type) # TODO: handle loop accumulation in sequence_type + assert not is_sequence( + vi.type + ) # TODO: handle loop accumulation in sequence_type subgraph_vi_dim = subgraph.output[i + 1].type.tensor_type.shape.dim vi.type.tensor_type.shape.ClearField("dim") vi_dim = vi.type.tensor_type.shape.dim @@ -1124,14 +1276,22 @@ class SymbolicShapeInference: def _infer_NonMaxSuppression(self, node): selected = str(self._new_symbolic_dim_from_output(node)) vi = self.known_vi_[node.output[0]] - vi.CopyFrom(helper.make_tensor_value_info(node.output[0], onnx.TensorProto.INT64, [selected, 3])) + vi.CopyFrom( + helper.make_tensor_value_info( + node.output[0], onnx.TensorProto.INT64, [selected, 3] + ) + ) def _infer_NonZero(self, node): input_rank = self._get_shape_rank(node, 0) # create a new symbolic dimension for NonZero output nz_len = str(self._new_symbolic_dim_from_output(node, 0, 1)) vi = self.known_vi_[node.output[0]] - vi.CopyFrom(helper.make_tensor_value_info(node.output[0], vi.type.tensor_type.elem_type, [input_rank, nz_len])) + vi.CopyFrom( + helper.make_tensor_value_info( + node.output[0], vi.type.tensor_type.elem_type, [input_rank, nz_len] + ) + ) def _infer_OneHot(self, node): sympy_shape = self._get_sympy_shape(node, 0) @@ -1140,7 +1300,11 @@ class SymbolicShapeInference: axis = handle_negative_axis(axis, len(sympy_shape) + 1) new_shape = get_shape_from_sympy_shape( sympy_shape[:axis] - + [self._new_symbolic_dim_from_output(node) if not is_literal(depth) else depth] + + [ + self._new_symbolic_dim_from_output(node) + if not is_literal(depth) + else depth + ] + sympy_shape[axis:] ) vi = self.known_vi_[node.output[0]] @@ -1164,7 +1328,8 @@ class SymbolicShapeInference: if pads is not None: assert len(pads) == 2 * rank new_sympy_shape = [ - d + pad_up + pad_down for d, pad_up, pad_down in zip(sympy_shape, pads[:rank], pads[rank:]) + d + pad_up + pad_down + for d, pad_up, pad_down in zip(sympy_shape, pads[:rank], pads[rank:]) ] self._update_computed_dims(new_sympy_shape) else: @@ -1174,7 +1339,9 @@ class SymbolicShapeInference: vi = self.known_vi_[node.output[0]] vi.CopyFrom( - 
helper.make_tensor_value_info(node.output[0], output_tp, get_shape_from_sympy_shape(new_sympy_shape)) + helper.make_tensor_value_info( + node.output[0], output_tp, get_shape_from_sympy_shape(new_sympy_shape) + ) ) def _infer_Pool(self, node): @@ -1198,7 +1365,11 @@ class SymbolicShapeInference: new_shape = self._broadcast_shapes(shape0, shape1) t0 = self.known_vi_[node.input[0]] vi = self.known_vi_[node.output[0]] - vi.CopyFrom(helper.make_tensor_value_info(node.output[0], t0.type.tensor_type.elem_type, new_shape)) + vi.CopyFrom( + helper.make_tensor_value_info( + node.output[0], t0.type.tensor_type.elem_type, new_shape + ) + ) def _infer_aten_diagonal(self, node): sympy_shape = self._get_sympy_shape(node, 0) @@ -1240,7 +1411,11 @@ class SymbolicShapeInference: assert rank in [1, 2] num_samples = self._try_get_value(node, 1) di = rank - 1 - last_dim = num_samples if num_samples else str(self._new_symbolic_dim_from_output(node, 0, di)) + last_dim = ( + num_samples + if num_samples + else str(self._new_symbolic_dim_from_output(node, 0, di)) + ) output_shape = sympy_shape[:-1] + [last_dim] vi = self.known_vi_[node.output[0]] vi.CopyFrom( @@ -1254,21 +1429,33 @@ class SymbolicShapeInference: def _infer_aten_pool2d(self, node): sympy_shape = self._get_sympy_shape(node, 0) assert len(sympy_shape) == 4 - sympy_shape[-2:] = [self._new_symbolic_dim_from_output(node, 0, i) for i in [2, 3]] + sympy_shape[-2:] = [ + self._new_symbolic_dim_from_output(node, 0, i) for i in [2, 3] + ] self._update_computed_dims(sympy_shape) for i, o in enumerate(node.output): if not o: continue vi = self.known_vi_[o] - elem_type = onnx.TensorProto.INT64 if i == 1 else self.known_vi_[node.input[0]].type.tensor_type.elem_type - vi.CopyFrom(helper.make_tensor_value_info(o, elem_type, get_shape_from_sympy_shape(sympy_shape))) + elem_type = ( + onnx.TensorProto.INT64 + if i == 1 + else self.known_vi_[node.input[0]].type.tensor_type.elem_type + ) + vi.CopyFrom( + helper.make_tensor_value_info( + o, elem_type, get_shape_from_sympy_shape(sympy_shape) + ) + ) def _infer_aten_minmax(self, node): vi = self.known_vi_[node.output[0]] if len(node.input) == 1: vi.CopyFrom( helper.make_tensor_value_info( - node.output[0], self.known_vi_[node.input[0]].type.tensor_type.elem_type, [] + node.output[0], + self.known_vi_[node.input[0]].type.tensor_type.elem_type, + [], ) ) else: @@ -1278,7 +1465,9 @@ class SymbolicShapeInference: dim = self._try_get_value(node, 1) if dim is None: rank = self._get_shape_rank(node, 0) - output_shape = self._new_symbolic_shape(rank if keepdim else rank - 1, node) + output_shape = self._new_symbolic_shape( + rank if keepdim else rank - 1, node + ) else: shape = self._get_sympy_shape(node, 0) dim = handle_negative_axis(dim, len(shape)) @@ -1290,11 +1479,17 @@ class SymbolicShapeInference: output_shape = get_shape_from_sympy_shape(output_shape) vi.CopyFrom( helper.make_tensor_value_info( - node.output[0], self.known_vi_[node.input[0]].type.tensor_type.elem_type, output_shape + node.output[0], + self.known_vi_[node.input[0]].type.tensor_type.elem_type, + output_shape, ) ) vi1 = self.known_vi_[node.output[1]] - vi1.CopyFrom(helper.make_tensor_value_info(node.output[1], onnx.TensorProto.INT64, output_shape)) + vi1.CopyFrom( + helper.make_tensor_value_info( + node.output[1], onnx.TensorProto.INT64, output_shape + ) + ) def _infer_aten_unfold(self, node): sympy_shape = self._get_sympy_shape(node, 0) @@ -1337,12 +1532,18 @@ class SymbolicShapeInference: del sympy_shape[dim] else: rank = len(sympy_shape) - sympy_shape = 
self._new_symbolic_shape(rank if keepdim else rank - 1, node) + sympy_shape = self._new_symbolic_shape( + rank if keepdim else rank - 1, node + ) self._update_computed_dims(sympy_shape) new_shape = get_shape_from_sympy_shape(sympy_shape) if node.output[0] and new_shape is not None: vi = self.known_vi_[node.output[0]] - vi.CopyFrom(helper.make_tensor_value_info(node.output[0], onnx.TensorProto.INT64, new_shape)) + vi.CopyFrom( + helper.make_tensor_value_info( + node.output[0], onnx.TensorProto.INT64, new_shape + ) + ) def _infer_BatchNormalization(self, node): self._propagate_shape_and_type(node) @@ -1384,7 +1585,11 @@ class SymbolicShapeInference: helper.make_tensor_value_info( node.output[0], self.known_vi_[node.input[0]].type.tensor_type.elem_type, - get_shape_from_sympy_shape(self._new_symbolic_shape(self._get_shape_rank(node, 0), node)), + get_shape_from_sympy_shape( + self._new_symbolic_shape( + self._get_shape_rank(node, 0), node + ) + ), ) ) else: @@ -1425,7 +1630,9 @@ class SymbolicShapeInference: helper.make_tensor_value_info( node.output[0], vi.type.tensor_type.elem_type, - get_shape_from_sympy_shape(self._new_symbolic_shape(shape_rank, node)), + get_shape_from_sympy_shape( + self._new_symbolic_shape(shape_rank, node) + ), ) ) else: @@ -1471,7 +1678,10 @@ class SymbolicShapeInference: if get_opset(self.out_mp_) <= 10: scales = self._try_get_value(node, 1) if scales is not None: - new_sympy_shape = [sympy.simplify(sympy.floor(d * s)) for d, s in zip(input_sympy_shape, scales)] + new_sympy_shape = [ + sympy.simplify(sympy.floor(d * s)) + for d, s in zip(input_sympy_shape, scales) + ] self._update_computed_dims(new_sympy_shape) vi.CopyFrom( helper.make_tensor_value_info( @@ -1489,7 +1699,10 @@ class SymbolicShapeInference: self._update_computed_dims(new_sympy_shape) elif scales is not None: rank = len(scales) - if get_attribute(node, "coordinate_transformation_mode") == "tf_crop_and_resize": + if ( + get_attribute(node, "coordinate_transformation_mode") + == "tf_crop_and_resize" + ): assert len(roi) == 2 * rank roi_start = list(roi)[:rank] roi_end = list(roi)[rank:] @@ -1499,11 +1712,15 @@ class SymbolicShapeInference: scales = list(scales) new_sympy_shape = [ sympy.simplify(sympy.floor(d * (end - start) * scale)) - for d, start, end, scale in zip(input_sympy_shape, roi_start, roi_end, scales) + for d, start, end, scale in zip( + input_sympy_shape, roi_start, roi_end, scales + ) ] self._update_computed_dims(new_sympy_shape) else: - new_sympy_shape = self._new_symbolic_shape(self._get_shape_rank(node, 0), node) + new_sympy_shape = self._new_symbolic_shape( + self._get_shape_rank(node, 0), node + ) vi.CopyFrom( helper.make_tensor_value_info( @@ -1531,19 +1748,31 @@ class SymbolicShapeInference: si.CopyFrom(self.known_vi_[node.input[i]]) if i >= num_scan_states: scan_input_dim = si.type.tensor_type.shape.dim - scan_input_dim.remove(scan_input_dim[scan_input_axes[i - num_scan_states]]) + scan_input_dim.remove( + scan_input_dim[scan_input_axes[i - num_scan_states]] + ) si.name = subgraph_name self._onnx_infer_subgraph(node, subgraph) num_scan_outputs = len(node.output) - num_scan_states - scan_output_axes = get_attribute(node, "scan_output_axes", [0] * num_scan_outputs) - scan_input_dim = get_shape_from_type_proto(self.known_vi_[node.input[-1]].type)[scan_input_axes[-1]] + scan_output_axes = get_attribute( + node, "scan_output_axes", [0] * num_scan_outputs + ) + scan_input_dim = get_shape_from_type_proto(self.known_vi_[node.input[-1]].type)[ + scan_input_axes[-1] + ] for i, o in 
enumerate(node.output): vi = self.known_vi_[o] if i >= num_scan_states: shape = get_shape_from_type_proto(subgraph.output[i].type) - new_dim = handle_negative_axis(scan_output_axes[i - num_scan_states], len(shape) + 1) + new_dim = handle_negative_axis( + scan_output_axes[i - num_scan_states], len(shape) + 1 + ) shape = shape[:new_dim] + [scan_input_dim] + shape[new_dim:] - vi.CopyFrom(helper.make_tensor_value_info(o, subgraph.output[i].type.tensor_type.elem_type, shape)) + vi.CopyFrom( + helper.make_tensor_value_info( + o, subgraph.output[i].type.tensor_type.elem_type, shape + ) + ) else: vi.CopyFrom(subgraph.output[i]) vi.name = o @@ -1676,7 +1905,9 @@ class SymbolicShapeInference: e = new_sympy_shape[i] except Exception: logger.warning( - "Unable to determine if {} <= {}, treat as equal".format(e, new_sympy_shape[i]) + "Unable to determine if {} <= {}, treat as equal".format( + e, new_sympy_shape[i] + ) ) e = new_sympy_shape[i] @@ -1684,7 +1915,9 @@ class SymbolicShapeInference: if is_literal(new_sympy_shape[i]) and is_literal(s): s = max(0, min(s, new_sympy_shape[i])) - new_sympy_shape[i] = sympy.simplify((e - s + t + (-1 if t > 0 else 1)) // t) + new_sympy_shape[i] = sympy.simplify( + (e - s + t + (-1 if t > 0 else 1)) // t + ) self._update_computed_dims(new_sympy_shape) @@ -1709,7 +1942,9 @@ class SymbolicShapeInference: if type(input_sympy_data) == list or ( type(input_sympy_data) == np.array and len(input_sympy_data.shape) == 1 ): - self.sympy_data_[node.output[0]] = input_sympy_data[starts[0] : ends[0] : steps[0]] + self.sympy_data_[node.output[0]] = input_sympy_data[ + starts[0] : ends[0] : steps[0] + ] def _infer_SoftmaxCrossEntropyLoss(self, node): vi = self.known_vi_[node.output[0]] @@ -1724,7 +1959,9 @@ class SymbolicShapeInference: def _infer_Split_Common(self, node, make_value_info_func): input_sympy_shape = self._get_sympy_shape(node, 0) - axis = handle_negative_axis(get_attribute(node, "axis", 0), len(input_sympy_shape)) + axis = handle_negative_axis( + get_attribute(node, "axis", 0), len(input_sympy_shape) + ) split = get_attribute(node, "split") if not split: num_outputs = len(node.output) @@ -1739,7 +1976,11 @@ class SymbolicShapeInference: make_value_info_func( node.output[i_o], self.known_vi_[node.input[0]].type.tensor_type.elem_type, - get_shape_from_sympy_shape(input_sympy_shape[:axis] + [split[i_o]] + input_sympy_shape[axis + 1 :]), + get_shape_from_sympy_shape( + input_sympy_shape[:axis] + + [split[i_o]] + + input_sympy_shape[axis + 1 :] + ), ) ) self.known_vi_[vi.name] = vi @@ -1808,7 +2049,9 @@ class SymbolicShapeInference: new_sympy_shape.append(new_dim) self._update_computed_dims(new_sympy_shape) else: - new_sympy_shape = self._new_symbolic_shape(self._get_shape_rank(node, 0), node) + new_sympy_shape = self._new_symbolic_shape( + self._get_shape_rank(node, 0), node + ) vi = self.known_vi_[node.output[0]] vi.CopyFrom( helper.make_tensor_value_info( @@ -1845,7 +2088,11 @@ class SymbolicShapeInference: for i_o in range(len(node.output)): vi = self.known_vi_[node.output[i_o]] - vi.CopyFrom(helper.make_tensor_value_info(node.output[i_o], vi.type.tensor_type.elem_type, new_shape)) + vi.CopyFrom( + helper.make_tensor_value_info( + node.output[i_o], vi.type.tensor_type.elem_type, new_shape + ) + ) def _infer_Transpose(self, node): if node.input[0] in self.sympy_data_: @@ -1853,7 +2100,11 @@ class SymbolicShapeInference: perm = get_attribute(node, "perm", reversed(list(range(len(data_shape))))) input_data = self.sympy_data_[node.input[0]] 
self.sympy_data_[node.output[0]] = ( - np.transpose(np.array(input_data).reshape(*data_shape), axes=tuple(perm)).flatten().tolist() + np.transpose( + np.array(input_data).reshape(*data_shape), axes=tuple(perm) + ) + .flatten() + .tolist() ) def _infer_Unsqueeze(self, node): @@ -1901,7 +2152,9 @@ class SymbolicShapeInference: assert map_key_type is not None new_vi = onnx.ValueInfoProto() new_vi.name = node.output[0] - new_vi.type.sequence_type.elem_type.map_type.value_type.tensor_type.elem_type = onnx.TensorProto.FLOAT + new_vi.type.sequence_type.elem_type.map_type.value_type.tensor_type.elem_type = ( + onnx.TensorProto.FLOAT + ) new_vi.type.sequence_type.elem_type.map_type.key_type = map_key_type vi = self.known_vi_[node.output[0]] vi.CopyFrom(new_vi) @@ -1936,7 +2189,9 @@ class SymbolicShapeInference: else: past_shape[3] = f"{past_shape[3]}+{input_shape[1]}" vi = self.known_vi_[node.output[1]] - vi.CopyFrom(helper.make_tensor_value_info(vi.name, output_dtype, past_shape)) + vi.CopyFrom( + helper.make_tensor_value_info(vi.name, output_dtype, past_shape) + ) def _infer_BiasGelu(self, node): self._propagate_shape_and_type(node) @@ -1961,17 +2216,29 @@ class SymbolicShapeInference: word_embedding_dtype = self.known_vi_[node.input[2]].type.tensor_type.elem_type vi = self.known_vi_[node.output[0]] - vi.CopyFrom(helper.make_tensor_value_info(node.output[0], word_embedding_dtype, output_shape)) + vi.CopyFrom( + helper.make_tensor_value_info( + node.output[0], word_embedding_dtype, output_shape + ) + ) mask_index_shape = [input_ids_shape[0]] vi = self.known_vi_[node.output[1]] - vi.CopyFrom(helper.make_tensor_value_info(node.output[1], onnx.TensorProto.INT32, mask_index_shape)) + vi.CopyFrom( + helper.make_tensor_value_info( + node.output[1], onnx.TensorProto.INT32, mask_index_shape + ) + ) if len(node.output) > 2: # Optional output of add before layer nomalization is done # shape is same as the output vi = self.known_vi_[node.output[2]] - vi.CopyFrom(helper.make_tensor_value_info(node.output[2], word_embedding_dtype, output_shape)) + vi.CopyFrom( + helper.make_tensor_value_info( + node.output[2], word_embedding_dtype, output_shape + ) + ) def _infer_SkipLayerNormalization(self, node): self._propagate_shape_and_type(node) @@ -1985,7 +2252,9 @@ class SymbolicShapeInference: # set the context output seperately. # The first output is autograd's context. vi = self.known_vi_[node.output[0]] - vi.CopyFrom(helper.make_tensor_value_info(node.output[0], onnx.TensorProto.INT64, [])) + vi.CopyFrom( + helper.make_tensor_value_info(node.output[0], onnx.TensorProto.INT64, []) + ) # Outputs after autograd's context are tensors. # We assume their ranks are fixed for different model inputs. 
@@ -1994,14 +2263,22 @@ class SymbolicShapeInference: vi = self.known_vi_[node.output[i + 1]] sympy_shape = self._new_symbolic_shape(output_tensor_ranks[i], node) shape = get_shape_from_sympy_shape(sympy_shape) - value_info = helper.make_tensor_value_info(node.output[i + 1], output_tensor_types[i], shape) + value_info = helper.make_tensor_value_info( + node.output[i + 1], output_tensor_types[i], shape + ) vi.CopyFrom(value_info) def _propagate_shape_and_type(self, node, input_index=0, output_index=0): shape = self._get_shape(node, input_index) - output_dtype = self.known_vi_[node.input[input_index]].type.tensor_type.elem_type + output_dtype = self.known_vi_[ + node.input[input_index] + ].type.tensor_type.elem_type vi = self.known_vi_[node.output[output_index]] - vi.CopyFrom(helper.make_tensor_value_info(node.output[output_index], output_dtype, shape)) + vi.CopyFrom( + helper.make_tensor_value_info( + node.output[output_index], output_dtype, shape + ) + ) def _is_none_dim(self, dim_value): if type(dim_value) != str: @@ -2036,7 +2313,9 @@ class SymbolicShapeInference: for i_dim, dim in enumerate(input_shape): if dim is None: # some models use None for symbolic dim in input, replace it with a string - input_dims[i_dim].dim_param = str(self._new_symbolic_dim(i.name, i_dim)) + input_dims[i_dim].dim_param = str( + self._new_symbolic_dim(i.name, i_dim) + ) self.input_symbols_.update([d for d in input_shape if type(d) == str]) @@ -2057,7 +2336,9 @@ class SymbolicShapeInference: # compute prerequesite for node for topological sort # node with subgraphs may have dependency on implicit inputs, which will affect topological sort - prereq_for_node = {} # map from node to all its inputs, including implicit ones in subgraph + prereq_for_node = ( + {} + ) # map from node to all its inputs, including implicit ones in subgraph def get_prereq(node): names = set(i for i in node.input if i) @@ -2075,7 +2356,13 @@ class SymbolicShapeInference: for n in g.node: g_outputs_and_initializers.update(n.output) for n in g.node: - g_prereq.update([i for i in get_prereq(n) if i not in g_outputs_and_initializers]) + g_prereq.update( + [ + i + for i in get_prereq(n) + if i not in g_outputs_and_initializers + ] + ) names.update(g_prereq) # remove subgraph inputs from g_prereq since those are local-only for i in g.input: @@ -2088,16 +2375,28 @@ class SymbolicShapeInference: # topological sort nodes, note there might be dead nodes so we check if all graph outputs are reached to terminate sorted_nodes = [] - sorted_known_vi = set([i.name for i in list(self.out_mp_.graph.input) + list(self.out_mp_.graph.initializer)]) + sorted_known_vi = set( + [ + i.name + for i in list(self.out_mp_.graph.input) + + list(self.out_mp_.graph.initializer) + ] + ) if any([o.name in sorted_known_vi for o in self.out_mp_.graph.output]): # Loop/Scan will have some graph output in graph inputs, so don't do topological sort sorted_nodes = self.out_mp_.graph.node else: - while not all([o.name in sorted_known_vi for o in self.out_mp_.graph.output]): + while not all( + [o.name in sorted_known_vi for o in self.out_mp_.graph.output] + ): old_sorted_nodes_len = len(sorted_nodes) for node in self.out_mp_.graph.node: if (node.output[0] not in sorted_known_vi) and all( - [i in sorted_known_vi for i in prereq_for_node[node.output[0]] if i] + [ + i in sorted_known_vi + for i in prereq_for_node[node.output[0]] + if i + ] ): sorted_known_vi.update(node.output) sorted_nodes.append(node) @@ -2123,7 +2422,11 @@ class SymbolicShapeInference: for attr in node.attribute: 
# TODO: Is overload_name needed? if attr.name == "operator": - aten_op_name = attr.s.decode("utf-8") if isinstance(attr.s, bytes) else attr.s + aten_op_name = ( + attr.s.decode("utf-8") + if isinstance(attr.s, bytes) + else attr.s + ) if aten_op_name in self.aten_op_dispatcher_: known_aten_op = True self.aten_op_dispatcher_[aten_op_name](node) @@ -2133,7 +2436,9 @@ class SymbolicShapeInference: logger.debug(node.op_type + ": " + node.name) for i, name in enumerate(node.input): logger.debug( - " Input {}: {} {}".format(i, name, "initializer" if name in self.initializers_ else "") + " Input {}: {} {}".format( + i, name, "initializer" if name in self.initializers_ else "" + ) ) # onnx automatically merge dims with value, i.e. Mul(['aaa', 'bbb'], [1000, 1]) -> [1000, 'bbb'] @@ -2152,8 +2457,20 @@ class SymbolicShapeInference: vi = self.known_vi_[node.output[0]] out_rank = len(get_shape_from_type_proto(vi.type)) in_shapes = [self._get_shape(node, i) for i in range(len(node.input))] - for d in range(out_rank - (2 if node.op_type in ["MatMul", "MatMulInteger", "MatMulInteger16"] else 0)): - in_dims = [s[len(s) - out_rank + d] for s in in_shapes if len(s) + d >= out_rank] + for d in range( + out_rank + - ( + 2 + if node.op_type + in ["MatMul", "MatMulInteger", "MatMulInteger16"] + else 0 + ) + ): + in_dims = [ + s[len(s) - out_rank + d] + for s in in_shapes + if len(s) + d >= out_rank + ] if len(in_dims) > 1: self._check_merged_dims(in_dims, allow_broadcast=True) @@ -2166,7 +2483,9 @@ class SymbolicShapeInference: if out_type_kind not in ["tensor_type", "sparse_tensor_type", None]: if self.verbose_ > 2: if out_type_kind == "sequence_type": - seq_cls_type = out_type.sequence_type.elem_type.WhichOneof("value") + seq_cls_type = out_type.sequence_type.elem_type.WhichOneof( + "value" + ) if "tensor_type" == seq_cls_type: logger.debug( " {}: sequence of {} {}".format( @@ -2178,27 +2497,42 @@ class SymbolicShapeInference: ) ) else: - logger.debug(" {}: sequence of {}".format(node.output[i_o], seq_cls_type)) + logger.debug( + " {}: sequence of {}".format( + node.output[i_o], seq_cls_type + ) + ) else: - logger.debug(" {}: {}".format(node.output[i_o], out_type_kind)) + logger.debug( + " {}: {}".format(node.output[i_o], out_type_kind) + ) continue out_shape = get_shape_from_value_info(vi) - out_type_undefined = out_type.tensor_type.elem_type == onnx.TensorProto.UNDEFINED + out_type_undefined = ( + out_type.tensor_type.elem_type == onnx.TensorProto.UNDEFINED + ) if self.verbose_ > 2: logger.debug( " {}: {} {}".format( node.output[i_o], str(out_shape), - onnx.TensorProto.DataType.Name(vi.type.tensor_type.elem_type), + onnx.TensorProto.DataType.Name( + vi.type.tensor_type.elem_type + ), ) ) if node.output[i_o] in self.sympy_data_: - logger.debug(" Sympy Data: " + str(self.sympy_data_[node.output[i_o]])) + logger.debug( + " Sympy Data: " + str(self.sympy_data_[node.output[i_o]]) + ) # onnx >= 1.11.0, use unk__#index instead of None when the shape dim is uncertain if ( - out_shape is not None and (None in out_shape or self._is_shape_contains_none_dim(out_shape)) + out_shape is not None + and ( + None in out_shape or self._is_shape_contains_none_dim(out_shape) + ) ) or out_type_undefined: if self.auto_merge_: if node.op_type in [ @@ -2220,21 +2554,36 @@ class SymbolicShapeInference: "Min", "Max", ]: - shapes = [self._get_shape(node, i) for i in range(len(node.input))] + shapes = [ + self._get_shape(node, i) for i in range(len(node.input)) + ] if node.op_type in [ "MatMul", "MatMulInteger", "MatMulInteger16", ]: - 
if None in out_shape or self._is_shape_contains_none_dim(out_shape): + if ( + None in out_shape + or self._is_shape_contains_none_dim(out_shape) + ): if None in out_shape: idx = out_shape.index(None) else: - idx = out_shape.index(self._is_shape_contains_none_dim(out_shape)) - dim_idx = [len(s) - len(out_shape) + idx for s in shapes] + idx = out_shape.index( + self._is_shape_contains_none_dim(out_shape) + ) + dim_idx = [ + len(s) - len(out_shape) + idx for s in shapes + ] # only support auto merge for MatMul for dim < rank-2 when rank > 2 - assert len(shapes[0]) > 2 and dim_idx[0] < len(shapes[0]) - 2 - assert len(shapes[1]) > 2 and dim_idx[1] < len(shapes[1]) - 2 + assert ( + len(shapes[0]) > 2 + and dim_idx[0] < len(shapes[0]) - 2 + ) + assert ( + len(shapes[1]) > 2 + and dim_idx[1] < len(shapes[1]) - 2 + ) elif node.op_type == "Expand": # auto merge for cases like Expand([min(batch, 1), min(seq, 512)], [batch, seq]) shapes = [ @@ -2246,11 +2595,15 @@ class SymbolicShapeInference: if shapes: for idx in range(len(out_shape)): - if out_shape[idx] is not None and not self._is_none_dim(out_shape[idx]): + if out_shape[idx] is not None and not self._is_none_dim( + out_shape[idx] + ): continue # note that the broadcasting rule aligns from right to left # if a tensor has a lower rank (dim_idx[idx] < 0), it would automatically broadcast and need no merge - dim_idx = [len(s) - len(out_shape) + idx for s in shapes] + dim_idx = [ + len(s) - len(out_shape) + idx for s in shapes + ] if len(dim_idx) > 0: self._add_suggested_merge( [ @@ -2266,12 +2619,22 @@ class SymbolicShapeInference: self.run_ = False # create new dynamic dims for ops not handled by symbolic shape inference - if self.run_ == False and not node.op_type in self.dispatcher_ and not known_aten_op: - is_unknown_op = out_type_undefined and (out_shape is None or len(out_shape) == 0) + if ( + self.run_ == False + and not node.op_type in self.dispatcher_ + and not known_aten_op + ): + is_unknown_op = out_type_undefined and ( + out_shape is None or len(out_shape) == 0 + ) if is_unknown_op: # unknown op to ONNX, maybe from higher opset or other domain # only guess the output rank from input 0 when using guess_output_rank option - out_rank = self._get_shape_rank(node, 0) if self.guess_output_rank_ else -1 + out_rank = ( + self._get_shape_rank(node, 0) + if self.guess_output_rank_ + else -1 + ) else: # valid ONNX op, but not handled by symbolic shape inference, just assign dynamic shape out_rank = len(out_shape) @@ -2280,7 +2643,9 @@ class SymbolicShapeInference: new_shape = self._new_symbolic_shape(out_rank, node, i_o) if out_type_undefined: # guess output data type from input vi if not defined - out_dtype = self.known_vi_[node.input[0]].type.tensor_type.elem_type + out_dtype = self.known_vi_[ + node.input[0] + ].type.tensor_type.elem_type else: # otherwise, use original data type out_dtype = vi.type.tensor_type.elem_type @@ -2312,7 +2677,12 @@ class SymbolicShapeInference: continue # continue the inference after guess, no need to stop as no merge is needed if self.verbose_ > 0 or not self.auto_merge_ or out_type_undefined: - logger.debug("Stopping at incomplete shape inference at " + node.op_type + ": " + node.name) + logger.debug( + "Stopping at incomplete shape inference at " + + node.op_type + + ": " + + node.name + ) logger.debug("node inputs:") for i in node.input: logger.debug(self.known_vi_[i]) @@ -2332,19 +2702,23 @@ class SymbolicShapeInference: output.CopyFrom(self.known_vi_[output.name]) @staticmethod - def infer_shapes(in_mp, 
int_max=2**31 - 1, auto_merge=False, guess_output_rank=False, verbose=0): + def infer_shapes( + in_mp, int_max=2**31 - 1, auto_merge=False, guess_output_rank=False, verbose=0 + ): onnx_opset = get_opset(in_mp) if (not onnx_opset) or onnx_opset < 7: logger.warning("Only support models of onnx opset 7 and above.") return None - symbolic_shape_inference = SymbolicShapeInference(int_max, auto_merge, guess_output_rank, verbose) + symbolic_shape_inference = SymbolicShapeInference( + int_max, auto_merge, guess_output_rank, verbose + ) all_shapes_inferred = False symbolic_shape_inference._preprocess(in_mp) while symbolic_shape_inference.run_: all_shapes_inferred = symbolic_shape_inference._infer_impl() symbolic_shape_inference._update_output_from_vi() if not all_shapes_inferred: - raise Exception("Incomplete symbolic shape inference") + logger.warning("Incomplete symbolic shape inference") return symbolic_shape_inference.out_mp_ diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/requirements.txt b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/requirements.txt index 42288781..b80f9f40 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/requirements.txt +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/requirements.txt @@ -1,3 +1,19 @@ -sympy +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+#
+
+onnxsim
 packaging
-onnxsim
\ No newline at end of file
+sympy
diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/runtime_backend_iluvatar.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/runtime_backend_iluvatar.py
index 0834ab6d..7781481d 100644
--- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/runtime_backend_iluvatar.py
+++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/runtime_backend_iluvatar.py
@@ -657,4 +657,4 @@ class RuntimeBackendILUVATAR(runtime_backend.RuntimeBackend):
                 i += 1
             return data
         else:
-            raise ValueError("Please provide input type")
\ No newline at end of file
+            raise ValueError("Please provide input type")
diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/model_zoo/roformer-tf-fp32.json b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/model_zoo/roformer-tf-fp32.json
index 278e9e92..687fc7a0 100644
--- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/model_zoo/roformer-tf-fp32.json
+++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/model_zoo/roformer-tf-fp32.json
@@ -5,10 +5,10 @@
     "framework_version": "2.4.0",
     "model_format": "saved_model",
     "model_precision": "FP32",
-    "inputs": "input_segment:0,input_token:0",
+    "inputs": "input_segment0,input_token0",
     "outputs": "Identity:0",
-    "input_shape": {"input_segment:0": [1, 1024], "input_token:0": [1, 1024]},
+    "input_shape": {"input_segment0": [1, 1024], "input_token0": [1, 1024]},
     "input_type": "FLOAT32,FLOAT32",
     "dataset_name": "open_cail2019",
-    "max_batch_size": 64
-}
\ No newline at end of file
+    "max_batch_size": 128
+}
diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/requirements.txt b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/requirements.txt
index e57e2c9c..8293d0ba 100644
--- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/requirements.txt
+++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/requirements.txt
@@ -8,5 +8,5 @@ opencv-python
 transformers
 tokenization
 fpdf
-typing-extensions==3.7.4.3
+typing-extensions==4.12.2
 numpy==1.23.0
-- 
Gitee

From edd76cc303697b2f7731bb67933282248adffd3e Mon Sep 17 00:00:00 2001
From: "hongliang.yuan"
Date: Mon, 20 Jan 2025 11:25:57 +0800
Subject: [PATCH 33/35] update model

---
 .../swin_transformer_large/ixrt/ci/prepare.sh        |  6 ++++--
 models/nlp/language_model/albert/ixrt/ci/prepare.sh  | 12 ++++++------
 models/nlp/language_model/deberta/ixrt/ci/prepare.sh |  5 +++--
 models/nlp/language_model/roberta/ixrt/ci/prepare.sh |  7 ++++---
 .../nlp/language_model/roberta/ixrt/export_onnx.py   |  4 ++--
 .../nlp/language_model/roformer/ixrt/ci/prepare.sh   |  2 --
 .../ctr-prediction/widedeep/ixrt/ci/prepare.sh       |  1 -
 7 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/models/cv/classification/swin_transformer_large/ixrt/ci/prepare.sh b/models/cv/classification/swin_transformer_large/ixrt/ci/prepare.sh
index 572c069a..b7fe2e69 100644
--- a/models/cv/classification/swin_transformer_large/ixrt/ci/prepare.sh
+++ b/models/cv/classification/swin_transformer_large/ixrt/ci/prepare.sh
@@ -25,6 +25,8 @@ else
     echo "Not Support Os"
 fi
 
+apt install -y libnuma-dev
+
 pip install -r requirements.txt
 mkdir -p general_perf/model_zoo/regular
 mkdir -p general_perf/model_zoo/popular
@@ -41,7 +43,7 @@ pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/requ
 
 # copy data
 cp -r /root/data/datasets/open_imagenet/* ByteMLPerf/byte_infer_perf/general_perf/datasets/open_imagenet/
 
-mkdir -p ./ByteMLPerf/general_perf/model_zoo/popular/swin-large
-cp general_perf/model_zoo/popular/swin-large/* ./ByteMLPerf/general_perf/model_zoo/popular/swin-large
+mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/swin-large
+cp general_perf/model_zoo/popular/swin-large/* ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/swin-large
 cp -r /root/data/3rd_party/workloads ./ByteMLPerf/byte_infer_perf/general_perf/
\ No newline at end of file
diff --git a/models/nlp/language_model/albert/ixrt/ci/prepare.sh b/models/nlp/language_model/albert/ixrt/ci/prepare.sh
index 9559705d..d78865ec 100644
--- a/models/nlp/language_model/albert/ixrt/ci/prepare.sh
+++ b/models/nlp/language_model/albert/ixrt/ci/prepare.sh
@@ -21,7 +21,6 @@ apt install -y libnuma-dev
 pip3 install -r requirements.txt
 
 cp /root/data/3rd_party/albert-torch-fp32.json ./
-cp /root/data/3rd_party/iluvatar-corex-ixrt/tools/optimizer/optimizer.py ./
 
 python3 torch2onnx.py --model_path /root/data/checkpoints/open_albert/albert-base-squad.pt --output_path albert-torch-fp32.onnx
 onnxsim albert-torch-fp32.onnx albert-torch-fp32-sim.onnx
@@ -29,17 +28,14 @@ onnxsim albert-torch-fp32.onnx albert-torch-fp32-sim.onnx
 mkdir -p data/open_albert
 mv ./albert-torch-fp32-sim.onnx data/open_albert/albert.onnx
 
-wget http://files.deepspark.org.cn:880/deepspark/madlag.tar
-tar xvf madlag.tar
-rm -f madlag.tar
-
 # link and install requirements
 ln -s ../../../../../toolbox/ByteMLPerf ./
 pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/requirements.txt
 pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/requirements.txt
 
 # edit madlag/albert-base-v2-squad path
-sed -i "s#madlag#/${MODEL_PATH}/madlag#" ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/data_loader.py
+# sed -i "s#madlag#/${MODEL_PATH}/madlag#" ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/data_loader.py
+mv madlag ./ByteMLPerf/byte_infer_perf/general_perf/
 
 # copy open_squad data
 cp /root/data/datasets/open_squad/* ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/
@@ -50,6 +46,10 @@ cp /root/data/checkpoints/open_albert/*.pt ./ByteMLPerf/byte_infer_perf/general_
 
 # run acc script
 cd ./ByteMLPerf/byte_infer_perf/general_perf
+# wget http://files.deepspark.org.cn:880/deepspark/madlag.tar
+cp /root/data/3rd_party/madlag.tar ./
+tar xvf madlag.tar
+rm -f madlag.tar
 cp -r /root/data/3rd_party/workloads ./
 sed -i 's/tensorrt_legacy/tensorrt/' ./backends/ILUVATAR/common.py
 sed -i 's/tensorrt_legacy/tensorrt/' ./backends/ILUVATAR/compile_backend_iluvatar.py
diff --git a/models/nlp/language_model/deberta/ixrt/ci/prepare.sh b/models/nlp/language_model/deberta/ixrt/ci/prepare.sh
index 4b3452fe..d440393e 100644
--- a/models/nlp/language_model/deberta/ixrt/ci/prepare.sh
+++ b/models/nlp/language_model/deberta/ixrt/ci/prepare.sh
@@ -21,7 +21,7 @@ apt install -y libnuma-dev
 pip install -r requirements.txt
 
 cp /root/data/3rd_party/deberta-torch-fp32.json ./
-cp /root/data/3rd_party/iluvatar-corex-ixrt/tools/optimizer/optimizer.py ./
+
 python3 torch2onnx.py --model_path /root/data/checkpoints/open_deberta/deberta-base-squad.pt --output_path deberta-torch-fp32.onnx
 onnxsim deberta-torch-fp32.onnx deberta-torch-fp32-sim.onnx
 python3 remove_clip_and_cast.py
@@ -43,7 +43,8 @@ cp ./deberta-sim-drop-clip-drop-invaild-cast.onnx ./ByteMLPerf/byte_infer_perf/g
 
 cd ./ByteMLPerf/byte_infer_perf/general_perf
 cp -r /root/data/3rd_party/workloads ./
-wget http://files.deepspark.org.cn:880/deepspark/Palak.tar
+# wget http://files.deepspark.org.cn:880/deepspark/Palak.tar
+cp /root/data/3rd_party/Palak.tar ./
 tar -zxvf Palak.tar
 #接着修改代码:ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/data_loader.py
 -AutoTokenizer.from_pretrained("Palak/microsoft_deberta-base_squad") => AutoTokenizer.from_pretrained("/Your/Path/Palak/microsoft_deberta-base_squad")
diff --git a/models/nlp/language_model/roberta/ixrt/ci/prepare.sh b/models/nlp/language_model/roberta/ixrt/ci/prepare.sh
index 89734e52..81d02ab0 100644
--- a/models/nlp/language_model/roberta/ixrt/ci/prepare.sh
+++ b/models/nlp/language_model/roberta/ixrt/ci/prepare.sh
@@ -23,7 +23,6 @@ pip install -r requirements.txt
 mkdir -p data
 cp -r /root/data/checkpoints/open_roberta data/
 cp /root/data/3rd_party/roberta-torch-fp32.json ./
-cp /root/data/3rd_party/iluvatar-corex-ixrt/tools/optimizer/optimizer.py ./
 
 # export onnx
 python3 export_onnx.py --model_path data/open_roberta/roberta-base-squad.pt --output_path data/open_roberta/roberta-torch-fp32.onnx
@@ -37,13 +36,15 @@ pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/requ
 
 # Move open_roberta
 mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/
-mv data/open_roberta ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/
+cp -r data/open_roberta ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/
 
 # Get open_squad
+mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad
 cp /root/data/datasets/open_squad/* ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad
 
 # Get csarron.tar
-wget http://files.deepspark.org.cn:880/deepspark/csarron.tar
+# wget http://files.deepspark.org.cn:880/deepspark/csarron.tar
+cp /root/data/3rd_party/csarron.tar ./
 tar xf csarron.tar
 rm -f csarron.tar
 mv csarron/ ./ByteMLPerf/byte_infer_perf/general_perf/
diff --git a/models/nlp/language_model/roberta/ixrt/export_onnx.py b/models/nlp/language_model/roberta/ixrt/export_onnx.py
index bc9d2da7..9f115b73 100644
--- a/models/nlp/language_model/roberta/ixrt/export_onnx.py
+++ b/models/nlp/language_model/roberta/ixrt/export_onnx.py
@@ -20,8 +20,8 @@ import torch
 
 
 def torch_to_onnx(model_path, output_path):
-    model_name = output_path.split(".")[0]
-    with open(model_name + ".json", "r") as f:
+    model_name = output_path.split("/")[-1][:-4]
+    with open(model_name + "json", "r") as f:
         model_info = json.load(f)
     model_inputs = model_info["inputs"].split(",")
     input_shapes = model_info["input_shape"]
diff --git a/models/nlp/language_model/roformer/ixrt/ci/prepare.sh b/models/nlp/language_model/roformer/ixrt/ci/prepare.sh
index 5b1e9dd6..c3cc4f3d 100644
--- a/models/nlp/language_model/roformer/ixrt/ci/prepare.sh
+++ b/models/nlp/language_model/roformer/ixrt/ci/prepare.sh
@@ -30,8 +30,6 @@ python3 export_onnx.py --model_path ./data/open_roformer --output_path ./data/op
 onnxsim ./data/open_roformer/roformer-frozen_org.onnx ./data/open_roformer/roformer-frozen.onnx
 python3 deploy.py --model_path ./data/open_roformer/roformer-frozen.onnx --output_path ./data/open_roformer/roformer.onnx
 
-cp /root/data/3rd_party/iluvatar-corex-ixrt/tools/optimizer/optimizer.py ./
-
 # link ByteMLPerf and install requirements
 ln -s ../../../../../toolbox/ByteMLPerf ./
 pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/requirements.txt
diff --git a/models/recommendation/ctr-prediction/widedeep/ixrt/ci/prepare.sh b/models/recommendation/ctr-prediction/widedeep/ixrt/ci/prepare.sh
index f42aed23..56a1bd11 100644
--- a/models/recommendation/ctr-prediction/widedeep/ixrt/ci/prepare.sh
+++ b/models/recommendation/ctr-prediction/widedeep/ixrt/ci/prepare.sh
@@ -20,7 +20,6 @@ apt install -y libnuma-dev
 pip install -r requirements.txt
 
 cp -r /root/data/checkpoints/open_wide_deep_saved_model ./
-cp /root/data/3rd_party/iluvatar-corex-ixrt/tools/optimizer/optimizer.py ./
 python3 export_onnx.py --model_path open_wide_deep_saved_model --output_path open_wide_deep_saved_model/widedeep.onnx
 
 # Simplify onnx model
-- 
Gitee

From ebe526c627461288c53ce3713e179f237e9f2b04 Mon Sep 17 00:00:00 2001
From: "hongliang.yuan"
Date: Mon, 20 Jan 2025 15:39:48 +0800
Subject: [PATCH 34/35] update widedeep

---
 models/recommendation/ctr-prediction/widedeep/ixrt/README.md  | 3 ++-
 .../recommendation/ctr-prediction/widedeep/ixrt/ci/prepare.sh | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/models/recommendation/ctr-prediction/widedeep/ixrt/README.md b/models/recommendation/ctr-prediction/widedeep/ixrt/README.md
index e7f6246a..c6653cab 100644
--- a/models/recommendation/ctr-prediction/widedeep/ixrt/README.md
+++ b/models/recommendation/ctr-prediction/widedeep/ixrt/README.md
@@ -70,7 +70,8 @@ wget https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/eval.csv
 mv eval.csv ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_criteo_kaggle/
 
 wget http://files.deepspark.org.cn:880/deepspark/widedeep_dynamicshape_new.onnx
-mv widedeep_dynamicshape_new.onnx ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/regular/open_wide_deep_saved_model/
+cp open_wide_deep_saved_model/* ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/regular/open_wide_deep_saved_model/
+mv widedeep_dynamicshape_new.onnx ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/regular/open_wide_deep_saved_model/widedeep_dynamicshape.onnx
 
 # Run Acc scripts
 cd ./ByteMLPerf/byte_infer_perf/general_perf
diff --git a/models/recommendation/ctr-prediction/widedeep/ixrt/ci/prepare.sh b/models/recommendation/ctr-prediction/widedeep/ixrt/ci/prepare.sh
index 56a1bd11..2e65a751 100644
--- a/models/recommendation/ctr-prediction/widedeep/ixrt/ci/prepare.sh
+++ b/models/recommendation/ctr-prediction/widedeep/ixrt/ci/prepare.sh
@@ -41,6 +41,7 @@ mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_criteo_kaggle/
 cp /root/data/datasets/eval.csv ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_criteo_kaggle/
 
 wget http://files.deepspark.org.cn:880/deepspark/widedeep_dynamicshape_new.onnx
-mv widedeep_dynamicshape_new.onnx ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/regular/open_wide_deep_saved_model/
+cp open_wide_deep_saved_model/* ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/regular/open_wide_deep_saved_model/
+mv widedeep_dynamicshape_new.onnx ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/regular/open_wide_deep_saved_model/widedeep_dynamicshape.onnx
 
 cp -r /root/data/3rd_party/workloads ./ByteMLPerf/byte_infer_perf/general_perf/
\ No newline at end of file
-- 
Gitee

From 9e6138173e7934fec928d845131c4cc9bc5ad3d3 Mon Sep 17 00:00:00 2001
From: "hongliang.yuan"
Date: Tue, 21 Jan 2025 14:14:33 +0800
Subject: [PATCH 35/35] update

---
 models/speech/speech_recognition/conformer/ixrt/ci/prepare.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/models/speech/speech_recognition/conformer/ixrt/ci/prepare.sh b/models/speech/speech_recognition/conformer/ixrt/ci/prepare.sh
index bbe3af2b..2ee5de0b 100644
--- a/models/speech/speech_recognition/conformer/ixrt/ci/prepare.sh
+++ b/models/speech/speech_recognition/conformer/ixrt/ci/prepare.sh
@@ -29,5 +29,5 @@ pip3 install -r requirements.txt
 
 ln -s /root/data/checkpoints/20210601_u2++_conformer_exp_aishell ./conformer_checkpoints
 
-cp -r /root/data/datasets/AISHELL/data_aishell ./aishell_test_data
-bash scripts/aishell_data_prepare.sh aishell_test_data tools
\ No newline at end of file
+ln -s /root/data/datasets/AISHELL/data_aishell ./aishell_test_data
+bash scripts/aishell_data_prepare.sh ./aishell_test_data ./tools
\ No newline at end of file
-- 
Gitee