diff --git a/README.md b/README.md index b51328219444c0816cb6ce62b66e6da013202715..a3dc5e12a40928ce64d7dacd4d7f6d7cbb4d814d 100644 --- a/README.md +++ b/README.md @@ -906,7 +906,7 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型 Lightweight OpenPose FP16 - - Supported + Supported INT8 diff --git a/models/cv/classification/alexnet/ixrt/ci/prepare.sh b/models/cv/classification/alexnet/ixrt/ci/prepare.sh index 065a9d03e05f819e61d0a53c861e87c1d8eb3d20..7795e04dfe6bd3409a7f4f9164ef10b864a56254 100644 --- a/models/cv/classification/alexnet/ixrt/ci/prepare.sh +++ b/models/cv/classification/alexnet/ixrt/ci/prepare.sh @@ -1,3 +1,30 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + pip install -r requirements.txt mkdir checkpoints -python3 export_onnx.py --origin_model /root/data/checkpoints/alexnet.pth --output_model checkpoints/alexnet.onnx \ No newline at end of file +python3 export_onnx.py --origin_model /root/data/checkpoints/alexnet-owt-7be5be79.pth --output_model checkpoints/alexnet.onnx \ No newline at end of file diff --git a/models/cv/classification/convnext_small/ixrt/README.md b/models/cv/classification/convnext_small/ixrt/README.md index c65501805ff3cae0e3528be88ca83f372ef5c643..2e9f8521adfbfdbe650607a3810ba00b52c43841 100644 --- a/models/cv/classification/convnext_small/ixrt/README.md +++ b/models/cv/classification/convnext_small/ixrt/README.md @@ -15,13 +15,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install tqdm -pip3 install onnx -pip3 install onnxsim -pip3 install tabulate -pip3 install ppq -pip3 install tqdm -pip3 install cuda-python +pip3 install -r requirements.txt ``` ### Download diff --git a/models/cv/classification/convnext_small/ixrt/ci/prepare.sh b/models/cv/classification/convnext_small/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..a6336683e2a3375a8d258364b758150c06e1a7e3 --- /dev/null +++ b/models/cv/classification/convnext_small/ixrt/ci/prepare.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r requirements.txt +python3 export.py --weight /root/data/checkpoints/convnext_small-0c510722.pth --output convnext_small.onnx \ No newline at end of file diff --git a/models/cv/classification/convnext_small/ixrt/inference.py b/models/cv/classification/convnext_small/ixrt/inference.py index 22f1644ced656c96602e15e468166d6df4fec92c..3d3cf572533d574dbb7c80d6d247ce1d0d8a1131 100644 --- a/models/cv/classification/convnext_small/ixrt/inference.py +++ b/models/cv/classification/convnext_small/ixrt/inference.py @@ -82,6 +82,7 @@ def main(config): total_sample = 0 acc_top1, acc_top5 = 0, 0 + start_time = time.time() with tqdm(total= len(dataloader)) as _tqdm: for idx, (batch_data, batch_label) in enumerate(dataloader): batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) @@ -116,6 +117,9 @@ def main(config): _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), acc_5='{:.4f}'.format(acc_top5/total_sample)) _tqdm.update(1) + end_time = time.time() + end2end_time = end_time - start_time + print(F"E2E time : {end2end_time:.3f} seconds") err, = cudart.cudaFree(inputs[0]["allocation"]) assert err == cudart.cudaError_t.cudaSuccess err, = cudart.cudaFree(outputs[0]["allocation"]) diff --git a/models/cv/classification/convnext_small/ixrt/requirements.txt b/models/cv/classification/convnext_small/ixrt/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..520130b7d8ff1a3a6ef5b97c52eeb10fb870f6ed --- /dev/null +++ b/models/cv/classification/convnext_small/ixrt/requirements.txt @@ -0,0 +1,7 @@ +tqdm +onnx +onnxsim +tabulate +ppq +tqdm +cuda-python \ No newline at end of file diff --git a/models/cv/classification/cspdarknet53/ixrt/README.md b/models/cv/classification/cspdarknet53/ixrt/README.md index 4b19154269e08404ed7620fdbb711986e9d224e2..b8fc83ae0ebcaf4893f1cfca2dc101b636cb4ed0 100644 --- a/models/cv/classification/cspdarknet53/ixrt/README.md +++ b/models/cv/classification/cspdarknet53/ixrt/README.md @@ -15,12 +15,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install onnx -pip3 install tqdm -pip3 install onnxsim -pip3 install ppq -pip3 install mmcv==1.5.3 -pip3 install mmcls +pip3 install -r requirements.txt ``` ### Download @@ -42,8 +37,8 @@ wget -O cspdarknet53_3rdparty_8xb32_in1k_20220329-bd275287.pth https://download. 
python3 export.py --cfg mmpretrain/configs/cspnet/cspdarknet50_8xb32_in1k.py --weight cspdarknet53_3rdparty_8xb32_in1k_20220329-bd275287.pth --output cspdarknet53.onnx # Use onnxsim optimize onnx model -mkdir -p data/checkpoints/cspdarknet53_ckpt -onnxsim cspdarknet5.onnx data/checkpoints/cspdarknet53_ckpt/cspdarknet53_sim.onnx +mkdir -p checkpoints +onnxsim cspdarknet53.onnx checkpoints/cspdarknet53_sim.onnx ``` @@ -51,7 +46,7 @@ onnxsim cspdarknet5.onnx data/checkpoints/cspdarknet53_ckpt/cspdarknet53_sim.onn ```bash export DATASETS_DIR=/Path/to/imagenet_val/ -export CHECKPOINTS_DIR=/Path/to/data/checkpoints/cspdarknet53_ckpt +export CHECKPOINTS_DIR=/Path/to/checkpoints/ export CONFIG_DIR=./config/CSPDARKNET53_CONFIG ``` diff --git a/models/cv/classification/cspdarknet53/ixrt/ci/prepare.sh b/models/cv/classification/cspdarknet53/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..29a65f01aca96bb5cfde0bb87746dc684020c657 --- /dev/null +++ b/models/cv/classification/cspdarknet53/ixrt/ci/prepare.sh @@ -0,0 +1,34 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r requirements.txt +unzip -q /root/data/repos/mmpretrain-0.24.0.zip -d ./ + +python3 export.py --cfg mmpretrain/configs/cspnet/cspdarknet50_8xb32_in1k.py --weight /root/data/checkpoints/cspdarknet53_3rdparty_8xb32_in1k_20220329-bd275287.pth --output cspdarknet53.onnx + +mkdir -p checkpoints +onnxsim cspdarknet53.onnx checkpoints/cspdarknet53_sim.onnx \ No newline at end of file diff --git a/models/cv/classification/cspdarknet53/ixrt/inference.py b/models/cv/classification/cspdarknet53/ixrt/inference.py index 56b7f51c28143f9911343ed8490af1eda2981721..360b0cf00f19a20ec775d382fda2fa52079725b5 100755 --- a/models/cv/classification/cspdarknet53/ixrt/inference.py +++ b/models/cv/classification/cspdarknet53/ixrt/inference.py @@ -83,6 +83,7 @@ def main(config): total_sample = 0 acc_top1, acc_top5 = 0, 0 + start_time = time.time() with tqdm(total= len(dataloader)) as _tqdm: for idx, (batch_data, batch_label) in enumerate(dataloader): batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) @@ -107,6 +108,9 @@ def main(config): acc_5='{:.4f}'.format(acc_top5/total_sample)) _tqdm.update(1) + end_time = time.time() + e2e_time = end_time - start_time + print(F"E2E time : {e2e_time:.3f} seconds") print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") acc1 = acc_top1/total_sample diff --git a/models/cv/classification/cspdarknet53/ixrt/requirements.txt b/models/cv/classification/cspdarknet53/ixrt/requirements.txt new file mode 100644 index
0000000000000000000000000000000000000000..972db4d81c8946b10c741f25f978e5c899172474 --- /dev/null +++ b/models/cv/classification/cspdarknet53/ixrt/requirements.txt @@ -0,0 +1,7 @@ +onnx +tqdm +onnxsim +ppq +mmcv==1.5.3 +mmcls +protobuf==3.20.0 \ No newline at end of file diff --git a/models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet50_fp16_accuracy.sh b/models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet53_fp16_accuracy.sh similarity index 100% rename from models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet50_fp16_accuracy.sh rename to models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet53_fp16_accuracy.sh diff --git a/models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet50_fp16_performance.sh b/models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet53_fp16_performance.sh similarity index 100% rename from models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet50_fp16_performance.sh rename to models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet53_fp16_performance.sh diff --git a/models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet50_int8_accuracy.sh b/models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet53_int8_accuracy.sh similarity index 100% rename from models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet50_int8_accuracy.sh rename to models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet53_int8_accuracy.sh diff --git a/models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet50_int8_performance.sh b/models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet53_int8_performance.sh similarity index 100% rename from models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet50_int8_performance.sh rename to models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet53_int8_performance.sh diff --git a/models/cv/classification/cspresnet50/ixrt/README.md b/models/cv/classification/cspresnet50/ixrt/README.md index 9a5d01a8e72c7e59eb3f00cfc19e98548bff4e54..a95c9a4c99cf3d1678b2b5da234a456b40151a9f 100644 --- a/models/cv/classification/cspresnet50/ixrt/README.md +++ b/models/cv/classification/cspresnet50/ixrt/README.md @@ -16,13 +16,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install tqdm -pip3 install tabulate -pip3 install onnx -pip3 install onnxsim -pip3 install opencv-python==4.6.0.66 -pip3 install mmcls==0.24.0 -pip3 install mmcv==1.5.3 +pip3 install -r requirements.txt ``` ### Download @@ -33,12 +27,10 @@ Dataset: to download the validation dat ```bash mkdir checkpoints -cd checkpoints git clone -b v0.24.0 https://github.com/open-mmlab/mmpretrain.git -cd .. python3 export_onnx.py \ - --config_file ./checkpoints/mmpretrain/configs/cspnet/cspresnet50_8xb32_in1k.py \ + --config_file ./mmpretrain/configs/cspnet/cspresnet50_8xb32_in1k.py \ --checkpoint_file https://download.openmmlab.com/mmclassification/v0/cspnet/cspresnet50_3rdparty_8xb32_in1k_20220329-dd6dddfb.pth \ --output_model ./checkpoints/cspresnet50.onnx ``` diff --git a/models/cv/classification/cspresnet50/ixrt/ci/prepare.sh b/models/cv/classification/cspresnet50/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..d3e995c46026bcbede7c59a698aff0d69d82bbb1 --- /dev/null +++ b/models/cv/classification/cspresnet50/ixrt/ci/prepare.sh @@ -0,0 +1,34 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r requirements.txt +unzip -q /root/data/repos/mmpretrain-0.24.0.zip -d ./ +mkdir -p checkpoints +python3 export_onnx.py \ + --config_file ./mmpretrain/configs/cspnet/cspresnet50_8xb32_in1k.py \ + --checkpoint_file /root/data/checkpoints/cspresnet50_3rdparty_8xb32_in1k_20220329-dd6dddfb.pth \ + --output_model ./checkpoints/cspresnet50.onnx diff --git a/models/cv/classification/cspresnet50/ixrt/inference.py b/models/cv/classification/cspresnet50/ixrt/inference.py index 1ec56b4a1f09ee4bd7516461f758ac121a5346a0..11a90c79c1364f1195bcc8b43525c901a9d9d6cf 100644 --- a/models/cv/classification/cspresnet50/ixrt/inference.py +++ b/models/cv/classification/cspresnet50/ixrt/inference.py @@ -84,6 +84,7 @@ def main(config): total_sample = 0 acc_top1, acc_top5 = 0, 0 + start_time = time.time() with tqdm(total= len(dataloader)) as _tqdm: for idx, (batch_data, batch_label) in enumerate(dataloader): batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) @@ -105,7 +106,9 @@ def main(config): _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), acc_5='{:.4f}'.format(acc_top5/total_sample)) _tqdm.update(1) - + end_time = time.time() + e2e_time = end_time - start_time + print(F"E2E time : {e2e_time:.3f} seconds") print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") acc1 = acc_top1/total_sample diff --git a/models/cv/classification/cspresnet50/ixrt/requirements.txt b/models/cv/classification/cspresnet50/ixrt/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..566974bb7c93c6ce7842a2199473c9518046c6fc --- /dev/null +++ b/models/cv/classification/cspresnet50/ixrt/requirements.txt @@ -0,0 +1,8 @@ +tqdm +tabulate +onnx +onnxsim +opencv-python==4.6.0.66 +mmcls==0.24.0 +mmcv==1.5.3 +pycuda \ No newline at end of file diff --git a/models/cv/classification/densenet121/ixrt/ci/prepare.sh b/models/cv/classification/densenet121/ixrt/ci/prepare.sh index 4892448bb504da93fff2e6914d1d0f3502e0d284..c3103b01b9743dff39465b3490bb157f935ab52e 100644 --- a/models/cv/classification/densenet121/ixrt/ci/prepare.sh +++ b/models/cv/classification/densenet121/ixrt/ci/prepare.sh @@ -1,3 +1,30 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + pip install -r requirements.txt mkdir checkpoints mkdir -p /root/.cache/torch/hub/checkpoints/ diff --git a/models/cv/classification/densenet161/ixrt/README.md b/models/cv/classification/densenet161/ixrt/README.md index cb65f64a23538a7d67ac3ee4fbb171cd89c427c6..5e225c5e414d99326553cb5517ddeaf8e5eb4d81 100644 --- a/models/cv/classification/densenet161/ixrt/README.md +++ b/models/cv/classification/densenet161/ixrt/README.md @@ -15,11 +15,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install tqdm -pip3 install onnx -pip3 install onnxsim -pip3 install tabulate -pip3 install cuda-python +pip3 install -r requirements.txt ``` ### Download @@ -43,9 +39,9 @@ export DATASETS_DIR=/Path/to/imagenet_val/ ```bash # Accuracy -bash scripts/infer_densenet_fp16_accuracy.sh +bash scripts/infer_densenet161_fp16_accuracy.sh # Performance -bash scripts/infer_densenet_fp16_performance.sh +bash scripts/infer_densenet161_fp16_performance.sh ``` ## Results diff --git a/models/cv/classification/densenet161/ixrt/ci/prepare.sh b/models/cv/classification/densenet161/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..79d6d7530c89d0d48387caca785ac97817a7db0d --- /dev/null +++ b/models/cv/classification/densenet161/ixrt/ci/prepare.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r requirements.txt + +python3 export.py --weight /root/data/checkpoints/densenet161-8d451a50.pth --output densenet161.onnx \ No newline at end of file diff --git a/models/cv/classification/densenet161/ixrt/inference.py b/models/cv/classification/densenet161/ixrt/inference.py index 22f1644ced656c96602e15e468166d6df4fec92c..e7102e507b7244d76122eb3f8fc0f4969e737cf5 100644 --- a/models/cv/classification/densenet161/ixrt/inference.py +++ b/models/cv/classification/densenet161/ixrt/inference.py @@ -82,6 +82,7 @@ def main(config): total_sample = 0 acc_top1, acc_top5 = 0, 0 + start_time = time.time() with tqdm(total= len(dataloader)) as _tqdm: for idx, (batch_data, batch_label) in enumerate(dataloader): batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) @@ -116,6 +117,9 @@ def main(config): _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), acc_5='{:.4f}'.format(acc_top5/total_sample)) _tqdm.update(1) + end_time = time.time() + e2e_time = end_time - start_time + print(F"E2E time : {e2e_time:.3f} seconds") err, = cudart.cudaFree(inputs[0]["allocation"]) assert err == cudart.cudaError_t.cudaSuccess err, = cudart.cudaFree(outputs[0]["allocation"]) diff --git a/models/cv/classification/densenet161/ixrt/requirements.txt b/models/cv/classification/densenet161/ixrt/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..4805e251e80a616696846c241348bef6224c5fca --- /dev/null +++ b/models/cv/classification/densenet161/ixrt/requirements.txt @@ -0,0 +1,5 @@ +tqdm +onnx +onnxsim +tabulate +cuda-python \ No newline at end of file diff --git a/models/cv/classification/densenet161/ixrt/scripts/infer_densenet_fp16_accuracy.sh b/models/cv/classification/densenet161/ixrt/scripts/infer_densenet161_fp16_accuracy.sh similarity index 100% rename from models/cv/classification/densenet161/ixrt/scripts/infer_densenet_fp16_accuracy.sh rename to models/cv/classification/densenet161/ixrt/scripts/infer_densenet161_fp16_accuracy.sh diff --git a/models/cv/classification/densenet161/ixrt/scripts/infer_densenet_fp16_performance.sh b/models/cv/classification/densenet161/ixrt/scripts/infer_densenet161_fp16_performance.sh similarity index 100% rename from models/cv/classification/densenet161/ixrt/scripts/infer_densenet_fp16_performance.sh rename to models/cv/classification/densenet161/ixrt/scripts/infer_densenet161_fp16_performance.sh diff --git a/models/cv/classification/densenet169/ixrt/README.md b/models/cv/classification/densenet169/ixrt/README.md index 6abd12c0ff7e6103840c69e4b6e8d629b3037f8a..480e9df61520691c764dff53ee2279f3f0d195f7 100644 --- a/models/cv/classification/densenet169/ixrt/README.md +++ b/models/cv/classification/densenet169/ixrt/README.md @@ -15,11 +15,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install tqdm -pip3 install onnx -pip3 install onnxsim -pip3 install tabulate -pip3 install cuda-python +pip3 install -r requirements.txt ``` ### Download @@ -44,13 +40,13 @@ export DATASETS_DIR=/Path/to/imagenet_val/ ```bash # Accuracy -bash scripts/infer_densenet_fp16_accuracy.sh +bash scripts/infer_densenet169_fp16_accuracy.sh # Performance -bash scripts/infer_densenet_fp16_performance.sh +bash scripts/infer_densenet169_fp16_performance.sh ``` ## Results | Model | BatchSize | Precision | 
FPS | Top-1(%) | Top-5(%) | | -------- | --------- | --------- | ------- | -------- | -------- | -| DenseNet | 32 | FP16 | 1119.69 | 0.7558 | 0.9284 | +| DenseNet169 | 32 | FP16 | 1119.69 | 0.7558 | 0.9284 | diff --git a/models/cv/classification/densenet169/ixrt/ci/prepare.sh b/models/cv/classification/densenet169/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..da20933fd38bbf03495e1e60a4440eef86ab68b7 --- /dev/null +++ b/models/cv/classification/densenet169/ixrt/ci/prepare.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r requirements.txt + +python3 export.py --weight /root/data/checkpoints/densenet169-b2777c0a.pth --output densenet169.onnx \ No newline at end of file diff --git a/models/cv/classification/densenet169/ixrt/inference.py b/models/cv/classification/densenet169/ixrt/inference.py index 22f1644ced656c96602e15e468166d6df4fec92c..e7102e507b7244d76122eb3f8fc0f4969e737cf5 100644 --- a/models/cv/classification/densenet169/ixrt/inference.py +++ b/models/cv/classification/densenet169/ixrt/inference.py @@ -82,6 +82,7 @@ def main(config): total_sample = 0 acc_top1, acc_top5 = 0, 0 + start_time = time.time() with tqdm(total= len(dataloader)) as _tqdm: for idx, (batch_data, batch_label) in enumerate(dataloader): batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) @@ -116,6 +117,9 @@ def main(config): _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), acc_5='{:.4f}'.format(acc_top5/total_sample)) _tqdm.update(1) + end_time = time.time() + e2e_time = end_time - start_time + print(F"E2E time : {e2e_time:.3f} seconds") err, = cudart.cudaFree(inputs[0]["allocation"]) assert err == cudart.cudaError_t.cudaSuccess err, = cudart.cudaFree(outputs[0]["allocation"]) diff --git a/models/cv/classification/densenet169/ixrt/requirements.txt b/models/cv/classification/densenet169/ixrt/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..4805e251e80a616696846c241348bef6224c5fca --- /dev/null +++ b/models/cv/classification/densenet169/ixrt/requirements.txt @@ -0,0 +1,5 @@ +tqdm +onnx +onnxsim +tabulate +cuda-python \ No newline at end of file diff --git a/models/cv/classification/densenet169/ixrt/scripts/infer_densenet_fp16_accuracy.sh b/models/cv/classification/densenet169/ixrt/scripts/infer_densenet169_fp16_accuracy.sh similarity index 100% rename from models/cv/classification/densenet169/ixrt/scripts/infer_densenet_fp16_accuracy.sh rename to models/cv/classification/densenet169/ixrt/scripts/infer_densenet169_fp16_accuracy.sh diff --git a/models/cv/classification/densenet169/ixrt/scripts/infer_densenet_fp16_performance.sh 
b/models/cv/classification/densenet169/ixrt/scripts/infer_densenet169_fp16_performance.sh similarity index 100% rename from models/cv/classification/densenet169/ixrt/scripts/infer_densenet_fp16_performance.sh rename to models/cv/classification/densenet169/ixrt/scripts/infer_densenet169_fp16_performance.sh diff --git a/models/cv/classification/efficientnet_b0/ixrt/README.md b/models/cv/classification/efficientnet_b0/ixrt/README.md index eb34b2dbb9741addc55483cef1b9108c46f14348..84065777ad2f7241c2684d0bbbe119788912a3a3 100644 --- a/models/cv/classification/efficientnet_b0/ixrt/README.md +++ b/models/cv/classification/efficientnet_b0/ixrt/README.md @@ -27,8 +27,7 @@ Dataset: to download the validation dat ### Model Conversion ```bash -mkdir -p checkpoints -python3 export_onnx.py --origin_model /path/to/efficientnet_b0_rwightman-3dd342df.pth --output_model checkpoints/efficientnet_b0.onnx +python3 export_onnx.py --origin_model /path/to/efficientnet_b0_rwightman-3dd342df.pth --output_model efficientnet_b0.onnx ``` ## Inference diff --git a/models/cv/classification/efficientnet_b0/ixrt/ci/prepare.sh b/models/cv/classification/efficientnet_b0/ixrt/ci/prepare.sh index aba93afdef89b489be1bc9ea1b33ba769e4da364..ec7e84048039fa9146878b1a609780828cbb56bc 100644 --- a/models/cv/classification/efficientnet_b0/ixrt/ci/prepare.sh +++ b/models/cv/classification/efficientnet_b0/ixrt/ci/prepare.sh @@ -1,3 +1,29 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + pip install -r requirements.txt -mkdir checkpoints -python3 export_onnx.py --origin_model /root/data/checkpoints/efficientnet_b0.pth --output_model checkpoints/efficientnet_b0.onnx \ No newline at end of file +python3 export_onnx.py --origin_model /root/data/checkpoints/efficientnet_b0_rwightman-3dd342df.pth --output_model efficientnet_b0.onnx \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b0/ixrt/quant copy.py b/models/cv/classification/efficientnet_b0/ixrt/quant copy.py deleted file mode 100644 index 72f8563867864b83ac3b4dd207bc7f3dc06bb28f..0000000000000000000000000000000000000000 --- a/models/cv/classification/efficientnet_b0/ixrt/quant copy.py +++ /dev/null @@ -1,147 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -from ppq import * -from ppq.api import * -import os -from calibration_dataset import getdataloader -import argparse -import random -import numpy as np -import torch - -random.seed(42) -np.random.seed(42) -torch.manual_seed(42) - - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model_name", type=str) - parser.add_argument("--model", type=str) - parser.add_argument("--dataset_dir", type=str, default="imagenet_val") - parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], - default="hist_percentile") - parser.add_argument("--disable_quant_names", nargs='*', type=str) - parser.add_argument("--save_dir", type=str, help="save path", default=None) - parser.add_argument("--bsz", type=int, default=32) - parser.add_argument("--step", type=int, default=32) - parser.add_argument("--seed", type=int, default=42) - parser.add_argument("--imgsz", type=int, default=224) - args = parser.parse_args() - print("Quant config:", args) - print(args.disable_quant_names) - return args - - -config = parse_args() - -# modify configuration below: -WORKING_DIRECTORY = 'checkpoints' # choose your working directory -TARGET_PLATFORM = TargetPlatform.TRT_INT8 # choose your target platform -MODEL_TYPE = NetworkFramework.ONNX # or NetworkFramework.CAFFE -INPUT_LAYOUT = 'chw' # input data layout, chw or hwc -NETWORK_INPUTSHAPE = [config.bsz, 3, 224, 224] # input shape of your network -EXECUTING_DEVICE = 'cuda' # 'cuda' or 'cpu'. -REQUIRE_ANALYSE = False -TRAINING_YOUR_NETWORK = True # 是否需要 Finetuning 一下你的网络 -# ------------------------------------------------------------------- -# 加载你的模型文件,PPQ 将会把 onnx 或者 caffe 模型文件解析成自己的格式 -# 如果你正使用 pytorch, tensorflow 等框架,你可以先将模型导出成 onnx -# 使用 torch.onnx.export 即可,如果你在导出 torch 模型时发生错误,欢迎与我们联系。 -# ------------------------------------------------------------------- -graph = None -if MODEL_TYPE == NetworkFramework.ONNX: - graph = load_onnx_graph(onnx_import_file=config.model) -if MODEL_TYPE == NetworkFramework.CAFFE: - graph = load_caffe_graph( - caffemodel_path=os.path.join(WORKING_DIRECTORY, 'model.caffemodel'), - prototxt_path=os.path.join(WORKING_DIRECTORY, 'model.prototxt')) -assert graph is not None, 'Graph Loading Error, Check your input again.' 
- -# ------------------------------------------------------------------- -# SETTING 对象用于控制 PPQ 的量化逻辑,主要描述了图融合逻辑、调度方案、量化细节策略等 -# 当你的网络量化误差过高时,你需要修改 SETTING 对象中的属性来进行特定的优化 -# ------------------------------------------------------------------- -QS = QuantizationSettingFactory.default_setting() - -# ------------------------------------------------------------------- -# 下面向你展示了如何使用 finetuning 过程提升量化精度 -# 在 PPQ 中我们提供了十余种算法用来帮助你恢复精度 -# 开启他们的方式都是 QS.xxxx = True -# 按需使用,不要全部打开,容易起飞 -# ------------------------------------------------------------------- -if TRAINING_YOUR_NETWORK: - QS.lsq_optimization = True # 启动网络再训练过程,降低量化误差 - QS.lsq_optimization_setting.steps = 800 # 再训练步数,影响训练时间,500 步大概几分钟 - QS.lsq_optimization_setting.collecting_device = 'cpu' # 缓存数据放在那,cuda 就是放在gpu,如果显存超了你就换成 'cpu' - - -dataloader = getdataloader(config.dataset_dir, config.step, batch_size=config.bsz, img_sz=config.imgsz) -# ENABLE CUDA KERNEL 会加速量化效率 3x ~ 10x,但是你如果没有装相应编译环境的话是编译不了的 -# 你可以尝试安装编译环境,或者在不启动 CUDA KERNEL 的情况下完成量化:移除 with ENABLE_CUDA_KERNEL(): 即可 -with ENABLE_CUDA_KERNEL(): - print('网络正量化中,根据你的量化配置,这将需要一段时间:') - quantized = quantize_native_model( - setting=QS, # setting 对象用来控制标准量化逻辑 - model=graph, - calib_dataloader=dataloader, - calib_steps=config.step, - input_shape=NETWORK_INPUTSHAPE, # 如果你的网络只有一个输入,使用这个参数传参 - inputs=None, - # 如果你的网络有多个输入,使用这个参数传参,就是 input_shape=None, inputs=[torch.zeros(1,3,224,224), torch.zeros(1,3,224,224)] - collate_fn=lambda x: x[0].to(EXECUTING_DEVICE), # collate_fn 跟 torch dataloader 的 collate fn 是一样的,用于数据预处理, - # 你当然也可以用 torch dataloader 的那个,然后设置这个为 None - platform=TARGET_PLATFORM, - device=EXECUTING_DEVICE, - do_quantize=True) - - # ------------------------------------------------------------------- - # 如果你需要执行量化后的神经网络并得到结果,则需要创建一个 executor - # 这个 executor 的行为和 torch.Module 是类似的,你可以利用这个东西来获取执行结果 - # 请注意,必须在 export 之前执行此操作。 - # ------------------------------------------------------------------- - executor = TorchExecutor(graph=quantized, device=EXECUTING_DEVICE) - # output = executor.forward(input) - - # ------------------------------------------------------------------- - # PPQ 计算量化误差时,使用信噪比的倒数作为指标,即噪声能量 / 信号能量 - # 量化误差 0.1 表示在整体信号中,量化噪声的能量约为 10% - # 你应当注意,在 graphwise_error_analyse 分析中,我们衡量的是累计误差 - # 网络的最后一层往往都具有较大的累计误差,这些误差是其前面的所有层所共同造成的 - # 你需要使用 layerwise_error_analyse 逐层分析误差的来源 - # ------------------------------------------------------------------- - print('正计算网络量化误差(SNR),最后一层的误差应小于 0.1 以保证量化精度:') - reports = graphwise_error_analyse( - graph=quantized, running_device=EXECUTING_DEVICE, steps=32, - dataloader=dataloader, collate_fn=lambda x: x[0].to(EXECUTING_DEVICE)) - for op, snr in reports.items(): - if snr > 0.1: ppq_warning(f'层 {op} 的累计量化误差显著,请考虑进行优化') - - if REQUIRE_ANALYSE: - print('正计算逐层量化误差(SNR),每一层的独立量化误差应小于 0.1 以保证量化精度:') - layerwise_error_analyse(graph=quantized, running_device=EXECUTING_DEVICE, - interested_outputs=None, - dataloader=dataloader, collate_fn=lambda x: x.to(EXECUTING_DEVICE)) - - # ------------------------------------------------------------------- - # 使用 export_ppq_graph 函数来导出量化后的模型 - # PPQ 会根据你所选择的导出平台来修改模型格式 - # ------------------------------------------------------------------- - print('网络量化结束,正在生成目标文件:') - export_ppq_graph( - graph=quantized, platform=TARGET_PLATFORM, - graph_save_to=os.path.join(config.save_dir, f"quantized_{config.model_name}.onnx"), - config_save_to=os.path.join(config.save_dir, 'quant_cfg.json')) \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b0/ixrt/quant.py 
b/models/cv/classification/efficientnet_b0/ixrt/quant.py index 78aee19e9cd81f9e9ca4a1b76290a0a1d5c40266..72f8563867864b83ac3b4dd207bc7f3dc06bb28f 100644 --- a/models/cv/classification/efficientnet_b0/ixrt/quant.py +++ b/models/cv/classification/efficientnet_b0/ixrt/quant.py @@ -13,33 +13,31 @@ # License for the specific language governing permissions and limitations # under the License. +from ppq import * +from ppq.api import * import os -import cv2 -import random +from calibration_dataset import getdataloader import argparse +import random import numpy as np -from random import shuffle -from tensorrt.deploy import static_quantize - import torch -import torchvision.datasets -from calibration_dataset import getdataloader -def setseed(seed=42): - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) +random.seed(42) +np.random.seed(42) +torch.manual_seed(42) + def parse_args(): parser = argparse.ArgumentParser() parser.add_argument("--model_name", type=str) parser.add_argument("--model", type=str) parser.add_argument("--dataset_dir", type=str, default="imagenet_val") - parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], default="hist_percentile") + parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], + default="hist_percentile") parser.add_argument("--disable_quant_names", nargs='*', type=str) - parser.add_argument("--save_dir", type=str, help="save path", default=None) + parser.add_argument("--save_dir", type=str, help="save path", default=None) parser.add_argument("--bsz", type=int, default=32) - parser.add_argument("--step", type=int, default=20) + parser.add_argument("--step", type=int, default=32) parser.add_argument("--seed", type=int, default=42) parser.add_argument("--imgsz", type=int, default=224) args = parser.parse_args() @@ -47,13 +45,103 @@ def parse_args(): print(args.disable_quant_names) return args -args = parse_args() -setseed(args.seed) -calibration_dataloader = getdataloader(args.dataset_dir, args.step, args.bsz, img_sz=args.imgsz) -static_quantize(args.model, - calibration_dataloader=calibration_dataloader, - save_quant_onnx_path=os.path.join(args.save_dir, f"quantized_{args.model_name}.onnx"), - observer=args.observer, - data_preprocess=lambda x: x[0].to("cuda"), - quant_format="ppq", - disable_quant_names=args.disable_quant_names) \ No newline at end of file + +config = parse_args() + +# modify configuration below: +WORKING_DIRECTORY = 'checkpoints' # choose your working directory +TARGET_PLATFORM = TargetPlatform.TRT_INT8 # choose your target platform +MODEL_TYPE = NetworkFramework.ONNX # or NetworkFramework.CAFFE +INPUT_LAYOUT = 'chw' # input data layout, chw or hwc +NETWORK_INPUTSHAPE = [config.bsz, 3, 224, 224] # input shape of your network +EXECUTING_DEVICE = 'cuda' # 'cuda' or 'cpu'. 
+REQUIRE_ANALYSE = False +TRAINING_YOUR_NETWORK = True # whether to finetune your network +# ------------------------------------------------------------------- +# Load your model file; PPQ will parse the onnx or caffe model file into its own graph format +# If you are using pytorch, tensorflow or another framework, export the model to onnx first +# torch.onnx.export is enough; if you hit errors while exporting the torch model, feel free to contact us. +# ------------------------------------------------------------------- +graph = None +if MODEL_TYPE == NetworkFramework.ONNX: + graph = load_onnx_graph(onnx_import_file=config.model) +if MODEL_TYPE == NetworkFramework.CAFFE: + graph = load_caffe_graph( + caffemodel_path=os.path.join(WORKING_DIRECTORY, 'model.caffemodel'), + prototxt_path=os.path.join(WORKING_DIRECTORY, 'model.prototxt')) +assert graph is not None, 'Graph Loading Error, Check your input again.' + +# ------------------------------------------------------------------- +# The SETTING object controls PPQ's quantization logic: graph fusion, scheduling and detailed quantization strategies +# If the quantization error of your network is too high, tune the attributes of the SETTING object for targeted optimization +# ------------------------------------------------------------------- +QS = QuantizationSettingFactory.default_setting() + +# ------------------------------------------------------------------- +# The following shows how to use finetuning to improve quantization accuracy +# PPQ provides more than ten algorithms to help you recover accuracy +# Each of them is enabled via QS.xxxx = True +# Enable them as needed; do not turn them all on at once +# ------------------------------------------------------------------- +if TRAINING_YOUR_NETWORK: + QS.lsq_optimization = True # enable network retraining to reduce quantization error + QS.lsq_optimization_setting.steps = 800 # retraining steps; affects training time, 500 steps take roughly a few minutes + QS.lsq_optimization_setting.collecting_device = 'cpu' # where cached data is kept; 'cuda' keeps it on the GPU, switch to 'cpu' if GPU memory runs out + + +dataloader = getdataloader(config.dataset_dir, config.step, batch_size=config.bsz, img_sz=config.imgsz) +# ENABLE CUDA KERNEL speeds quantization up by about 3x ~ 10x, but it will not compile without the corresponding build environment +# You can try to install the build environment, or finish quantization without the CUDA KERNEL: just remove the with ENABLE_CUDA_KERNEL(): block +with ENABLE_CUDA_KERNEL(): + print('Quantizing the network; depending on your quantization configuration this will take a while:') + quantized = quantize_native_model( + setting=QS, # the setting object controls the standard quantization logic + model=graph, + calib_dataloader=dataloader, + calib_steps=config.step, + input_shape=NETWORK_INPUTSHAPE, # if your network has a single input, pass it via this argument + inputs=None, + # if your network has multiple inputs, pass them here instead, i.e. input_shape=None, inputs=[torch.zeros(1,3,224,224), torch.zeros(1,3,224,224)] + collate_fn=lambda x: x[0].to(EXECUTING_DEVICE), # collate_fn works like the torch dataloader collate fn and is used for data preprocessing, + # you can of course use the torch dataloader's own collate fn and set this to None + platform=TARGET_PLATFORM, + device=EXECUTING_DEVICE, + do_quantize=True) + + + # ------------------------------------------------------------------- + # To run the quantized network and get its outputs, create an executor + # The executor behaves much like a torch.Module and can be used to obtain execution results + # Note that this must be done before export. + # ------------------------------------------------------------------- + executor = TorchExecutor(graph=quantized, device=EXECUTING_DEVICE) + # output = executor.forward(input) + + + # ------------------------------------------------------------------- + # PPQ measures quantization error with the inverse of SNR, i.e. noise energy / signal energy + # An error of 0.1 means quantization noise accounts for roughly 10% of the overall signal energy + # Note that graphwise_error_analyse measures the accumulated error + # The last layer of a network usually shows a large accumulated error, contributed by all the layers before it + # Use layerwise_error_analyse to trace the error back layer by layer + # ------------------------------------------------------------------- + print('Computing network quantization error (SNR); the last layer error should stay below 0.1 to preserve accuracy:') + reports = graphwise_error_analyse( + graph=quantized, running_device=EXECUTING_DEVICE, steps=32, + dataloader=dataloader, collate_fn=lambda x: x[0].to(EXECUTING_DEVICE)) + for op, snr in reports.items(): + if snr > 0.1: ppq_warning(f'Layer {op} shows significant accumulated quantization error, consider optimizing it') + + if REQUIRE_ANALYSE: + print('Computing layer-wise quantization error (SNR); each layer independent error should stay below 0.1 to preserve accuracy:') + layerwise_error_analyse(graph=quantized, running_device=EXECUTING_DEVICE, + interested_outputs=None, + dataloader=dataloader, collate_fn=lambda x: x.to(EXECUTING_DEVICE)) + + # ------------------------------------------------------------------- + # Use export_ppq_graph to export the quantized model + # PPQ adapts the model format to the export platform you choose + # ------------------------------------------------------------------- + print('Quantization finished, generating target files:') + export_ppq_graph( + graph=quantized, platform=TARGET_PLATFORM, + graph_save_to=os.path.join(config.save_dir, f"quantized_{config.model_name}.onnx"), + config_save_to=os.path.join(config.save_dir, 'quant_cfg.json')) \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_fp16_accuracy.sh b/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_fp16_accuracy.sh index ea8c545fb648e0a4bfceb38d7bea4e5a080ad37a..dfc034b931c2869d726fd2aa7eb66a5ef0013339 100644 --- a/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_fp16_accuracy.sh +++ b/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_fp16_accuracy.sh @@ -14,7 +14,7 @@ # License for the specific language governing permissions and limitations # under the License. batchsize=32 -model_path="checkpoints/efficientnet_b0" +model_path="efficientnet_b0" # model_path="resnet18" datasets_path=${DATASETS_DIR} diff --git a/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_fp16_performance.sh b/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_fp16_performance.sh index f884480200669d435b1b957ccb32ce4657adf102..818c066d1eb0b0da8efc6fc4e0de2552c8ecbfc8 100644 --- a/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_fp16_performance.sh +++ b/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_fp16_performance.sh @@ -14,7 +14,7 @@ # License for the specific language governing permissions and limitations # under the License. batchsize=32 -model_path="checkpoints/efficientnet_b0" +model_path="efficientnet_b0" # model_path="resnet18" datasets_path=${DATASETS_DIR} diff --git a/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_int8_accuracy.sh b/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_int8_accuracy.sh index 6f3584c96cdd48d27c87ad3382e214514c9e04f2..97892512de566ab89b00b361739f11de163c2433 100644 --- a/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_int8_accuracy.sh +++ b/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_int8_accuracy.sh @@ -14,7 +14,7 @@ # License for the specific language governing permissions and limitations # under the License. batchsize=32 -model_path="checkpoints/efficientnet_b0" +model_path="efficientnet_b0" # model_path="resnet18" datasets_path=${DATASETS_DIR} diff --git a/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_int8_performance.sh b/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_int8_performance.sh index 8989c2db792bd503c5e2a39fb34285321c7b6412..e016fd1a4c3dfafc08762ccdb03e7ed02c46c105 100644 --- a/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_int8_performance.sh +++ b/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_int8_performance.sh @@ -14,7 +14,7 @@ # License for the specific language governing permissions and limitations # under the License.
batchsize=32 -model_path="checkpoints/efficientnet_b0" +model_path="efficientnet_b0" # model_path="resnet18" datasets_path=${DATASETS_DIR} diff --git a/models/cv/classification/efficientnet_b1/ixrt/README.md b/models/cv/classification/efficientnet_b1/ixrt/README.md index 3e09969a9814a0cfeda9a5528276c604177d0c3b..282b6440b765402baa6a520b4027219ae03fe41c 100644 --- a/models/cv/classification/efficientnet_b1/ixrt/README.md +++ b/models/cv/classification/efficientnet_b1/ixrt/README.md @@ -15,10 +15,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install tqdm -pip3 install onnx -pip3 install onnxsim -pip3 install tabulate +pip3 install -r requirements.txt ``` ### Download diff --git a/models/cv/classification/efficientnet_b1/ixrt/ci/prepare.sh b/models/cv/classification/efficientnet_b1/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..d9e780f21f5603d601fb6cbb12a2422cc7df59e1 --- /dev/null +++ b/models/cv/classification/efficientnet_b1/ixrt/ci/prepare.sh @@ -0,0 +1,33 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r requirements.txt + +mkdir checkpoints +mkdir -p /root/.cache/torch/hub/checkpoints/ +ln -s /root/data/checkpoints/efficientnet_b1_rwightman-bac287d4.pth /root/.cache/torch/hub/checkpoints/efficientnet_b1_rwightman-bac287d4.pth +python3 export_onnx.py --output_model checkpoints/efficientnet-b1.onnx \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b1/ixrt/inference.py b/models/cv/classification/efficientnet_b1/ixrt/inference.py index 1ec56b4a1f09ee4bd7516461f758ac121a5346a0..11a90c79c1364f1195bcc8b43525c901a9d9d6cf 100644 --- a/models/cv/classification/efficientnet_b1/ixrt/inference.py +++ b/models/cv/classification/efficientnet_b1/ixrt/inference.py @@ -84,6 +84,7 @@ def main(config): total_sample = 0 acc_top1, acc_top5 = 0, 0 + start_time = time.time() with tqdm(total= len(dataloader)) as _tqdm: for idx, (batch_data, batch_label) in enumerate(dataloader): batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) @@ -105,7 +106,9 @@ def main(config): _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), acc_5='{:.4f}'.format(acc_top5/total_sample)) _tqdm.update(1) - + end_time = time.time() + e2e_time = end_time - start_time + print(F"E2E time : {e2e_time:.3f} seconds") print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") acc1 = acc_top1/total_sample diff --git a/models/cv/classification/efficientnet_b1/ixrt/requirements.txt b/models/cv/classification/efficientnet_b1/ixrt/requirements.txt new file mode 100644 index 
0000000000000000000000000000000000000000..bc645b22bbcf3675e3bfe6f8d2da91ca8c089b3a --- /dev/null +++ b/models/cv/classification/efficientnet_b1/ixrt/requirements.txt @@ -0,0 +1,5 @@ +tqdm +onnx +onnxsim +tabulate +pycuda \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b2/ixrt/README.md b/models/cv/classification/efficientnet_b2/ixrt/README.md index 6a3e2bdb7bfb5d642b57d4f099a81243ea7540b3..cc22549122bcc9ae4da6502a3d1dc511b82cbfd3 100644 --- a/models/cv/classification/efficientnet_b2/ixrt/README.md +++ b/models/cv/classification/efficientnet_b2/ixrt/README.md @@ -15,10 +15,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install tqdm -pip3 install onnx -pip3 install onnxsim -pip3 install tabulate +pip3 install -r requirements.txt ``` ### Download diff --git a/models/cv/classification/efficientnet_b2/ixrt/ci/prepare.sh b/models/cv/classification/efficientnet_b2/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..9ec3aff1152f60de69d1f5defdfc9b841fe4da79 --- /dev/null +++ b/models/cv/classification/efficientnet_b2/ixrt/ci/prepare.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r requirements.txt + +python3 export.py --weight /root/data/checkpoints/efficientnet_b2_rwightman-c35c1473.pth --output efficientnet_b2.onnx \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b2/ixrt/inference.py b/models/cv/classification/efficientnet_b2/ixrt/inference.py index 4afba6bcd2b951b51a448ed0d2ba3cf28678945a..e5a03525d992b741a2b2de63a76d3631e91de31f 100644 --- a/models/cv/classification/efficientnet_b2/ixrt/inference.py +++ b/models/cv/classification/efficientnet_b2/ixrt/inference.py @@ -83,6 +83,7 @@ def main(config): total_sample = 0 acc_top1, acc_top5 = 0, 0 + start_time = time.time() with tqdm(total= len(dataloader)) as _tqdm: for idx, (batch_data, batch_label) in enumerate(dataloader): batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) @@ -117,6 +118,9 @@ def main(config): _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), acc_5='{:.4f}'.format(acc_top5/total_sample)) _tqdm.update(1) + end_time = time.time() + e2e_time = end_time - start_time + print(F"E2E time : {e2e_time:.3f} seconds") err, = cudart.cudaFree(inputs[0]["allocation"]) assert err == cudart.cudaError_t.cudaSuccess err, = cudart.cudaFree(outputs[0]["allocation"]) diff --git a/models/cv/classification/efficientnet_b2/ixrt/requirements.txt b/models/cv/classification/efficientnet_b2/ixrt/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..e1eda59c3910ca96c73128bab86d534dbd55bbae --- /dev/null +++ b/models/cv/classification/efficientnet_b2/ixrt/requirements.txt @@ -0,0 +1,4 @@ +tqdm +onnx +onnxsim +tabulate \ No newline at end of file diff --git a/models/cv/classification/efficientnet_v2/ixrt/ci/prepare.sh b/models/cv/classification/efficientnet_v2/ixrt/ci/prepare.sh index 00824ca401c354de0dab84df85d6ae70567b0634..89b3d7e0f3e0ede46adeb2a033015c865e349cd4 100644 --- a/models/cv/classification/efficientnet_v2/ixrt/ci/prepare.sh +++ b/models/cv/classification/efficientnet_v2/ixrt/ci/prepare.sh @@ -1,3 +1,30 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + pip install -r requirements.txt mkdir -p checkpoints unzip /root/data/repos/pytorch-image-models-a852318b636a8.zip -d ./ diff --git a/models/cv/classification/googlenet/ixrt/ci/prepare.sh b/models/cv/classification/googlenet/ixrt/ci/prepare.sh index d1753c6aeb388d3442d4b3cf1bdfa80d8e71a494..8a8c7769f29d9b28bf27754e4d2f8f2b52bdb13a 100644 --- a/models/cv/classification/googlenet/ixrt/ci/prepare.sh +++ b/models/cv/classification/googlenet/ixrt/ci/prepare.sh @@ -1,3 +1,30 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + pip install -r requirements.txt mkdir checkpoints python3 export_onnx.py --origin_model /root/data/checkpoints/googlenet.pth --output_model checkpoints/googlenet.onnx \ No newline at end of file diff --git a/models/cv/classification/hrnet_w18/ixrt/README.md b/models/cv/classification/hrnet_w18/ixrt/README.md index 278d5427e513093372c6e8626595d4a4987fc296..2d38c4d463e6f06a402c2c8f65c9fd90708dc3b5 100644 --- a/models/cv/classification/hrnet_w18/ixrt/README.md +++ b/models/cv/classification/hrnet_w18/ixrt/README.md @@ -15,13 +15,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install tqdm -pip3 install onnx -pip3 install onnxsim -pip3 install tabulate -pip3 install ppq -pip3 install mmpretrain -pip3 install mmcv-lite +pip3 install -r requirements.txt ``` ### Download diff --git a/models/cv/classification/hrnet_w18/ixrt/ci/prepare.sh b/models/cv/classification/hrnet_w18/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..dbb45d9af4b002fe448c60fadadae51fbe367af8 --- /dev/null +++ b/models/cv/classification/hrnet_w18/ixrt/ci/prepare.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r requirements.txt +mkdir checkpoints +mkdir -p /root/.cache/torch/hub/checkpoints/ +ln -s /root/data/checkpoints/hrnet-w18_3rdparty_8xb32_in1k_20220120-0c10b180.pth /root/.cache/torch/hub/checkpoints/hrnet-w18_3rdparty_8xb32_in1k_20220120-0c10b180.pth +python3 export_onnx.py --output_model checkpoints/hrnet-w18.onnx \ No newline at end of file diff --git a/models/cv/classification/hrnet_w18/ixrt/inference.py b/models/cv/classification/hrnet_w18/ixrt/inference.py index 86f0cdf26617350b269a3f17d875869740f1ff02..47a3f640f5e05b5832eea0e95bd3db7f6a4e6c9d 100644 --- a/models/cv/classification/hrnet_w18/ixrt/inference.py +++ b/models/cv/classification/hrnet_w18/ixrt/inference.py @@ -83,6 +83,7 @@ def main(config): total_sample = 0 acc_top1, acc_top5 = 0, 0 + start_time = time.time() with tqdm(total= len(dataloader)) as _tqdm: for idx, (batch_data, batch_label) in enumerate(dataloader): batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) @@ -104,7 +105,9 @@ def main(config): _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), acc_5='{:.4f}'.format(acc_top5/total_sample)) _tqdm.update(1) - + end_time = time.time() + e2e_time = end_time - start_time + print(F"E2E time : {e2e_time:.3f} seconds") print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") acc1 = acc_top1/total_sample diff --git a/models/cv/classification/hrnet_w18/ixrt/requirements.txt b/models/cv/classification/hrnet_w18/ixrt/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..7d0f090a4e7f50dab602927aa50a9c8da86b4cd4 --- /dev/null +++ b/models/cv/classification/hrnet_w18/ixrt/requirements.txt @@ -0,0 +1,9 @@ +tqdm +onnx +onnxsim +tabulate +ppq +mmpretrain +mmcv-lite +pycuda +transformers==4.37.1 \ No newline at end of file diff --git a/models/cv/classification/inception_v3/ixrt/ci/prepare.sh b/models/cv/classification/inception_v3/ixrt/ci/prepare.sh index 82636b364674f53ef4f6fb998f9ca98ade919d05..a9b110fd5b38dd9e03f3c87be750030fdce50051 100644 --- a/models/cv/classification/inception_v3/ixrt/ci/prepare.sh +++ b/models/cv/classification/inception_v3/ixrt/ci/prepare.sh @@ -1,3 +1,30 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + pip install -r requirements.txt mkdir checkpoints python3 export_onnx.py --origin_model /root/data/checkpoints/inception_v3.pth --output_model checkpoints/inception_v3.onnx \ No newline at end of file diff --git a/models/cv/classification/inceptionresnetv2/ixrt/README.md b/models/cv/classification/inceptionresnetv2/ixrt/README.md index 6469019389d5b4e42107ea77da0f70fdfb5a0ae3..e55a13a23ee5de7a231be1bfc46963a7cd3d1159 100755 --- a/models/cv/classification/inceptionresnetv2/ixrt/README.md +++ b/models/cv/classification/inceptionresnetv2/ixrt/README.md @@ -15,17 +15,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install tqdm -pip3 install onnx -pip3 install onnxsim -pip3 install ultralytics -pip3 install pycocotools -pip3 install Pillow -pip3 install tabulate -pip3 install pycuda -pip3 install opencv-python==4.6.0.66 -pip3 install ppq -pip3 install protobuf==3.20.0 +pip3 install -r requirements.txt ``` ### Download diff --git a/models/cv/classification/inceptionresnetv2/ixrt/ci/prepare.sh b/models/cv/classification/inceptionresnetv2/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..2221e9b35aadf9c0954b317c2738da2024f0d0f6 --- /dev/null +++ b/models/cv/classification/inceptionresnetv2/ixrt/ci/prepare.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r requirements.txt +mkdir checkpoints +mkdir -p /root/.cache/torch/hub/checkpoints/ +ln -s /root/data/checkpoints/inceptionresnetv2-520b38e4.pth /root/.cache/torch/hub/checkpoints/inceptionresnetv2-520b38e4.pth +python3 export_model.py --output_model ./checkpoints/inceptionresnetv2.onnx \ No newline at end of file diff --git a/models/cv/classification/inceptionresnetv2/ixrt/inference.py b/models/cv/classification/inceptionresnetv2/ixrt/inference.py index 569cad96b18e3c46ba49db0a12cbf96c805f2591..17f473bf0ce6173285225a479da901d3e5ae9cb4 100644 --- a/models/cv/classification/inceptionresnetv2/ixrt/inference.py +++ b/models/cv/classification/inceptionresnetv2/ixrt/inference.py @@ -86,7 +86,7 @@ def main(config): total_sample = 0 acc_top1, acc_top5 = 0, 0 - + start_time = time.time() with tqdm(total= len(dataloader)) as _tqdm: for idx, (batch_data, batch_label) in enumerate(dataloader): output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) @@ -110,7 +110,8 @@ def main(config): _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), acc_5='{:.4f}'.format(acc_top5/total_sample)) _tqdm.update(1) - + e2e_time = time.time() - start_time + print(F"E2E time : {e2e_time:.3f} seconds") print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") acc1 = acc_top1/total_sample diff --git a/models/cv/classification/inceptionresnetv2/ixrt/requirements.txt b/models/cv/classification/inceptionresnetv2/ixrt/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..7d96aa09700bcd37d95057ee6f3c44e07a695394 --- /dev/null +++ b/models/cv/classification/inceptionresnetv2/ixrt/requirements.txt @@ -0,0 +1,11 @@ +tqdm +onnx +onnxsim +ultralytics +pycocotools +Pillow +tabulate +pycuda +opencv-python==4.6.0.66 +ppq +protobuf==3.20.0 \ No newline at end of file diff --git a/models/cv/classification/mobilenet_v2/ixrt/ci/prepare.sh b/models/cv/classification/mobilenet_v2/ixrt/ci/prepare.sh index aa0cae6e678dff13d1c2c451b30c79cee06e2ac3..277cd41a5a84724a83df9d4bb1f5958b2deca06d 100644 --- a/models/cv/classification/mobilenet_v2/ixrt/ci/prepare.sh +++ b/models/cv/classification/mobilenet_v2/ixrt/ci/prepare.sh @@ -1,3 +1,30 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + pip install -r requirements.txt mkdir checkpoints python3 export_onnx.py --origin_model /root/data/checkpoints/mobilenet_v2.pth --output_model checkpoints/mobilenet_v2.onnx \ No newline at end of file diff --git a/models/cv/classification/mobilenet_v3/ixrt/ci/prepare.sh b/models/cv/classification/mobilenet_v3/ixrt/ci/prepare.sh index 34932cee67b8e17e65c9e9a84b8bc06f9a695284..31817b28a8b26c3d0f760fe6f8fa1b42b53a1b81 100644 --- a/models/cv/classification/mobilenet_v3/ixrt/ci/prepare.sh +++ b/models/cv/classification/mobilenet_v3/ixrt/ci/prepare.sh @@ -1,3 +1,30 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + pip install -r requirements.txt mkdir checkpoints python3 export_onnx.py --origin_model /root/data/checkpoints/mobilenet_v3.pth --output_model checkpoints/mobilenet_v3.onnx \ No newline at end of file diff --git a/models/cv/classification/repvgg/ixrt/README.md b/models/cv/classification/repvgg/ixrt/README.md index 37bbbcd480afdbddb3fd622c2eeacb59f2faeacb..e93c626a8c7f26e17dd7b152edd8b02af3682fbc 100644 --- a/models/cv/classification/repvgg/ixrt/README.md +++ b/models/cv/classification/repvgg/ixrt/README.md @@ -16,13 +16,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install tqdm -pip3 install tabulate -pip3 install onnx -pip3 install onnxsim -pip3 install opencv-python==4.6.0.66 -pip3 install mmcls==0.24.0 -pip3 install mmcv==1.5.3 +pip3 install -r requirements.txt ``` ### Download @@ -33,12 +27,10 @@ Dataset: to download the validation dat ```bash mkdir checkpoints -cd checkpoints git clone -b v0.24.0 https://github.com/open-mmlab/mmpretrain.git -cd .. python3 export_onnx.py \ - --config_file ./checkpoints/mmpretrain/configs/repvgg/repvgg-A0_4xb64-coslr-120e_in1k.py \ + --config_file ./mmpretrain/configs/repvgg/repvgg-A0_4xb64-coslr-120e_in1k.py \ --checkpoint_file https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-A0_3rdparty_4xb64-coslr-120e_in1k_20210909-883ab98c.pth \ --output_model ./checkpoints/repvgg_A0.onnx ``` diff --git a/models/cv/classification/repvgg/ixrt/ci/prepare.sh b/models/cv/classification/repvgg/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..221eb5dd7975b47d959d34e6af259a43c68b7983 --- /dev/null +++ b/models/cv/classification/repvgg/ixrt/ci/prepare.sh @@ -0,0 +1,34 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r requirements.txt +unzip -q /root/data/repos/mmpretrain-0.24.0.zip -d ./ +mkdir -p checkpoints +python3 export_onnx.py \ + --config_file ./mmpretrain/configs/repvgg/repvgg-A0_4xb64-coslr-120e_in1k.py \ + --checkpoint_file /root/data/checkpoints/repvgg-A0_3rdparty_4xb64-coslr-120e_in1k_20210909-883ab98c.pth \ + --output_model ./checkpoints/repvgg_A0.onnx \ No newline at end of file diff --git a/models/cv/classification/repvgg/ixrt/export_onnx.py b/models/cv/classification/repvgg/ixrt/export_onnx.py index 254f3a0e0ac08e8307171bc2f6c507581409ff74..9ba381cc39d55191045a159b46c2db2a8f0ed031 100644 --- a/models/cv/classification/repvgg/ixrt/export_onnx.py +++ b/models/cv/classification/repvgg/ixrt/export_onnx.py @@ -42,7 +42,7 @@ args = parse_args() config_file = args.config_file checkpoint_file = args.checkpoint_file model = Model().eval() -x = torch.zeros(1, 3, 224, 224).to(device) +x = torch.zeros(32, 3, 224, 224).to(device) with torch.no_grad(): output = model(x) diff --git a/models/cv/classification/repvgg/ixrt/inference.py b/models/cv/classification/repvgg/ixrt/inference.py index 1ec56b4a1f09ee4bd7516461f758ac121a5346a0..11a90c79c1364f1195bcc8b43525c901a9d9d6cf 100644 --- a/models/cv/classification/repvgg/ixrt/inference.py +++ b/models/cv/classification/repvgg/ixrt/inference.py @@ -84,6 +84,7 @@ def main(config): total_sample = 0 acc_top1, acc_top5 = 0, 0 + start_time = time.time() with tqdm(total= len(dataloader)) as _tqdm: for idx, (batch_data, batch_label) in enumerate(dataloader): batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) @@ -105,7 +106,9 @@ def main(config): _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), acc_5='{:.4f}'.format(acc_top5/total_sample)) _tqdm.update(1) - + end_time = time.time() + e2e_time = end_time - start_time + print(F"E2E time : {e2e_time:.3f} seconds") print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") acc1 = acc_top1/total_sample diff --git a/models/cv/classification/repvgg/ixrt/requirements.txt b/models/cv/classification/repvgg/ixrt/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..566974bb7c93c6ce7842a2199473c9518046c6fc --- /dev/null +++ b/models/cv/classification/repvgg/ixrt/requirements.txt @@ -0,0 +1,8 @@ +tqdm +tabulate +onnx +onnxsim +opencv-python==4.6.0.66 +mmcls==0.24.0 +mmcv==1.5.3 +pycuda \ No newline at end of file diff --git a/models/cv/classification/res2net50/ixrt/ci/prepare.sh b/models/cv/classification/res2net50/ixrt/ci/prepare.sh index d9c0022795f4ecd9dee9e312b4e8badb7fae3b7c..9725f3660ae192fe5c0dbb12ed2ec85886f9472f 100644 --- a/models/cv/classification/res2net50/ixrt/ci/prepare.sh +++ 
b/models/cv/classification/res2net50/ixrt/ci/prepare.sh @@ -1,3 +1,30 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + pip install -r requirements.txt mkdir checkpoints python3 export_onnx.py --origin_model /root/data/checkpoints/res2net50.pth --output_model checkpoints/res2net50.onnx \ No newline at end of file diff --git a/models/cv/classification/resnet101/ixrt/README.md b/models/cv/classification/resnet101/ixrt/README.md index 92d6603e21ee4d21ead489aea9b517e09aa87026..d85e312077a4e26b1711986b3fcd5559ff39a2bf 100644 --- a/models/cv/classification/resnet101/ixrt/README.md +++ b/models/cv/classification/resnet101/ixrt/README.md @@ -15,10 +15,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install tqdm -pip3 install onnx -pip3 install onnxsim -pip3 install tabulate +pip3 install -r requirements.txt ``` ### Download diff --git a/models/cv/classification/resnet101/ixrt/ci/prepare.sh b/models/cv/classification/resnet101/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..701c19ee642e67bfdae94541ecdb3f4bc31cd52b --- /dev/null +++ b/models/cv/classification/resnet101/ixrt/ci/prepare.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r requirements.txt +mkdir checkpoints +mkdir -p /root/.cache/torch/hub/checkpoints/ +ln -s /root/data/checkpoints/resnet101-63fe2227.pth /root/.cache/torch/hub/checkpoints/resnet101-63fe2227.pth +python3 export_onnx.py --output_model checkpoints/resnet101.onnx \ No newline at end of file diff --git a/models/cv/classification/resnet101/ixrt/inference.py b/models/cv/classification/resnet101/ixrt/inference.py index 1ec56b4a1f09ee4bd7516461f758ac121a5346a0..11a90c79c1364f1195bcc8b43525c901a9d9d6cf 100644 --- a/models/cv/classification/resnet101/ixrt/inference.py +++ b/models/cv/classification/resnet101/ixrt/inference.py @@ -84,6 +84,7 @@ def main(config): total_sample = 0 acc_top1, acc_top5 = 0, 0 + start_time = time.time() with tqdm(total= len(dataloader)) as _tqdm: for idx, (batch_data, batch_label) in enumerate(dataloader): batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) @@ -105,7 +106,9 @@ def main(config): _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), acc_5='{:.4f}'.format(acc_top5/total_sample)) _tqdm.update(1) - + end_time = time.time() + e2e_time = end_time - start_time + print(F"E2E time : {e2e_time:.3f} seconds") print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") acc1 = acc_top1/total_sample diff --git a/models/cv/classification/resnet101/ixrt/requirements.txt b/models/cv/classification/resnet101/ixrt/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc645b22bbcf3675e3bfe6f8d2da91ca8c089b3a --- /dev/null +++ b/models/cv/classification/resnet101/ixrt/requirements.txt @@ -0,0 +1,5 @@ +tqdm +onnx +onnxsim +tabulate +pycuda \ No newline at end of file diff --git a/models/cv/classification/resnet18/ixrt/ci/prepare.sh b/models/cv/classification/resnet18/ixrt/ci/prepare.sh index bfa46b49a4b32a7eb08f77f15288799e4ad0db2d..eadb2c8f9eabec0509372623ea91b1e7c28e420d 100644 --- a/models/cv/classification/resnet18/ixrt/ci/prepare.sh +++ b/models/cv/classification/resnet18/ixrt/ci/prepare.sh @@ -1,3 +1,30 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + pip install -r requirements.txt mkdir checkpoints python3 export_onnx.py --origin_model /root/data/checkpoints/resnet18.pth --output_model checkpoints/resnet18.onnx \ No newline at end of file diff --git a/models/cv/classification/resnet34/ixrt/README.md b/models/cv/classification/resnet34/ixrt/README.md index 243c536ce89fd06d45c9d21adf8d1c1baa7c333b..8855611eb32940951ea805d628c6a4fac27820f0 100644 --- a/models/cv/classification/resnet34/ixrt/README.md +++ b/models/cv/classification/resnet34/ixrt/README.md @@ -15,10 +15,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install tqdm -pip3 install onnx -pip3 install onnxsim -pip3 install tabulate +pip3 install -r requirements.txt ``` ### Download diff --git a/models/cv/classification/resnet34/ixrt/ci/prepare.sh b/models/cv/classification/resnet34/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..7916795d08ed3978807c88e33178f632874c1a4a --- /dev/null +++ b/models/cv/classification/resnet34/ixrt/ci/prepare.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r requirements.txt +mkdir checkpoints +mkdir -p /root/.cache/torch/hub/checkpoints/ +ln -s /root/data/checkpoints/resnet34-b627a593.pth /root/.cache/torch/hub/checkpoints/resnet34-b627a593.pth +python3 export_onnx.py --output_model checkpoints/resnet34.onnx \ No newline at end of file diff --git a/models/cv/classification/resnet34/ixrt/inference.py b/models/cv/classification/resnet34/ixrt/inference.py index 2c9dcb3f9cc5b9a26903651a31fafa16d8f0db31..77a1888db603a6f5102586716326d1aaf0518fd0 100644 --- a/models/cv/classification/resnet34/ixrt/inference.py +++ b/models/cv/classification/resnet34/ixrt/inference.py @@ -83,6 +83,7 @@ def main(config): total_sample = 0 acc_top1, acc_top5 = 0, 0 + start_time = time.time() with tqdm(total= len(dataloader)) as _tqdm: for idx, (batch_data, batch_label) in enumerate(dataloader): batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) @@ -105,6 +106,9 @@ def main(config): acc_5='{:.4f}'.format(acc_top5/total_sample)) _tqdm.update(1) + end_time = time.time() + e2e_time = end_time - start_time + print(F"E2E time : {e2e_time:.3f} seconds") print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") acc1 = acc_top1/total_sample diff --git a/models/cv/classification/resnet34/ixrt/requirements.txt b/models/cv/classification/resnet34/ixrt/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc645b22bbcf3675e3bfe6f8d2da91ca8c089b3a --- /dev/null +++ b/models/cv/classification/resnet34/ixrt/requirements.txt @@ -0,0 +1,5 @@ +tqdm +onnx +onnxsim +tabulate +pycuda \ No newline at end of file diff --git a/models/cv/classification/resnet50/ixrt/ci/prepare.sh b/models/cv/classification/resnet50/ixrt/ci/prepare.sh index 5dddbc64fb6846f72baa1092e55a04d7c7a90a94..7bdae79a0d817c9a4b7173e936c4695788e4dc5d 100644 --- a/models/cv/classification/resnet50/ixrt/ci/prepare.sh +++ b/models/cv/classification/resnet50/ixrt/ci/prepare.sh @@ -1,3 +1,30 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + pip install -r requirements.txt mkdir checkpoints python3 export_onnx.py --origin_model /root/data/checkpoints/resnet50.pth --output_model checkpoints/resnet50.onnx \ No newline at end of file diff --git a/models/cv/classification/resnetv1d50/ixrt/README.md b/models/cv/classification/resnetv1d50/ixrt/README.md index 0214bbd6d8bb5ebb75dab05f9a9b04c6145f5d92..3b49e01e335238b7ba4a66afb42f9f64331f1a3c 100644 --- a/models/cv/classification/resnetv1d50/ixrt/README.md +++ b/models/cv/classification/resnetv1d50/ixrt/README.md @@ -15,13 +15,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install tqdm -pip3 install onnx -pip3 install onnxsim -pip3 install tabulate -pip3 install ppq -pip3 install mmpretrain -pip3 install mmcv-lite +pip3 install -r requirements.txt ``` ### Download @@ -41,25 +35,25 @@ python3 export_onnx.py --output_model checkpoints/resnet_v1_d50.onnx export DATASETS_DIR=/path/to/imagenet_val/ export CHECKPOINTS_DIR=./checkpoints export RUN_DIR=./ -export CONFIG_DIR=config/RESNET_V1_D50_CONFIG +export CONFIG_DIR=config/RESNETV1D50_CONFIG ``` ### FP16 ```bash # Accuracy -bash scripts/infer_resnet_v1_d50_fp16_accuracy.sh +bash scripts/infer_resnetv1d50_fp16_accuracy.sh # Performance -bash scripts/infer_resnet_v1_d50_fp16_performance.sh +bash scripts/infer_resnetv1d50_fp16_performance.sh ``` ### INT8 ```bash # Accuracy -bash scripts/infer_resnet_v1_d50_int8_accuracy.sh +bash scripts/infer_resnetv1d50_int8_accuracy.sh # Performance -bash scripts/infer_resnet_v1_d50_int8_performance.sh +bash scripts/infer_resnetv1d50_int8_performance.sh ``` ## Results diff --git a/models/cv/classification/resnetv1d50/ixrt/ci/prepare.sh b/models/cv/classification/resnetv1d50/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..e4f74d0fc78233f8efbc9a416a0b08c925dc1f53 --- /dev/null +++ b/models/cv/classification/resnetv1d50/ixrt/ci/prepare.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r requirements.txt +mkdir checkpoints +mkdir -p /root/.cache/torch/hub/checkpoints/ +ln -s /root/data/checkpoints/resnetv1d50_b32x8_imagenet_20210531-db14775a.pth /root/.cache/torch/hub/checkpoints/resnetv1d50_b32x8_imagenet_20210531-db14775a.pth +python3 export_onnx.py --output_model checkpoints/resnet_v1_d50.onnx \ No newline at end of file diff --git a/models/cv/classification/resnetv1d50/ixrt/config/RESNET_V1_D50_CONFIG b/models/cv/classification/resnetv1d50/ixrt/config/RESNETV1D50_CONFIG similarity index 100% rename from models/cv/classification/resnetv1d50/ixrt/config/RESNET_V1_D50_CONFIG rename to models/cv/classification/resnetv1d50/ixrt/config/RESNETV1D50_CONFIG diff --git a/models/cv/classification/resnetv1d50/ixrt/inference.py b/models/cv/classification/resnetv1d50/ixrt/inference.py index 2c9dcb3f9cc5b9a26903651a31fafa16d8f0db31..6d34c88c5171966d93de058ee1baa653efe5a9ff 100644 --- a/models/cv/classification/resnetv1d50/ixrt/inference.py +++ b/models/cv/classification/resnetv1d50/ixrt/inference.py @@ -83,6 +83,7 @@ def main(config): total_sample = 0 acc_top1, acc_top5 = 0, 0 + start_time = time.time() with tqdm(total= len(dataloader)) as _tqdm: for idx, (batch_data, batch_label) in enumerate(dataloader): batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) @@ -104,7 +105,9 @@ def main(config): _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), acc_5='{:.4f}'.format(acc_top5/total_sample)) _tqdm.update(1) - + end_time = time.time() + e2e_time = end_time - start_time + print(F"E2E time : {e2e_time:.3f} seconds") print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") acc1 = acc_top1/total_sample diff --git a/models/cv/classification/resnetv1d50/ixrt/requirements.txt b/models/cv/classification/resnetv1d50/ixrt/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..7d0f090a4e7f50dab602927aa50a9c8da86b4cd4 --- /dev/null +++ b/models/cv/classification/resnetv1d50/ixrt/requirements.txt @@ -0,0 +1,9 @@ +tqdm +onnx +onnxsim +tabulate +ppq +mmpretrain +mmcv-lite +pycuda +transformers==4.37.1 \ No newline at end of file diff --git a/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnet_v1_d50_fp16_accuracy.sh b/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_fp16_accuracy.sh similarity index 100% rename from models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnet_v1_d50_fp16_accuracy.sh rename to models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_fp16_accuracy.sh diff --git a/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnet_v1_d50_fp16_performance.sh b/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_fp16_performance.sh similarity index 100% rename from models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnet_v1_d50_fp16_performance.sh rename to models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_fp16_performance.sh diff --git a/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnet_v1_d50_int8_accuracy.sh b/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_int8_accuracy.sh similarity index 100% rename from 
models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnet_v1_d50_int8_accuracy.sh rename to models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_int8_accuracy.sh diff --git a/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnet_v1_d50_int8_performance.sh b/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_int8_performance.sh similarity index 100% rename from models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnet_v1_d50_int8_performance.sh rename to models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_int8_performance.sh diff --git a/models/cv/classification/resnext50_32x4d/ixrt/README.md b/models/cv/classification/resnext50_32x4d/ixrt/README.md index 79d140cdca32e0d9fce4f07952ef58bf743fff59..0c7ed2fe0e20e82535660fbe4e707bf3c18e1371 100644 --- a/models/cv/classification/resnext50_32x4d/ixrt/README.md +++ b/models/cv/classification/resnext50_32x4d/ixrt/README.md @@ -9,14 +9,7 @@ The ResNeXt50_32x4d model is a convolutional neural network architecture designe ### Install ```bash -pip3 install onnx -pip3 install tqdm -pip3 install tqdm -pip3 install onnx -pip3 install onnxsim -pip3 install tabulate -pip3 install ppq -pip3 install cuda-python +pip3 install -r requirements.txt ``` ### Download diff --git a/models/cv/classification/resnext50_32x4d/ixrt/ci/prepare.sh b/models/cv/classification/resnext50_32x4d/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..f264fc493c1a583bff33be5d625c533f99cb1f49 --- /dev/null +++ b/models/cv/classification/resnext50_32x4d/ixrt/ci/prepare.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r requirements.txt +python3 export.py --weight /root/data/checkpoints/resnext50_32x4d-7cdf4587.pth --output resnext50_32x4d.onnx \ No newline at end of file diff --git a/models/cv/classification/resnext50_32x4d/ixrt/inference.py b/models/cv/classification/resnext50_32x4d/ixrt/inference.py index 4afba6bcd2b951b51a448ed0d2ba3cf28678945a..e5a03525d992b741a2b2de63a76d3631e91de31f 100644 --- a/models/cv/classification/resnext50_32x4d/ixrt/inference.py +++ b/models/cv/classification/resnext50_32x4d/ixrt/inference.py @@ -83,6 +83,7 @@ def main(config): total_sample = 0 acc_top1, acc_top5 = 0, 0 + start_time = time.time() with tqdm(total= len(dataloader)) as _tqdm: for idx, (batch_data, batch_label) in enumerate(dataloader): batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) @@ -117,6 +118,9 @@ def main(config): _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), acc_5='{:.4f}'.format(acc_top5/total_sample)) _tqdm.update(1) + end_time = time.time() + e2e_time = end_time - start_time + print(F"E2E time : {e2e_time:.3f} seconds") err, = cudart.cudaFree(inputs[0]["allocation"]) assert err == cudart.cudaError_t.cudaSuccess err, = cudart.cudaFree(outputs[0]["allocation"]) diff --git a/models/cv/classification/resnext50_32x4d/ixrt/requirements.txt b/models/cv/classification/resnext50_32x4d/ixrt/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..54599ec2e0e86396e2c3c8cdb87ac005b943ca39 --- /dev/null +++ b/models/cv/classification/resnext50_32x4d/ixrt/requirements.txt @@ -0,0 +1,8 @@ +onnx +tqdm +tqdm +onnx +onnxsim +tabulate +ppq +cuda-python \ No newline at end of file diff --git a/models/cv/classification/shufflenet_v1/ixrt/README.md b/models/cv/classification/shufflenet_v1/ixrt/README.md index 4dee5cae70e0259f6abf2d47d90b6689d3e0b758..94c884328cdeaa9076fe3bcac29b41603d2751e7 100644 --- a/models/cv/classification/shufflenet_v1/ixrt/README.md +++ b/models/cv/classification/shufflenet_v1/ixrt/README.md @@ -29,12 +29,10 @@ Dataset: to download the validation dat ```bash mkdir checkpoints -cd checkpoints git clone -b v0.24.0 https://github.com/open-mmlab/mmpretrain.git -cd .. python3 export_onnx.py \ - --config_file ./checkpoints/mmpretrain/configs/shufflenet_v1/shufflenet-v1-1x_16xb64_in1k.py \ + --config_file ./mmpretrain/configs/shufflenet_v1/shufflenet-v1-1x_16xb64_in1k.py \ --checkpoint_file ./shufflenet_v1_batch1024_imagenet_20200804-5d6cec73.pth \ --output_model ./checkpoints/shufflenet_v1.onnx ``` diff --git a/models/cv/classification/shufflenet_v1/ixrt/ci/prepare.sh b/models/cv/classification/shufflenet_v1/ixrt/ci/prepare.sh index a426ed334df241c83cf6932c2a60464b39e0bb97..bea7f22b2525ca1c33af3f281814a2c59a94c11a 100644 --- a/models/cv/classification/shufflenet_v1/ixrt/ci/prepare.sh +++ b/models/cv/classification/shufflenet_v1/ixrt/ci/prepare.sh @@ -14,10 +14,21 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + pip install -r requirements.txt mkdir -p checkpoints -unzip -q /root/data/repos/mmpretrain-0.24.0.zip -d ./checkpoints/ +unzip -q /root/data/repos/mmpretrain-0.24.0.zip -d ./ python3 export_onnx.py \ ---config_file ./checkpoints/mmpretrain/configs/shufflenet_v1/shufflenet-v1-1x_16xb64_in1k.py \ +--config_file ./mmpretrain/configs/shufflenet_v1/shufflenet-v1-1x_16xb64_in1k.py \ --checkpoint_file /root/data/checkpoints/shufflenet_v1.pth \ --output_model ./checkpoints/shufflenet_v1.onnx \ No newline at end of file diff --git a/models/cv/classification/squeezenet_1.0/ixrt/README.md b/models/cv/classification/squeezenet_v1_0/ixrt/README.md similarity index 79% rename from models/cv/classification/squeezenet_1.0/ixrt/README.md rename to models/cv/classification/squeezenet_v1_0/ixrt/README.md index b1d51b640eee503e5f44f82ed0693ef7c46d31b8..6af82041a9d801ec51b70b083eadd78483d228bf 100644 --- a/models/cv/classification/squeezenet_1.0/ixrt/README.md +++ b/models/cv/classification/squeezenet_v1_0/ixrt/README.md @@ -17,9 +17,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install tqdm -pip3 install onnxsim -pip3 install opencv-python==4.6.0.66 +pip3 install -r requirements.txt ``` ### Download @@ -42,25 +40,25 @@ export PROJ_DIR=./ export DATASETS_DIR=/path/to/imagenet_val/ export CHECKPOINTS_DIR=./checkpoints export RUN_DIR=./ -export CONFIG_DIR=config/SQUEEZENET_V10_CONFIG +export CONFIG_DIR=config/SQUEEZENET_V1_0_CONFIG ``` ### FP16 ```bash # Accuracy -bash scripts/infer_squeezenet_v10_fp16_accuracy.sh +bash scripts/infer_squeezenet_v1_0_fp16_accuracy.sh # Performance -bash scripts/infer_squeezenet_v10_fp16_performance.sh +bash scripts/infer_squeezenet_v1_0_fp16_performance.sh ``` ### INT8 ```bash # Accuracy -bash scripts/infer_squeezenet_v10_int8_accuracy.sh +bash scripts/infer_squeezenet_v1_0_int8_accuracy.sh # Performance -bash scripts/infer_squeezenet_v10_int8_performance.sh +bash scripts/infer_squeezenet_v1_0_int8_performance.sh ``` ## Results diff --git a/models/cv/classification/squeezenet_1.0/ixrt/build_engine.py b/models/cv/classification/squeezenet_v1_0/ixrt/build_engine.py similarity index 100% rename from models/cv/classification/squeezenet_1.0/ixrt/build_engine.py rename to models/cv/classification/squeezenet_v1_0/ixrt/build_engine.py diff --git a/models/cv/classification/squeezenet_1.0/ixrt/calibration_dataset.py b/models/cv/classification/squeezenet_v1_0/ixrt/calibration_dataset.py similarity index 100% rename from models/cv/classification/squeezenet_1.0/ixrt/calibration_dataset.py rename to models/cv/classification/squeezenet_v1_0/ixrt/calibration_dataset.py diff --git a/models/cv/classification/squeezenet_v1_0/ixrt/ci/prepare.sh b/models/cv/classification/squeezenet_v1_0/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..063ff0eca7d8ab331444169d93a20dc4645da8ea --- /dev/null +++ b/models/cv/classification/squeezenet_v1_0/ixrt/ci/prepare.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r requirements.txt +mkdir checkpoints +python3 export_onnx.py --origin_model /root/data/checkpoints/squeezenet1_0-b66bff10.pth --output_model checkpoints/squeezenetv10.onnx \ No newline at end of file diff --git a/models/cv/classification/squeezenet_1.0/ixrt/common.py b/models/cv/classification/squeezenet_v1_0/ixrt/common.py similarity index 100% rename from models/cv/classification/squeezenet_1.0/ixrt/common.py rename to models/cv/classification/squeezenet_v1_0/ixrt/common.py diff --git a/models/cv/classification/squeezenet_1.0/ixrt/config/SQUEEZENET_V10_CONFIG b/models/cv/classification/squeezenet_v1_0/ixrt/config/SQUEEZENET_V1_0_CONFIG similarity index 100% rename from models/cv/classification/squeezenet_1.0/ixrt/config/SQUEEZENET_V10_CONFIG rename to models/cv/classification/squeezenet_v1_0/ixrt/config/SQUEEZENET_V1_0_CONFIG diff --git a/models/cv/classification/squeezenet_1.0/ixrt/export_onnx.py b/models/cv/classification/squeezenet_v1_0/ixrt/export_onnx.py similarity index 100% rename from models/cv/classification/squeezenet_1.0/ixrt/export_onnx.py rename to models/cv/classification/squeezenet_v1_0/ixrt/export_onnx.py diff --git a/models/cv/classification/squeezenet_1.0/ixrt/inference.py b/models/cv/classification/squeezenet_v1_0/ixrt/inference.py similarity index 97% rename from models/cv/classification/squeezenet_1.0/ixrt/inference.py rename to models/cv/classification/squeezenet_v1_0/ixrt/inference.py index 1ec56b4a1f09ee4bd7516461f758ac121a5346a0..11a90c79c1364f1195bcc8b43525c901a9d9d6cf 100644 --- a/models/cv/classification/squeezenet_1.0/ixrt/inference.py +++ b/models/cv/classification/squeezenet_v1_0/ixrt/inference.py @@ -84,6 +84,7 @@ def main(config): total_sample = 0 acc_top1, acc_top5 = 0, 0 + start_time = time.time() with tqdm(total= len(dataloader)) as _tqdm: for idx, (batch_data, batch_label) in enumerate(dataloader): batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) @@ -105,7 +106,9 @@ def main(config): _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), acc_5='{:.4f}'.format(acc_top5/total_sample)) _tqdm.update(1) - + end_time = time.time() + e2e_time = end_time - start_time + print(F"E2E time : {e2e_time:.3f} seconds") print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") acc1 = acc_top1/total_sample diff --git a/models/cv/classification/squeezenet_1.0/ixrt/modify_batchsize.py b/models/cv/classification/squeezenet_v1_0/ixrt/modify_batchsize.py similarity index 100% rename from models/cv/classification/squeezenet_1.0/ixrt/modify_batchsize.py rename to models/cv/classification/squeezenet_v1_0/ixrt/modify_batchsize.py diff --git a/models/cv/classification/squeezenet_1.0/ixrt/quant.py b/models/cv/classification/squeezenet_v1_0/ixrt/quant.py similarity index 100% rename from 
models/cv/classification/squeezenet_1.0/ixrt/quant.py rename to models/cv/classification/squeezenet_v1_0/ixrt/quant.py diff --git a/models/cv/classification/squeezenet_1.0/ixrt/refine_model.py b/models/cv/classification/squeezenet_v1_0/ixrt/refine_model.py similarity index 100% rename from models/cv/classification/squeezenet_1.0/ixrt/refine_model.py rename to models/cv/classification/squeezenet_v1_0/ixrt/refine_model.py diff --git a/models/cv/classification/squeezenet_v1_0/ixrt/requirements.txt b/models/cv/classification/squeezenet_v1_0/ixrt/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..8ea6ea9020b34ae4f142ce3b1f96e7321029b802 --- /dev/null +++ b/models/cv/classification/squeezenet_v1_0/ixrt/requirements.txt @@ -0,0 +1,4 @@ +tqdm +onnxsim +opencv-python==4.6.0.66 +pycuda \ No newline at end of file diff --git a/models/cv/classification/squeezenet_1.0/ixrt/scripts/infer_squeezenet_v10_fp16_accuracy.sh b/models/cv/classification/squeezenet_v1_0/ixrt/scripts/infer_squeezenet_v1_0_fp16_accuracy.sh similarity index 100% rename from models/cv/classification/squeezenet_1.0/ixrt/scripts/infer_squeezenet_v10_fp16_accuracy.sh rename to models/cv/classification/squeezenet_v1_0/ixrt/scripts/infer_squeezenet_v1_0_fp16_accuracy.sh diff --git a/models/cv/classification/squeezenet_1.0/ixrt/scripts/infer_squeezenet_v10_fp16_performance.sh b/models/cv/classification/squeezenet_v1_0/ixrt/scripts/infer_squeezenet_v1_0_fp16_performance.sh similarity index 100% rename from models/cv/classification/squeezenet_1.0/ixrt/scripts/infer_squeezenet_v10_fp16_performance.sh rename to models/cv/classification/squeezenet_v1_0/ixrt/scripts/infer_squeezenet_v1_0_fp16_performance.sh diff --git a/models/cv/classification/squeezenet_1.0/ixrt/scripts/infer_squeezenet_v10_int8_accuracy.sh b/models/cv/classification/squeezenet_v1_0/ixrt/scripts/infer_squeezenet_v1_0_int8_accuracy.sh similarity index 100% rename from models/cv/classification/squeezenet_1.0/ixrt/scripts/infer_squeezenet_v10_int8_accuracy.sh rename to models/cv/classification/squeezenet_v1_0/ixrt/scripts/infer_squeezenet_v1_0_int8_accuracy.sh diff --git a/models/cv/classification/squeezenet_1.0/ixrt/scripts/infer_squeezenet_v10_int8_performance.sh b/models/cv/classification/squeezenet_v1_0/ixrt/scripts/infer_squeezenet_v1_0_int8_performance.sh similarity index 100% rename from models/cv/classification/squeezenet_1.0/ixrt/scripts/infer_squeezenet_v10_int8_performance.sh rename to models/cv/classification/squeezenet_v1_0/ixrt/scripts/infer_squeezenet_v1_0_int8_performance.sh diff --git a/models/cv/classification/squeezenet_1.0/ixrt/simplify_model.py b/models/cv/classification/squeezenet_v1_0/ixrt/simplify_model.py similarity index 100% rename from models/cv/classification/squeezenet_1.0/ixrt/simplify_model.py rename to models/cv/classification/squeezenet_v1_0/ixrt/simplify_model.py diff --git a/models/cv/classification/squeezenet_v1_1/ixrt/ci/prepare.sh b/models/cv/classification/squeezenet_v1_1/ixrt/ci/prepare.sh index b80c01e5d6590a25e0e5410ad0de9d2b40c70b49..b88bcb1f4436981f1bc008920402a8135de1eb7b 100644 --- a/models/cv/classification/squeezenet_v1_1/ixrt/ci/prepare.sh +++ b/models/cv/classification/squeezenet_v1_1/ixrt/ci/prepare.sh @@ -1,3 +1,30 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + pip install -r requirements.txt mkdir checkpoints python3 export_onnx.py --origin_model /root/data/checkpoints/squeezenet_v1_1.pth --output_model checkpoints/squeezenet_v1_1.onnx \ No newline at end of file diff --git a/models/cv/classification/swin_transformer_large/ixrt/README.md b/models/cv/classification/swin_transformer_large/ixrt/README.md index 7f2282f9fcfdfc756cebc98df53c11c0d03c339c..e6cf19d4a5ca943f0a1a74608613a4f56bbd8f8b 100644 --- a/models/cv/classification/swin_transformer_large/ixrt/README.md +++ b/models/cv/classification/swin_transformer_large/ixrt/README.md @@ -15,16 +15,7 @@ cd ${MODEL_PATH} apt install -y libnuma-dev libgl1-mesa-glx -pip3 install onnxsim -pip3 install onnx_graphsurgeon -pip3 install scikit-learn -pip3 install tqdm -pip3 install pycuda -pip3 install onnx -pip3 install tabulate -pip3 install cv2 -pip3 install pycocotools -pip3 install opencv-python==4.6.0.66 +pip3 install -r requirements.txt ``` ### Download @@ -54,8 +45,10 @@ python3 torch2onnx.py --model_path ./general_perf/model_zoo/popular/swin-large/s ## Inference ```bash +git clone https://gitee.com/deep-spark/iluvatar-corex-ixrt.git --depth=1 + export ORIGIN_ONNX_NAME=./swin-large-torch-fp32 -export OPTIMIER_FILE=/Path/ixrt/oss/tools/optimizer/optimizer.py +export OPTIMIER_FILE=./iluvatar-corex-ixrt/tools/optimizer/optimizer.py export PROJ_PATH=./ ``` @@ -79,13 +72,14 @@ pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/requirements.txt pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/requirements.txt # copy data -mv perf_engine.py ./ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py cp -r datasets/open_imagenet/* ByteMLPerf/byte_infer_perf/general_perf/datasets/open_imagenet/ mkdir -p ./ByteMLPerf/general_perf/model_zoo/popular/swin-large cp general_perf/model_zoo/popular/swin-large/* ./ByteMLPerf/general_perf/model_zoo/popular/swin-large # run acc scripts cd ./ByteMLPerf/byte_infer_perf/general_perf +mkdir -p workloads +wget -O workloads/swin-large-torch-fp32.json https://raw.githubusercontent.com/bytedance/ByteMLPerf/refs/heads/main/byte_infer_perf/general_perf/workloads/swin-large-torch-fp32.json python3 core/perf_engine.py --hardware_type ILUVATAR --task swin-large-torch-fp32 ``` diff --git a/models/cv/classification/swin_transformer_large/ixrt/ci/prepare.sh b/models/cv/classification/swin_transformer_large/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..b7fe2e695819ea348d0045c5774cc5e7af8037f2 --- /dev/null +++ b/models/cv/classification/swin_transformer_large/ixrt/ci/prepare.sh @@ -0,0 +1,49 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +apt install -y libnuma-dev + +pip install -r requirements.txt +mkdir -p general_perf/model_zoo/regular +mkdir -p general_perf/model_zoo/popular +mkdir -p general_perf/model_zoo/sota + +cp /root/data/3rd_party/swin-large-torch-fp32.json ./ +cp -r /root/data/checkpoints/swin-large ./general_perf/model_zoo/popular/ + +python3 torch2onnx.py --model_path ./general_perf/model_zoo/popular/swin-large/swin-transformer-large.pt --output_path swin-large-torch-fp32.onnx + +ln -s ../../../../../toolbox/ByteMLPerf ./ +pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/requirements.txt +pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/requirements.txt + +# copy data +cp -r /root/data/datasets/open_imagenet/* ByteMLPerf/byte_infer_perf/general_perf/datasets/open_imagenet/ +mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/swin-large +cp general_perf/model_zoo/popular/swin-large/* ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/swin-large + +cp -r /root/data/3rd_party/workloads ./ByteMLPerf/byte_infer_perf/general_perf/ \ No newline at end of file diff --git a/models/cv/classification/swin_transformer_large/ixrt/perf_engine.py b/models/cv/classification/swin_transformer_large/ixrt/perf_engine.py deleted file mode 100644 index 089d9860f573bba7e19f84aa20fb830a8fcc22d8..0000000000000000000000000000000000000000 --- a/models/cv/classification/swin_transformer_large/ixrt/perf_engine.py +++ /dev/null @@ -1,349 +0,0 @@ -# Copyright 2023 ByteDance and/or its affiliates. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import sys -import os -import logging -import importlib -import json -import subprocess -import time - -from typing import Any, Dict, Tuple -from prompt_toolkit.shortcuts import radiolist_dialog, input_dialog, yes_no_dialog -from prompt_toolkit.styles import Style - -BYTE_MLPERF_ROOT = os.path.dirname( - os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -os.chdir(BYTE_MLPERF_ROOT) -sys.path.insert(0, BYTE_MLPERF_ROOT) - -import argparse -from general_perf.core.configs.workload_store import load_workload -from general_perf.core.configs.dataset_store import load_dataset -from general_perf.core.configs.backend_store import init_compile_backend, init_runtime_backend - -logging.basicConfig(level=logging.INFO) -log = logging.getLogger("PerfEngine") -os.environ["TF_CPP_MIN_LOG_LEVEL"] = '3' - - -def get_args(): - """Parse commandline.""" - parser = argparse.ArgumentParser() - parser.add_argument( - "--task", - default="resnet50-tf-fp32", - help="The task going to be evaluted, refs to workloads/") - parser.add_argument( - "--hardware_type", - default="GPU", - help="The backend going to be evaluted, refs to backends/") - parser.add_argument("--compile_only", - action='store_true', - help="Run compilation only") - - args = parser.parse_args() - return args - - -class PerfEngine: - def __init__(self) -> None: - super().__init__() - self.args = get_args() - self.workload = load_workload(self.args.task) - self.backend_type = self.args.hardware_type - self.compile_backend = None - self.old_os_path = os.environ['PATH'] - self.prev_sys_path = list(sys.path) - self.real_prefix = sys.prefix - self.compile_only_mode = False - - def start_engine(self) -> None: - ''' - Byte MlPerf will create an virtual env for each backend to avoid dependance conflict - ''' - success, total = 0, len(self.workload) - if total == 0: - return - log.info("******************* Backend Env Initization *******************") - status = self.activate_venv(self.backend_type) - if not status: - log.warning("Activate virtualenv Failed, Please Check...") - - self.compile_backend = init_compile_backend(self.backend_type) - self.runtime_backend = init_runtime_backend(self.backend_type) - - output_dir = os.path.abspath('general_perf/reports/' + - self.backend_type) - os.makedirs(output_dir, exist_ok=True) - - status = self.single_workload_perf(self.workload) - - def single_workload_perf( - self, workload: Dict[str, Any]) -> bool: - log.info("******************************************* Start to test model: {}. *******************************************".format(workload['model'])) - - # Check Compile Only Mode - self.compile_only_mode = False - if self.args.compile_only or workload['compile_only']: - self.compile_only_mode = True - - base_report = { - "Model": workload['model'].upper(), - "Backend": self.backend_type, - "Host Info": self.get_cpu_name() - } - - # Initalize Model Config Info - model_info = self.get_model_info(workload['model']) - pre_compile_config = {"workload": workload, 'model_info': model_info} - interact_info = self.check_interact_info(pre_compile_config) - pre_compile_config['interact_info'] = interact_info - if not model_info['dataset_name']: - model_info['dataset_name'] = 'fake_dataset' - - - ''' - Compile Backend could do some optimization like convert model format here - ''' - log.info("******************************************* Running Backend Compilation... 
*******************************************") - log.info("Running Backend Preoptimization...") - pre_compile_config = self.compile_backend.pre_optimize(pre_compile_config) - - - # Initalize dataset - dataset = load_dataset(model_info) - dataset.preprocess() - base_report['Dataset'] = model_info['dataset_name'].upper( - ) if model_info['dataset_name'] else None - - #Placeholder Only - segment_info = self.compile_backend.segment(pre_compile_config) - - best_batch_sizes = self.compile_backend.get_best_batch_size() - if isinstance(best_batch_sizes, list): - pre_compile_config['workload'][ - 'batch_sizes'] = best_batch_sizes - - log.info("Start to compile the model...") - start = time.time() - compile_info = self.compile_backend.compile(pre_compile_config, - dataset) - end = time.time() - - graph_compile_report = {} - graph_compile_report["Compile Duration"] = round(end - start, 5) - graph_compile_report["Compile Precision"] = compile_info[ - 'compile_precision'] - graph_compile_report["Subgraph Coverage"] = compile_info['sg_percent'] - if 'optimizations' in compile_info: - graph_compile_report['Optimizations'] = compile_info['optimizations'] - if 'instance_count' in compile_info: - base_report['Instance Count'] = compile_info['instance_count'] - if 'device_count' in compile_info: - base_report['Device Count'] = compile_info['device_count'] - base_report['Graph Compile'] = graph_compile_report - - # Initalize Output Dir and Reports - output_dir = os.path.abspath('general_perf/reports/' + - self.backend_type + '/' + - workload['model']) - os.makedirs(output_dir, exist_ok=True) - - # Compile only mode will stop here - if self.compile_only_mode: - base_report.pop("Backend") - return compile_info["compile_status"], base_report - - # load runtime backend - """ - Start Here - """ - batch_sizes = pre_compile_config['workload']['batch_sizes'] - self.runtime_backend.configs = compile_info - self.runtime_backend.workload = workload - self.runtime_backend.model_info = model_info - - self.runtime_backend.load(workload['batch_sizes'][0]) - # test accuracy - accuracy_report = {} - AccuracyChecker = self.get_accuracy_checker( - model_info['dataset_name'] - if model_info['dataset_name'] else 'fake_dataset') - AccuracyChecker.runtime_backend = self.runtime_backend - AccuracyChecker.dataloader = dataset - AccuracyChecker.output_dir = output_dir - AccuracyChecker.configs = compile_info - - if workload['test_accuracy']: - log.info("******************************************* Running Accuracy Checker... *******************************************") - - dataset.rebatch(self.runtime_backend.get_loaded_batch_size()) - accuracy_results = AccuracyChecker.calculate_acc( - workload['data_percent']) - - accuracy_report['Data Percent'] = workload['data_percent'] - accuracy_report.update(accuracy_results) - - # test numeric - if workload['test_numeric']: - log.info("******************************************* Running Numeric Checker... 
*******************************************") - - dataset.rebatch(self.runtime_backend.get_loaded_batch_size()) - if not workload['test_accuracy']: - accuracy_results = AccuracyChecker.calculate_acc( - workload['data_percent']) - diff_results = AccuracyChecker.calculate_diff() - accuracy_report.update(diff_results) - # accuracy_report['Diff Dist'] = compile_info['model'] + '-to-' + compile_info['compile_precision'].lower() + ".png" - - if accuracy_report: - base_report['Accuracy'] = accuracy_report - - # function to test qps and latency - if workload['test_perf']: - log.info("******************************************* Runing QPS Checker... *******************************************") - performance_reports = [] - qs_status = self.runtime_backend.is_qs_mode_supported() - if qs_status: - qs_config = self.runtime_backend.generate_qs_config() - performance_reports = self.qs_benchmark(qs_config) - else: - for bs in batch_sizes: - self.runtime_backend.load(bs) - batch_reports = self.runtime_backend.benchmark(dataset) - performance_reports.append(batch_reports) - base_report['Performance'] = performance_reports - - if "Instance Count" not in base_report: - log.warning("Vendors need to Add # of instances") - if "Device Count" not in base_report: - log.warning("Vendors need to Add # of devices") - - # write output to json file - output_report_path = output_dir + "/result-" + compile_info['compile_precision'].lower() + ".json" - with open(output_report_path, 'w') as file: - json.dump(base_report, file, indent=4) - - base_report.pop("Backend") - log.info("Testing Finish. Report is saved in path: [ {}/{} ]". - format(output_dir[output_dir.rfind('general_perf'):], - os.path.basename(output_report_path))) - - return compile_info["compile_status"] - - #WIP - def qs_benchmark(self, qs_config: Dict[str, Any]) -> list: - return [] - - def get_accuracy_checker(self, dataset_name: str): - AccuracyChecker = importlib.import_module('general_perf.datasets.' 
+ - dataset_name + - ".test_accuracy") - AccuracyChecker = getattr(AccuracyChecker, 'AccuracyChecker') - return AccuracyChecker() - - def get_model_info(self, model_name: str) -> Dict[str, Any]: - with open("general_perf/model_zoo/" + model_name + '.json', - 'r') as file: - model_info = json.load(file) - return model_info - - def get_cpu_name(self): - command = "lscpu | grep 'Model name' | awk -F: '{print $2}'" - cpu_name = subprocess.check_output(command, shell=True) - return cpu_name.decode().strip() - - def check_interact_info( - self, pre_compile_config: Dict[str, Dict]) -> Dict[str, Any]: - interact_info = self.compile_backend.get_interact_profile( - pre_compile_config) - - answer = {} - if len(interact_info) == 0: - return answer - - dialog_style = Style.from_dict({ - 'dialog': 'bg:#88b8ff', - 'dialog frame.label': 'bg:#ffffff #000000', - 'dialog.body': 'bg:#000000 #a0acde', - 'dialog shadow': 'bg:#004aaa', - }) - - input_style = Style.from_dict({ - 'dialog': 'bg:#88b8ff', - 'dialog frame.label': 'bg:#ffffff #000000', - 'dialog.body': 'bg:#000000 #a0acde', - 'dialog shadow': 'bg:#004aaa', - 'text-area.prompt': 'bg:#ffffff', - 'text-area': '#000000', - }) - - option = yes_no_dialog(title=self.backend_type + '编译配置', - text='[请选择]:是否进行编译后端配置:', - style=dialog_style).run() - if option: - sum_question = len(interact_info) - for i, question in enumerate(interact_info): - if question['depends']: - state = 0 - for title in question['depends'].split(','): - if not answer[title]: - state = 1 - if state: - continue - if question['dialog_type'] == 'Yes/No Dialog': - option = yes_no_dialog( - title=self.backend_type + '编译配置进度(' + str(i + 1) + - '/' + str(sum_question) + ')', - text="[Backend " + self.backend_type + "]: " + - question['note'], - style=dialog_style).run() - elif question['dialog_type'] == "Input Dialog": - option = input_dialog( - title=self.backend_type + '编译配置进度(' + str(i + 1) + - '/' + str(sum_question) + ')', - text="[Backend " + self.backend_type + "]: " + - question['note'], - style=input_style).run() - elif question['dialog_type'] == "Radiolist Dialog": - choice = [(i, text) - for i, text in enumerate(question['options'])] - num = radiolist_dialog( - title=self.backend_type + '编译配置进度(' + str(i + 1) + - '/' + str(sum_question) + ')', - text="[Backend " + self.backend_type + "]: " + - question['note'], - values=choice, - style=dialog_style).run() - option = question['options'][num] if num is not None else question[ - 'default'] - answer[question['name']] = option - - return answer - - def activate_venv(self, hardware_type: str) -> bool: - - return True - - def deactivate_venv(self): - sys.path[: - 0] = self.prev_sys_path #will also revert the added site-packages - sys.prefix = self.real_prefix - os.environ['PATH'] = self.old_os_path - - -if __name__ == "__main__": - engine = PerfEngine() - engine.start_engine() diff --git a/models/cv/classification/swin_transformer_large/ixrt/requirements.txt b/models/cv/classification/swin_transformer_large/ixrt/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..c46a0dcfcb32bc3061f061fb162fa1a067b331a1 --- /dev/null +++ b/models/cv/classification/swin_transformer_large/ixrt/requirements.txt @@ -0,0 +1,10 @@ +onnxsim +onnx_graphsurgeon +scikit-learn +tqdm +pycuda +onnx +tabulate +pycocotools +opencv-python==4.6.0.66 +typing-extensions==4.12.2 \ No newline at end of file diff --git a/models/cv/classification/vgg16/ixrt/ci/prepare.sh b/models/cv/classification/vgg16/ixrt/ci/prepare.sh index 
7d8e52b09d8e6616825fb5f54c12bb8cfc70aa55..7492df8c2a1294fc3aa5a5e574292afb4d284f2f 100644 --- a/models/cv/classification/vgg16/ixrt/ci/prepare.sh +++ b/models/cv/classification/vgg16/ixrt/ci/prepare.sh @@ -1,3 +1,30 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + pip install -r requirements.txt mkdir checkpoints python3 export_onnx.py --origin_model /root/data/checkpoints/vgg16.pth --output_model checkpoints/vgg16.onnx \ No newline at end of file diff --git a/models/cv/classification/wide_resnet50/ixrt/README.md b/models/cv/classification/wide_resnet50/ixrt/README.md index 72dd1308b11b2dd7f6237e8c7ec782c99107e0c2..72fd5b4986a20b25edc633fe87de75560c5c5045 100644 --- a/models/cv/classification/wide_resnet50/ixrt/README.md +++ b/models/cv/classification/wide_resnet50/ixrt/README.md @@ -9,8 +9,7 @@ The distinguishing feature of Wide ResNet50 lies in its widened architecture com ### Install ```bash -pip3 install onnx -pip3 install tqdm +pip3 install -r requirements.txt ``` ### Download diff --git a/models/cv/classification/wide_resnet50/ixrt/ci/prepare.sh b/models/cv/classification/wide_resnet50/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..8f0dd69b452c8ba672f53763e428087ade93208e --- /dev/null +++ b/models/cv/classification/wide_resnet50/ixrt/ci/prepare.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
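+
+# Prepare steps: install the OpenGL runtime for the detected OS, install the Python
+# requirements, and export the local Wide ResNet50 checkpoint to checkpoints/wide_resnet50.onnx.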
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r requirements.txt +mkdir -p checkpoints/ +python3 export.py --weight /root/data/checkpoints/wide_resnet50_2-95faca4d.pth --output checkpoints/wide_resnet50.onnx \ No newline at end of file diff --git a/models/cv/classification/wide_resnet50/ixrt/inference.py b/models/cv/classification/wide_resnet50/ixrt/inference.py index 2c9dcb3f9cc5b9a26903651a31fafa16d8f0db31..77a1888db603a6f5102586716326d1aaf0518fd0 100644 --- a/models/cv/classification/wide_resnet50/ixrt/inference.py +++ b/models/cv/classification/wide_resnet50/ixrt/inference.py @@ -83,6 +83,7 @@ def main(config): total_sample = 0 acc_top1, acc_top5 = 0, 0 + start_time = time.time() with tqdm(total= len(dataloader)) as _tqdm: for idx, (batch_data, batch_label) in enumerate(dataloader): batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) @@ -105,6 +106,9 @@ def main(config): acc_5='{:.4f}'.format(acc_top5/total_sample)) _tqdm.update(1) + end_time = time.time() + e2e_time = end_time - start_time + print(F"E2E time : {e2e_time:.3f} seconds") print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") acc1 = acc_top1/total_sample diff --git a/models/cv/classification/wide_resnet50/ixrt/requirements.txt b/models/cv/classification/wide_resnet50/ixrt/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..424e6007e77afcd8452421bf066bd8e8db67cb88 --- /dev/null +++ b/models/cv/classification/wide_resnet50/ixrt/requirements.txt @@ -0,0 +1,4 @@ +onnx +tqdm +pycuda +ppq==0.6.6 \ No newline at end of file diff --git a/models/cv/detection/centernet/ixrt/README.md b/models/cv/detection/centernet/ixrt/README.md index e7b51ca94ec92bce186b2e78456d4b6f9e87969c..8978b2b49daa7daa333cd113d5fe2259d2222687 100644 --- a/models/cv/detection/centernet/ixrt/README.md +++ b/models/cv/detection/centernet/ixrt/README.md @@ -15,11 +15,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install onnx -pip3 install tqdm -pip3 install mmdet -pip3 install mmdeploy -pip3 install mmengine +pip3 install -r requirements.txt # Contact the Iluvatar administrator to get the mmcv install package. ``` diff --git a/models/cv/detection/centernet/ixrt/ci/prepare.sh b/models/cv/detection/centernet/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..fb6ab855995c0b561d72b81fd2aae7c3218cffdc --- /dev/null +++ b/models/cv/detection/centernet/ixrt/ci/prepare.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
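+
+# Prepare steps: install the OpenGL runtime for the detected OS, install the Python
+# requirements, and export the local CenterNet-ResNet18 checkpoint to centernet.onnx
+# using the bundled mmdet config file.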
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi +pip3 install -r requirements.txt +python3 export.py --weight /root/data/checkpoints/centernet_resnet18_140e_coco_20210705_093630-bb5b3bf7.pth --cfg centernet_r18_8xb16-crop512-140e_coco.py --output centernet.onnx \ No newline at end of file diff --git a/models/cv/detection/centernet/ixrt/inference.py b/models/cv/detection/centernet/ixrt/inference.py index 3e7f954f812d5d9d2bcac8d78fda293817cc4707..e65c681fe9d82953d8babbd0107f3b14c0cc426a 100644 --- a/models/cv/detection/centernet/ixrt/inference.py +++ b/models/cv/detection/centernet/ixrt/inference.py @@ -126,7 +126,8 @@ def main(): cfg['test_evaluator']['ann_file'] = os.path.join(args.datasets, 'annotations/instances_val2017.json') runner = RUNNERS.build(cfg) - + + start_time = time.time() for input_data in tqdm(runner.test_dataloader): input_data = runner.model.data_preprocessor(input_data, False) @@ -178,6 +179,9 @@ def main(): runner.test_evaluator.process(data_samples=batch_data_samples, data_batch=input_data) + end_time = time.time() + e2e_time = end_time - start_time + print(F"E2E time : {e2e_time:.3f} seconds") metrics = runner.test_evaluator.evaluate(len(runner.test_dataloader.dataset)) if __name__ == "__main__": diff --git a/models/cv/detection/centernet/ixrt/requirements.txt b/models/cv/detection/centernet/ixrt/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..9178d0b61aa4155c1effbf468da32f2b8dd9f96d --- /dev/null +++ b/models/cv/detection/centernet/ixrt/requirements.txt @@ -0,0 +1,6 @@ +onnx +tqdm +mmdet +mmdeploy +mmengine +transformers==4.37.1 \ No newline at end of file diff --git a/models/cv/detection/detr/ixrt/README.md b/models/cv/detection/detr/ixrt/README.md index 28df3f60e99b4c3901c0ee9c3c74aaa0946e7935..1704b35c8ddc267bcbba2867770fd8f498c8f521 100755 --- a/models/cv/detection/detr/ixrt/README.md +++ b/models/cv/detection/detr/ixrt/README.md @@ -15,14 +15,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install tqdm -pip3 install pycuda -pip3 install onnx -pip3 install onnxsim -pip3 install tabulate -pip3 install cv2 -pip3 install pycocotools -pip3 install opencv-python==4.6.0.66 +pip3 install -r requirements.txt ``` ### Download diff --git a/models/cv/detection/detr/ixrt/ci/prepare.sh b/models/cv/detection/detr/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..b70efe4dd7f758e1bc8631c4688237ca90450867 --- /dev/null +++ b/models/cv/detection/detr/ixrt/ci/prepare.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
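+
+# Prepare steps: install the OpenGL runtime for the detected OS, install the Python
+# requirements, and convert the local DETR-R50 checkpoint to checkpoints/detr_res50.onnx
+# with batch size 1.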
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi +pip3 install -r requirements.txt + +mkdir checkpoints +python3 export_model.py --torch_file /root/data/checkpoints/detr_r50_8xb2-150e_coco_20221023_153551-436d03e8.pth --onnx_file checkpoints/detr_res50.onnx --bsz 1 \ No newline at end of file diff --git a/models/cv/detection/detr/ixrt/inference.py b/models/cv/detection/detr/ixrt/inference.py index eb33b614ee4f11a1fbe09cc225cd5f98e292ee5f..e7c112c8fb50c1f18453dd1845bd7b9e29f6a135 100755 --- a/models/cv/detection/detr/ixrt/inference.py +++ b/models/cv/detection/detr/ixrt/inference.py @@ -128,7 +128,6 @@ def main(config): # ipdb.set_trace() save2json(img_id, pred_boxes, json_result) - fps = num_samples / forward_time if config.test_mode == "FPS": @@ -155,6 +154,7 @@ def main(config): with open(pred_json, "w") as f: json.dump(json_result, f) + start_time = time.time() anno_json = config.coco_gt anno = COCO(anno_json) # init annotations api pred = anno.loadRes(pred_json) # init predictions api @@ -166,8 +166,9 @@ def main(config): f"==============================eval {config.model_name} {config.precision} coco map ==============================" ) eval.summarize() - + e2e_time = time.time() - start_time map, map50 = eval.stats[:2] + print(F"E2E time : {e2e_time:.3f} seconds") print("MAP@0.5 : ", map50) print(f"Accuracy Check : Test {map50} >= target {config.map_target}") if map50 >= config.map_target: diff --git a/models/cv/detection/detr/ixrt/requirements.txt b/models/cv/detection/detr/ixrt/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..967c8c6817fad277f415780c69ab80f53edd54c0 --- /dev/null +++ b/models/cv/detection/detr/ixrt/requirements.txt @@ -0,0 +1,7 @@ +tqdm +pycuda +onnx +onnxsim +tabulate +pycocotools +opencv-python==4.6.0.66 \ No newline at end of file diff --git a/models/cv/detection/fcos/ixrt/README.md b/models/cv/detection/fcos/ixrt/README.md index 49db1e04c9263472bd8db0675e3b543098f13362..bc3f046116bdecc90c74ee0f88e5abc1c1fc488b 100755 --- a/models/cv/detection/fcos/ixrt/README.md +++ b/models/cv/detection/fcos/ixrt/README.md @@ -16,22 +16,15 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install tqdm -pip3 install onnx -pip3 install onnxsim -pip3 install ultralytics -pip3 install pycocotools -pip3 install addict -pip3 install yapf -pip3 install pycuda -pip3 install mmdet==2.28.2 -pip3 install opencv-python==4.6.0.66 +pip3 install -r requirements.txt ``` ### Dependency The inference of the FCOS model requires a dependency on a well-adapted mmcv-v1.7.0 library. Please inquire with the staff to obtain the relevant libraries. 
+You can follow here to build: https://gitee.com/deep-spark/deepsparkhub/blob/master/toolbox/MMDetection/prepare_mmcv.sh + ```bash cd mmcv @@ -52,7 +45,7 @@ Pretrained model: = target {config.map_target}") + print(F"E2E time : {e2e_time:.3f} seconds") if map50 >= config.map_target: print("pass!") exit() diff --git a/models/cv/detection/yolov3/ixrt/requirements.txt b/models/cv/detection/yolov3/ixrt/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..f2ec37c1129a168dac9920da4cebdfe78169841a --- /dev/null +++ b/models/cv/detection/yolov3/ixrt/requirements.txt @@ -0,0 +1,7 @@ +tqdm +onnx +onnxsim +ultralytics +pycocotools +opencv-python==4.6.0.66 +pycuda \ No newline at end of file diff --git a/models/cv/detection/yolov4/ixrt/README.md b/models/cv/detection/yolov4/ixrt/README.md index 03c632d7cd72d3e38d1ddb89edc1dfbb1515c487..ae94ae43aeaeff1dd01d6c0f3bf7d5b9f7c019f5 100644 --- a/models/cv/detection/yolov4/ixrt/README.md +++ b/models/cv/detection/yolov4/ixrt/README.md @@ -15,11 +15,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install tqdm -pip3 install onnx -pip3 install onnxsim -pip3 install pycocotools -pip3 install pycuda +pip3 install -r requirements.txt ``` ### Download @@ -55,18 +51,18 @@ onnxsim data/yolov4.onnx data/yolov4_sim.onnx ```bash # Accuracy -bash scripts/infer_yolov4darknet_fp16_accuracy.sh +bash scripts/infer_yolov4_fp16_accuracy.sh # Performance -bash scripts/infer_yolov4darknet_fp16_performance.sh +bash scripts/infer_yolov4_fp16_performance.sh ``` ### INT8 ```bash # Accuracy -bash scripts/infer_yolov4darknet_int8_accuracy.sh +bash scripts/infer_yolov4_int8_accuracy.sh # Performance -bash scripts/infer_yolov4darknet_int8_performance.sh +bash scripts/infer_yolov4_int8_performance.sh ``` ## Results diff --git a/models/cv/detection/yolov4/ixrt/ci/prepare.sh b/models/cv/detection/yolov4/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..f5381ef3db88f61e26d50b601f58b046ffa79317 --- /dev/null +++ b/models/cv/detection/yolov4/ixrt/ci/prepare.sh @@ -0,0 +1,39 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
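+
+# Prepare steps: install the OpenGL runtime for the detected OS, install the Python
+# requirements, copy the local darknet YOLOv4 sources, export the weights to ONNX at
+# batch size 16, and simplify the exported graph with onnxsim.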
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip3 install -r requirements.txt + +# clone yolov4 +cp -r /root/data/3rd_party/yolov4 ./ + +mkdir data +# export onnx model +python3 export.py --cfg yolov4/cfg/yolov4.cfg --weight /root/data/checkpoints/yolov4.weights --batchsize 16 --output data/yolov4.onnx +mv yolov4_16_3_608_608_static.onnx data/yolov4.onnx + +# Use onnxsim optimize onnx model +onnxsim data/yolov4.onnx data/yolov4_sim.onnx diff --git a/models/cv/detection/yolov4/ixrt/inference.py b/models/cv/detection/yolov4/ixrt/inference.py index 5d740507b3a54bf2248000b2ac60d09f12a9886a..4cb31b1b5e106623a4ded55e8bf6313babd65a15 100644 --- a/models/cv/detection/yolov4/ixrt/inference.py +++ b/models/cv/detection/yolov4/ixrt/inference.py @@ -68,6 +68,7 @@ def main(config): forward_time = 0.0 class_map = coco80_to_coco91_class() num_samples = 0 + # Step3: Run on coco dataset for batch_names, batch_images, batch_shapes in tqdm(zip(*dataloader)): batch_data = np.ascontiguousarray(batch_images) @@ -110,7 +111,6 @@ def main(config): pred_results.append(pred_box.tolist()) save2json(batch_img_id, pred_results, json_result, class_map) - fps = num_samples / forward_time if config.test_mode == "FPS": @@ -137,6 +137,7 @@ def main(config): with open(pred_json, "w") as f: json.dump(json_result, f) + start_time = time.time() anno_json = config.coco_gt anno = COCO(anno_json) # init annotations api pred = anno.loadRes(pred_json) # init predictions api @@ -148,10 +149,12 @@ def main(config): f"==============================eval {config.model_name} {config.precision} coco map ==============================" ) eval.summarize() - + e2e_time = time.time() - start_time map, map50 = eval.stats[:2] + print(F"E2E time : {e2e_time:.3f} seconds") print("MAP@0.5 : ", map50) print(f"Accuracy Check : Test {map50} >= target {config.map_target}") + print(F"E2E time : {e2e_time:.3f} seconds") if map50 >= config.map_target: print("pass!") exit() diff --git a/models/cv/detection/yolov4/ixrt/requirements.txt b/models/cv/detection/yolov4/ixrt/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..c5ff461d18d82755007c5cc116aedbeb8a02574e --- /dev/null +++ b/models/cv/detection/yolov4/ixrt/requirements.txt @@ -0,0 +1,5 @@ +tqdm +onnx +onnxsim +pycocotools +pycuda \ No newline at end of file diff --git a/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4darknet_fp16_accuracy.sh b/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4_fp16_accuracy.sh similarity index 98% rename from models/cv/detection/yolov4/ixrt/scripts/infer_yolov4darknet_fp16_accuracy.sh rename to models/cv/detection/yolov4/ixrt/scripts/infer_yolov4_fp16_accuracy.sh index b732d4eb297b6319ad5bef4660a6f7dde0ef0abc..c33dc591362e34df05378869f4254190ef5a6985 100644 --- a/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4darknet_fp16_accuracy.sh +++ b/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4_fp16_accuracy.sh @@ -23,7 +23,7 @@ check_status() } PROJ_DIR=$(cd $(dirname $0);cd ../; pwd) -DATASETS_DIR="${PROJ_DIR}/data/coco" +DATASETS_DIR=${DATASETS_DIR:-"${PROJ_DIR}/data/coco"} COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json EVAL_DIR=${DATASETS_DIR}/images/val2017 CHECKPOINTS_DIR="${PROJ_DIR}/data" diff --git a/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4darknet_fp16_performance.sh 
b/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4_fp16_performance.sh similarity index 98% rename from models/cv/detection/yolov4/ixrt/scripts/infer_yolov4darknet_fp16_performance.sh rename to models/cv/detection/yolov4/ixrt/scripts/infer_yolov4_fp16_performance.sh index 796dad720e13250b6ee81c66defca990c416e220..a4a83ce72e715997ca64cf35b0b0ff0e8bd351a5 100644 --- a/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4darknet_fp16_performance.sh +++ b/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4_fp16_performance.sh @@ -23,7 +23,7 @@ check_status() } PROJ_DIR=$(cd $(dirname $0);cd ../; pwd) -DATASETS_DIR="${PROJ_DIR}/data/coco" +DATASETS_DIR=${DATASETS_DIR:-"${PROJ_DIR}/data/coco"} COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json EVAL_DIR=${DATASETS_DIR}/images/val2017 CHECKPOINTS_DIR="${PROJ_DIR}/data" diff --git a/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4darknet_int8_accuracy.sh b/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4_int8_accuracy.sh similarity index 98% rename from models/cv/detection/yolov4/ixrt/scripts/infer_yolov4darknet_int8_accuracy.sh rename to models/cv/detection/yolov4/ixrt/scripts/infer_yolov4_int8_accuracy.sh index c62d174c09e6f4b005a9b1e7ce028cc47643a930..20e593785bc25bb06b8c0f2e537542981cc65c00 100644 --- a/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4darknet_int8_accuracy.sh +++ b/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4_int8_accuracy.sh @@ -23,7 +23,7 @@ check_status() } PROJ_DIR=$(cd $(dirname $0);cd ../; pwd) -DATASETS_DIR="${PROJ_DIR}/data/coco" +DATASETS_DIR=${DATASETS_DIR:-"${PROJ_DIR}/data/coco"} COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json EVAL_DIR=${DATASETS_DIR}/images/val2017 CHECKPOINTS_DIR="${PROJ_DIR}/data" diff --git a/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4darknet_int8_performance.sh b/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4_int8_performance.sh similarity index 98% rename from models/cv/detection/yolov4/ixrt/scripts/infer_yolov4darknet_int8_performance.sh rename to models/cv/detection/yolov4/ixrt/scripts/infer_yolov4_int8_performance.sh index 2e335fa1d013961c136cda4f79fd2be712311494..7f11038651ffe1e27c8e57f40e7b6f74b67e1945 100644 --- a/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4darknet_int8_performance.sh +++ b/models/cv/detection/yolov4/ixrt/scripts/infer_yolov4_int8_performance.sh @@ -23,7 +23,7 @@ check_status() } PROJ_DIR=$(cd $(dirname $0);cd ../; pwd) -DATASETS_DIR="${PROJ_DIR}/data/coco" +DATASETS_DIR=${DATASETS_DIR:-"${PROJ_DIR}/data/coco"} COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json EVAL_DIR=${DATASETS_DIR}/images/val2017 CHECKPOINTS_DIR="${PROJ_DIR}/data" diff --git a/models/cv/detection/yolov5/ixrt/README.md b/models/cv/detection/yolov5/ixrt/README.md index a00b7c161554f7bb164d2f97514798b4625c92d0..ea1fc1801a78b35007fad7c3231216e9b9ccd862 100644 --- a/models/cv/detection/yolov5/ixrt/README.md +++ b/models/cv/detection/yolov5/ixrt/README.md @@ -15,13 +15,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install tqdm -pip3 install onnx -pip3 install onnxsim -pip3 install ultralytics -pip3 install pycocotools -pip3 install cv2 -pip3 install opencv-python==4.6.0.66 +pip3 install -r requirements.txt ``` ### Download @@ -54,36 +48,36 @@ mv yolov5m.onnx /Path/to/checkpoints ## Inference ```bash -export PROJ_DIR=/Path/to/yolov5m/ixrt +export PROJ_DIR=/Path/to/yolov5/ixrt export DATASETS_DIR=/Path/to/coco2017/ export CHECKPOINTS_DIR=./checkpoints export 
COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json export EVAL_DIR=${DATASETS_DIR}/val2017 -export RUN_DIR=/Path/to/yolov5m/ixrt -export CONFIG_DIR=config/YOLOV5M_CONFIG +export RUN_DIR=/Path/to/yolov5/ixrt +export CONFIG_DIR=config/YOLOV5_CONFIG ``` ### FP16 ```bash # Accuracy -bash scripts/infer_yolov5m_fp16_accuracy.sh +bash scripts/infer_yolov5_fp16_accuracy.sh # Performance -bash scripts/infer_yolov5m_fp16_performance.sh +bash scripts/infer_yolov5_fp16_performance.sh ``` ### INT8 ```bash # Accuracy -bash scripts/infer_yolov5m_int8_accuracy.sh +bash scripts/infer_yolov5_int8_accuracy.sh # Performance -bash scripts/infer_yolov5m_int8_performance.sh +bash scripts/infer_yolov5_int8_performance.sh ``` ## Results Model |BatchSize |Precision |FPS |MAP@0.5 |MAP@0.5:0.95 | --------|-----------|----------|---------|----------|-------------| -YOLOv5m | 32 | FP16 | 680.93 | 0.637 | 0.447 | -YOLOv5m | 32 | INT8 | 1328.50 | 0.627 | 0.425 | +YOLOv5 | 32 | FP16 | 680.93 | 0.637 | 0.447 | +YOLOv5 | 32 | INT8 | 1328.50 | 0.627 | 0.425 | diff --git a/models/cv/detection/yolov5/ixrt/ci/prepare.sh b/models/cv/detection/yolov5/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..b66c06b56fd42240b3578ef2a764a39c5fe06b03 --- /dev/null +++ b/models/cv/detection/yolov5/ixrt/ci/prepare.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip3 install -r requirements.txt + +mkdir checkpoints +cp -r /root/data/3rd_party/yolov5 ./ +# 切换到需要的版本分支 +cd yolov5 + +# 有一些环境需要安装 +# wget https://ultralytics.com/assets/Arial.ttf +mkdir -p /root/.config/Ultralytics +cp /root/data/3rd_party/Arial.ttf /root/.config/Ultralytics/Arial.ttf + +ln -s /root/data/checkpoints/yolov5m.pt ./ +# 转换为onnx (具体实现可以参考 export.py 中的 export_onnx 函数) +python3 export.py --weights yolov5m.pt --include onnx --opset 11 --batch-size 32 +mv yolov5m.onnx ../checkpoints +cd .. 
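For reference, the full YOLOv5 flow after these renames looks as follows; the `/Path/to/...` values are placeholders for your local checkout and COCO 2017 data, and the scripts expect the ONNX produced by the prepare step above.

```bash
# Point the scripts at the checkout, dataset, and renamed config.
export PROJ_DIR=/Path/to/yolov5/ixrt
export DATASETS_DIR=/Path/to/coco2017/
export CHECKPOINTS_DIR=./checkpoints
export COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json
export EVAL_DIR=${DATASETS_DIR}/val2017
export RUN_DIR=/Path/to/yolov5/ixrt
export CONFIG_DIR=config/YOLOV5_CONFIG

# FP16
bash scripts/infer_yolov5_fp16_accuracy.sh
bash scripts/infer_yolov5_fp16_performance.sh

# INT8
bash scripts/infer_yolov5_int8_accuracy.sh
bash scripts/infer_yolov5_int8_performance.sh
```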
diff --git a/models/cv/detection/yolov5/ixrt/config/YOLOV5M_CONFIG b/models/cv/detection/yolov5/ixrt/config/YOLOV5_CONFIG similarity index 100% rename from models/cv/detection/yolov5/ixrt/config/YOLOV5M_CONFIG rename to models/cv/detection/yolov5/ixrt/config/YOLOV5_CONFIG diff --git a/models/cv/detection/yolov5/ixrt/inference.py b/models/cv/detection/yolov5/ixrt/inference.py index a7a60c878df96d294cdd56efe87a973a0e1f8765..c0476b899ba0ec51ab4aedc0596f19cb283952ab 100644 --- a/models/cv/detection/yolov5/ixrt/inference.py +++ b/models/cv/detection/yolov5/ixrt/inference.py @@ -180,6 +180,7 @@ def main(config): with open(pred_json, "w") as f: json.dump(json_result, f) + start_time = time.time() anno_json = config.coco_gt anno = COCO(anno_json) # init annotations api pred = anno.loadRes(pred_json) # init predictions api @@ -191,10 +192,12 @@ def main(config): f"==============================eval {config.model_name} {config.precision} coco map ==============================" ) eval.summarize() - + e2e_time = time.time() - start_time map, map50 = eval.stats[:2] + print(F"E2E time : {e2e_time:.3f} seconds") print("MAP@0.5 : ", map50) print(f"Accuracy Check : Test {map50} >= target {config.map_target}") + print(F"E2E time : {e2e_time:.3f} seconds") if map50 >= config.map_target: print("pass!") exit() diff --git a/models/cv/detection/yolov5/ixrt/requirements.txt b/models/cv/detection/yolov5/ixrt/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..f2ec37c1129a168dac9920da4cebdfe78169841a --- /dev/null +++ b/models/cv/detection/yolov5/ixrt/requirements.txt @@ -0,0 +1,7 @@ +tqdm +onnx +onnxsim +ultralytics +pycocotools +opencv-python==4.6.0.66 +pycuda \ No newline at end of file diff --git a/models/cv/detection/yolov5/ixrt/scripts/infer_yolov5m_fp16_accuracy.sh b/models/cv/detection/yolov5/ixrt/scripts/infer_yolov5_fp16_accuracy.sh similarity index 100% rename from models/cv/detection/yolov5/ixrt/scripts/infer_yolov5m_fp16_accuracy.sh rename to models/cv/detection/yolov5/ixrt/scripts/infer_yolov5_fp16_accuracy.sh diff --git a/models/cv/detection/yolov5/ixrt/scripts/infer_yolov5m_fp16_performance.sh b/models/cv/detection/yolov5/ixrt/scripts/infer_yolov5_fp16_performance.sh similarity index 100% rename from models/cv/detection/yolov5/ixrt/scripts/infer_yolov5m_fp16_performance.sh rename to models/cv/detection/yolov5/ixrt/scripts/infer_yolov5_fp16_performance.sh diff --git a/models/cv/detection/yolov5/ixrt/scripts/infer_yolov5m_int8_accuracy.sh b/models/cv/detection/yolov5/ixrt/scripts/infer_yolov5_int8_accuracy.sh similarity index 100% rename from models/cv/detection/yolov5/ixrt/scripts/infer_yolov5m_int8_accuracy.sh rename to models/cv/detection/yolov5/ixrt/scripts/infer_yolov5_int8_accuracy.sh diff --git a/models/cv/detection/yolov5/ixrt/scripts/infer_yolov5m_int8_performance.sh b/models/cv/detection/yolov5/ixrt/scripts/infer_yolov5_int8_performance.sh similarity index 100% rename from models/cv/detection/yolov5/ixrt/scripts/infer_yolov5m_int8_performance.sh rename to models/cv/detection/yolov5/ixrt/scripts/infer_yolov5_int8_performance.sh diff --git a/models/cv/detection/yolov5s/ixrt/README.md b/models/cv/detection/yolov5s/ixrt/README.md index 62232386e1470737717c058f762a92a3a8327e19..3847db5a56bb3723c52fbeaf4484773674998cb1 100755 --- a/models/cv/detection/yolov5s/ixrt/README.md +++ b/models/cv/detection/yolov5s/ixrt/README.md @@ -15,16 +15,12 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install tqdm -pip3 install onnx -pip3 install 
onnxsim -pip3 install ultralytics -pip3 install pycocotools +pip3 install -r requirements.txt ``` ### Download -Pretrained model: +Pretrained model: Dataset: to download the validation dataset. diff --git a/models/cv/detection/yolov5s/ixrt/ci/prepare.sh b/models/cv/detection/yolov5s/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..b9f3a57f9d4bb7c25c55aa5621b8eb378093bd03 --- /dev/null +++ b/models/cv/detection/yolov5s/ixrt/ci/prepare.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip3 install -r requirements.txt + +mkdir -p checkpoints +cp -r /root/data/3rd_party/yolov5 ./ + +cd yolov5/ + +# 有一些环境需要安装 +# wget https://ultralytics.com/assets/Arial.ttf +mkdir -p /root/.config/Ultralytics +cp /root/data/3rd_party/Arial.ttf /root/.config/Ultralytics/Arial.ttf + +ln -s /root/data/checkpoints/yolov5s.pt ./ +# 转换为onnx (具体实现可以参考 export.py 中的 export_onnx 函数) +python3 export.py --weights yolov5s.pt --include onnx --opset 11 --batch-size 32 +mv yolov5s.onnx ../checkpoints +cd .. 
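An optional sanity check after the yolov5s export, a minimal sketch assuming only the checkpoints/yolov5s.onnx file produced above and the onnx package already listed in requirements.txt:

```bash
python3 -c "
import onnx

# Load the exported graph and verify it is structurally valid.
model = onnx.load('checkpoints/yolov5s.onnx')
onnx.checker.check_model(model)

# Print the input tensor name and its static shape (batch 32 from the export command).
inp = model.graph.input[0]
print(inp.name, [d.dim_value for d in inp.type.tensor_type.shape.dim])
"
```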
diff --git a/models/cv/detection/yolov5s/ixrt/inference.py b/models/cv/detection/yolov5s/ixrt/inference.py index addf5278f5e31aca71ffa75051a1f0a716840d92..ad87fe1ec0c2b9e5fa4271c31018a174fef370d5 100644 --- a/models/cv/detection/yolov5s/ixrt/inference.py +++ b/models/cv/detection/yolov5s/ixrt/inference.py @@ -180,6 +180,7 @@ def main(config): with open(pred_json, "w") as f: json.dump(json_result, f) + start_time = time.time() anno_json = config.coco_gt anno = COCO(anno_json) # init annotations api pred = anno.loadRes(pred_json) # init predictions api @@ -191,10 +192,12 @@ def main(config): f"==============================eval {config.model_name} {config.precision} coco map ==============================" ) eval.summarize() - + e2e_time = time.time() - start_time map, map50 = eval.stats[:2] + print(F"E2E time : {e2e_time:.3f} seconds") print("MAP@0.5 : ", map50) print(f"Accuracy Check : Test {map50} >= target {config.map_target}") + print(F"E2E time : {e2e_time:.3f} seconds") if map50 >= config.map_target: print("pass!") exit() diff --git a/models/cv/detection/yolov5s/ixrt/requirements.txt b/models/cv/detection/yolov5s/ixrt/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..a6188db8f77c90851c51ba2066e00ada54bdea98 --- /dev/null +++ b/models/cv/detection/yolov5s/ixrt/requirements.txt @@ -0,0 +1,6 @@ +tqdm +onnx +onnxsim +ultralytics +pycocotools +pycuda \ No newline at end of file diff --git a/models/cv/detection/yolov6/ixrt/README.md b/models/cv/detection/yolov6/ixrt/README.md index 2248bb5aa1a8bc379202116392cfd31e325558f0..5d0acbcd26ded23b9414019898dad5454c62e921 100644 --- a/models/cv/detection/yolov6/ixrt/README.md +++ b/models/cv/detection/yolov6/ixrt/README.md @@ -15,11 +15,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-glx -pip3 install tqdm -pip3 install onnx -pip3 install onnxsim -pip3 install pycocotools -pip3 install pycuda +pip3 install -r requirements.txt ``` ### Download @@ -31,14 +27,12 @@ Dataset: to download the valida ```bash # get yolov6s.pt wget https://github.com/meituan/YOLOv6/releases/download/0.4.0/yolov6s.pt -# set coco path -mkdir -p data/ -ln -s /Path/to/coco/ data/coco ``` ### Model Conversion ```bash +mkdir -p data/ # install yolov6 git clone https://github.com/meituan/YOLOv6.git @@ -54,22 +48,26 @@ popd ## Inference +```bash +export DATASETS_DIR=/Path/to/coco/ +``` + ### FP16 ```bash # Accuracy -bash scripts/infer_yolov6s_fp16_accuracy.sh +bash scripts/infer_yolov6_fp16_accuracy.sh # Performance -bash scripts/infer_yolov6s_fp16_performance.sh +bash scripts/infer_yolov6_fp16_performance.sh ``` ### INT8 ```bash # Accuracy -bash scripts/infer_yolov6s_int8_accuracy.sh +bash scripts/infer_yolov6_int8_accuracy.sh # Performance -bash scripts/infer_yolov6s_int8_performance.sh +bash scripts/infer_yolov6_int8_performance.sh ``` ## Results diff --git a/models/cv/detection/yolov6/ixrt/ci/prepare.sh b/models/cv/detection/yolov6/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..3aa607e3be619c80e0ff27c38f09a39bbdf8ad4f --- /dev/null +++ b/models/cv/detection/yolov6/ixrt/ci/prepare.sh @@ -0,0 +1,39 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip3 install -r requirements.txt +mkdir -p data/ +cp -r /root/data/3rd_party/YOLOv6 ./ + +cd YOLOv6 +pip3 install -r requirements.txt + +ln -s /root/data/checkpoints/yolov6s.pt ./ +# export onnx model +python3 deploy/ONNX/export_onnx.py --weights yolov6s.pt --img 640 --batch-size 32 --simplify +mv yolov6s.onnx ../data/ +cd .. diff --git a/models/cv/detection/yolov6/ixrt/inference.py b/models/cv/detection/yolov6/ixrt/inference.py index 836f13b2376ded6144ea9bf0da7ed47cd3f5905f..1a4f151f8fc11dd44aad7a04efd101507f422a22 100644 --- a/models/cv/detection/yolov6/ixrt/inference.py +++ b/models/cv/detection/yolov6/ixrt/inference.py @@ -237,11 +237,15 @@ def main(): print("FPS : ", fps) print(f"Performance Check : Test {fps} >= target {args.fps_target}") else: + start_time = time.time() dataloader, pred_results = evaluator.eval_ixrt(args) eval_result = evaluator.eval_ixrt_map(pred_results, dataloader, task) + end_time = time.time() + e2e_time = end_time - start_time map, map50 = eval_result[:2] print("MAP@0.5 : ", map50) print(f"Accuracy Check : Test {map50} >= target {args.acc_target}") + print(F"E2E time : {e2e_time:.3f} seconds") if map50 >= args.acc_target: print("pass!") exit() diff --git a/models/cv/detection/yolov6/ixrt/requirements.txt b/models/cv/detection/yolov6/ixrt/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..2c833a42f5277d91744ea3412a828f6fc55acb01 --- /dev/null +++ b/models/cv/detection/yolov6/ixrt/requirements.txt @@ -0,0 +1,6 @@ +tqdm +onnx +onnxsim +pycocotools +pycuda +numpy==1.24.0 \ No newline at end of file diff --git a/models/cv/detection/yolov6/ixrt/scripts/infer_yolov6s_fp16_accuracy.sh b/models/cv/detection/yolov6/ixrt/scripts/infer_yolov6_fp16_accuracy.sh similarity index 98% rename from models/cv/detection/yolov6/ixrt/scripts/infer_yolov6s_fp16_accuracy.sh rename to models/cv/detection/yolov6/ixrt/scripts/infer_yolov6_fp16_accuracy.sh index 09cc0ac03802a697696ff3e68ea2c2157e240ea7..852aca0f008e57131371043c80d2b0b21680ead0 100644 --- a/models/cv/detection/yolov6/ixrt/scripts/infer_yolov6s_fp16_accuracy.sh +++ b/models/cv/detection/yolov6/ixrt/scripts/infer_yolov6_fp16_accuracy.sh @@ -23,7 +23,7 @@ check_status() } PROJ_DIR=$(cd $(dirname $0);cd ../; pwd) -DATASETS_DIR="${PROJ_DIR}/data/coco" +DATASETS_DIR=${DATASETS_DIR} COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json EVAL_DIR=${DATASETS_DIR}/images/val2017 CHECKPOINTS_DIR="${PROJ_DIR}/data" diff --git a/models/cv/detection/yolov6/ixrt/scripts/infer_yolov6s_fp16_performance.sh b/models/cv/detection/yolov6/ixrt/scripts/infer_yolov6_fp16_performance.sh similarity index 98% rename from models/cv/detection/yolov6/ixrt/scripts/infer_yolov6s_fp16_performance.sh rename to models/cv/detection/yolov6/ixrt/scripts/infer_yolov6_fp16_performance.sh index 409fd354e86d7fa3092fda68bd1da2c1ed35498d..5de30b1c885f82b0b4748633f520d9230199ae7c 100644 
--- a/models/cv/detection/yolov6/ixrt/scripts/infer_yolov6s_fp16_performance.sh +++ b/models/cv/detection/yolov6/ixrt/scripts/infer_yolov6_fp16_performance.sh @@ -23,7 +23,7 @@ check_status() } PROJ_DIR=$(cd $(dirname $0);cd ../; pwd) -DATASETS_DIR="${PROJ_DIR}/data/coco" +DATASETS_DIR=${DATASETS_DIR} COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json EVAL_DIR=${DATASETS_DIR}/images/val2017 CHECKPOINTS_DIR="${PROJ_DIR}/data" diff --git a/models/cv/detection/yolov6/ixrt/scripts/infer_yolov6s_int8_accuracy.sh b/models/cv/detection/yolov6/ixrt/scripts/infer_yolov6_int8_accuracy.sh similarity index 98% rename from models/cv/detection/yolov6/ixrt/scripts/infer_yolov6s_int8_accuracy.sh rename to models/cv/detection/yolov6/ixrt/scripts/infer_yolov6_int8_accuracy.sh index 701f80f06ac1ca46d154c1122f02913b247a83af..e7099ba0b83bdeae4632b86bc6475c737d4ffa3e 100644 --- a/models/cv/detection/yolov6/ixrt/scripts/infer_yolov6s_int8_accuracy.sh +++ b/models/cv/detection/yolov6/ixrt/scripts/infer_yolov6_int8_accuracy.sh @@ -23,7 +23,7 @@ check_status() } PROJ_DIR=$(cd $(dirname $0);cd ../; pwd) -DATASETS_DIR="${PROJ_DIR}/data/coco" +DATASETS_DIR=${DATASETS_DIR} COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json EVAL_DIR=${DATASETS_DIR}/images/val2017 CHECKPOINTS_DIR="${PROJ_DIR}/data" diff --git a/models/cv/detection/yolov6/ixrt/scripts/infer_yolov6s_int8_performance.sh b/models/cv/detection/yolov6/ixrt/scripts/infer_yolov6_int8_performance.sh similarity index 98% rename from models/cv/detection/yolov6/ixrt/scripts/infer_yolov6s_int8_performance.sh rename to models/cv/detection/yolov6/ixrt/scripts/infer_yolov6_int8_performance.sh index 58f77417058c5461fe84161bb139bcecad4623c6..85a36fadf12b34e86671bb47ab13977a9797905b 100644 --- a/models/cv/detection/yolov6/ixrt/scripts/infer_yolov6s_int8_performance.sh +++ b/models/cv/detection/yolov6/ixrt/scripts/infer_yolov6_int8_performance.sh @@ -23,7 +23,7 @@ check_status() } PROJ_DIR=$(cd $(dirname $0);cd ../; pwd) -DATASETS_DIR="${PROJ_DIR}/data/coco" +DATASETS_DIR=${DATASETS_DIR} COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json EVAL_DIR=${DATASETS_DIR}/images/val2017 CHECKPOINTS_DIR="${PROJ_DIR}/data" diff --git a/models/cv/detection/yolov7/ixrt/README.md b/models/cv/detection/yolov7/ixrt/README.md index 4e7375de573bb9cc8c0a23eef128afb94d6d0969..7b6e91c3e9cbc039f008dfccbd49117a406408f3 100644 --- a/models/cv/detection/yolov7/ixrt/README.md +++ b/models/cv/detection/yolov7/ixrt/README.md @@ -15,13 +15,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install tqdm -pip3 install onnx -pip3 install onnxsim -pip3 install ultralytics -pip3 install pycocotools -pip3 install cv2 -pip3 install opencv-python==4.6.0.66 +pip3 install -r requirements.txt ``` ### Download @@ -39,7 +33,7 @@ Dataset: to download the valida git clone https://github.com/WongKinYiu/yolov7.git cd yolov7 -python3 export.py --weights yolov7.pt --grid --end2end --simplify --topk-all 100 --iou-thres 0.65 --conf-thres 0.35 --img-size 640 640 --max-wh 640 --batch-size 32 +python3 export.py --weights yolov7.pt --grid --end2end --simplify --topk-all 100 --iou-thres 0.65 --conf-thres 0.35 --img-size 640 640 --max-wh 640 --batch-size 16 mkdir /Your_Projects/To/checkpoints mv yolov7.onnx /Path/to/checkpoints/yolov7m.onnx ``` @@ -53,25 +47,25 @@ export CHECKPOINTS_DIR=./checkpoints export COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json export EVAL_DIR=${DATASETS_DIR}/val2017 export RUN_DIR=/Path/to/yolov7/ixrt -export CONFIG_DIR=config/YOLOV7M_CONFIG 
+export CONFIG_DIR=config/YOLOV7_CONFIG ``` ### FP16 ```bash # Accuracy -bash scripts/infer_yolov7m_fp16_accuracy.sh +bash scripts/infer_yolov7_fp16_accuracy.sh # Performance -bash scripts/infer_yolov7m_fp16_performance.sh +bash scripts/infer_yolov7_fp16_performance.sh ``` ### INT8 ```bash # Accuracy -bash scripts/infer_yolov7m_int8_accuracy.sh +bash scripts/infer_yolov7_int8_accuracy.sh # Performance -bash scripts/infer_yolov7m_int8_performance.sh +bash scripts/infer_yolov7_int8_performance.sh ``` ## Results diff --git a/models/cv/detection/yolov7/ixrt/ci/prepare.sh b/models/cv/detection/yolov7/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..310566fb8b6ddbba24aecc3fdace7d7146063f3d --- /dev/null +++ b/models/cv/detection/yolov7/ixrt/ci/prepare.sh @@ -0,0 +1,35 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip3 install -r requirements.txt +mkdir -p checkpoints +cp -r /root/data/3rd_party/yolov7 ./ +cd yolov7 +ln -s /root/data/checkpoints/yolov7.pt ./ +python3 export.py --weights yolov7.pt --grid --end2end --simplify --topk-all 100 --iou-thres 0.65 --conf-thres 0.35 --img-size 640 640 --max-wh 640 --batch-size 16 +mv yolov7.onnx ../checkpoints/yolov7m.onnx +cd .. 
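With the config and scripts renamed, the YOLOv7 runs mirror the README above; the remaining environment variables (PROJ_DIR, DATASETS_DIR, CHECKPOINTS_DIR, COCO_GT, EVAL_DIR, RUN_DIR) are exported exactly as shown there.

```bash
export CONFIG_DIR=config/YOLOV7_CONFIG   # renamed from YOLOV7M_CONFIG

# FP16
bash scripts/infer_yolov7_fp16_accuracy.sh
bash scripts/infer_yolov7_fp16_performance.sh

# INT8
bash scripts/infer_yolov7_int8_accuracy.sh
bash scripts/infer_yolov7_int8_performance.sh
```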
diff --git a/models/cv/detection/yolov7/ixrt/config/YOLOV7M_CONFIG b/models/cv/detection/yolov7/ixrt/config/YOLOV7_CONFIG similarity index 100% rename from models/cv/detection/yolov7/ixrt/config/YOLOV7M_CONFIG rename to models/cv/detection/yolov7/ixrt/config/YOLOV7_CONFIG diff --git a/models/cv/detection/yolov7/ixrt/inference.py b/models/cv/detection/yolov7/ixrt/inference.py index a7a60c878df96d294cdd56efe87a973a0e1f8765..c0476b899ba0ec51ab4aedc0596f19cb283952ab 100644 --- a/models/cv/detection/yolov7/ixrt/inference.py +++ b/models/cv/detection/yolov7/ixrt/inference.py @@ -180,6 +180,7 @@ def main(config): with open(pred_json, "w") as f: json.dump(json_result, f) + start_time = time.time() anno_json = config.coco_gt anno = COCO(anno_json) # init annotations api pred = anno.loadRes(pred_json) # init predictions api @@ -191,10 +192,12 @@ def main(config): f"==============================eval {config.model_name} {config.precision} coco map ==============================" ) eval.summarize() - + e2e_time = time.time() - start_time map, map50 = eval.stats[:2] + print(F"E2E time : {e2e_time:.3f} seconds") print("MAP@0.5 : ", map50) print(f"Accuracy Check : Test {map50} >= target {config.map_target}") + print(F"E2E time : {e2e_time:.3f} seconds") if map50 >= config.map_target: print("pass!") exit() diff --git a/models/cv/detection/yolov7/ixrt/requirements.txt b/models/cv/detection/yolov7/ixrt/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..f2ec37c1129a168dac9920da4cebdfe78169841a --- /dev/null +++ b/models/cv/detection/yolov7/ixrt/requirements.txt @@ -0,0 +1,7 @@ +tqdm +onnx +onnxsim +ultralytics +pycocotools +opencv-python==4.6.0.66 +pycuda \ No newline at end of file diff --git a/models/cv/detection/yolov7/ixrt/scripts/infer_yolov7m_fp16_accuracy.sh b/models/cv/detection/yolov7/ixrt/scripts/infer_yolov7_fp16_accuracy.sh similarity index 100% rename from models/cv/detection/yolov7/ixrt/scripts/infer_yolov7m_fp16_accuracy.sh rename to models/cv/detection/yolov7/ixrt/scripts/infer_yolov7_fp16_accuracy.sh diff --git a/models/cv/detection/yolov7/ixrt/scripts/infer_yolov7m_fp16_performance.sh b/models/cv/detection/yolov7/ixrt/scripts/infer_yolov7_fp16_performance.sh similarity index 100% rename from models/cv/detection/yolov7/ixrt/scripts/infer_yolov7m_fp16_performance.sh rename to models/cv/detection/yolov7/ixrt/scripts/infer_yolov7_fp16_performance.sh diff --git a/models/cv/detection/yolov7/ixrt/scripts/infer_yolov7m_int8_accuracy.sh b/models/cv/detection/yolov7/ixrt/scripts/infer_yolov7_int8_accuracy.sh similarity index 100% rename from models/cv/detection/yolov7/ixrt/scripts/infer_yolov7m_int8_accuracy.sh rename to models/cv/detection/yolov7/ixrt/scripts/infer_yolov7_int8_accuracy.sh diff --git a/models/cv/detection/yolov7/ixrt/scripts/infer_yolov7m_int8_performance.sh b/models/cv/detection/yolov7/ixrt/scripts/infer_yolov7_int8_performance.sh similarity index 100% rename from models/cv/detection/yolov7/ixrt/scripts/infer_yolov7m_int8_performance.sh rename to models/cv/detection/yolov7/ixrt/scripts/infer_yolov7_int8_performance.sh diff --git a/models/cv/detection/yolov8/ixrt/ci/prepare.sh b/models/cv/detection/yolov8/ixrt/ci/prepare.sh index 6eca919e827c85ab8e9ec4b528d454fb92b5cb4b..58d524a6adcbe53c38fc91b4bd4d804b2fa8800c 100644 --- a/models/cv/detection/yolov8/ixrt/ci/prepare.sh +++ b/models/cv/detection/yolov8/ixrt/ci/prepare.sh @@ -1,3 +1,30 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + pip install -r requirements.txt mkdir -p checkpoints ln -s /root/data/checkpoints/yolov8.pt yolov8.pt diff --git a/models/cv/detection/yolox/ixrt/ci/prepare.sh b/models/cv/detection/yolox/ixrt/ci/prepare.sh index 7c58f5939c782251640c70bc9eaeba043316fea0..cb7a30d73f6f0f3642dbe38555d90d5956d81909 100644 --- a/models/cv/detection/yolox/ixrt/ci/prepare.sh +++ b/models/cv/detection/yolox/ixrt/ci/prepare.sh @@ -1,6 +1,35 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + pip install -r requirements.txt unzip /root/data/repos/yolox-f00a798c8bf59f43ab557a2f3d566afa831c8887.zip -d ./ ln -s /root/data/checkpoints/yolox_m.pth ./YOLOX/ +# install ixrt run +bash /root/data/3rd_party/ixrt-0.10.0+corex.4.2.0.20250115-linux_x86_64.run cd YOLOX && python3 setup.py develop && python3 tools/export_onnx.py --output-name ../yolox.onnx -n yolox-m -c yolox_m.pth --batch-size 32 if [ "$1" = "nvidia" ]; then cd ../plugin && mkdir -p build && cd build && cmake .. 
-DUSE_TRT=1 && make -j12
diff --git a/models/cv/face/facenet/ixrt/README.md b/models/cv/face/facenet/ixrt/README.md
index 0c2df5120bf75917c11d1d5a68c7dd377c5c823a..36ee33dbcfef22c1c812184c3d4fda88abfa260e 100644
--- a/models/cv/face/facenet/ixrt/README.md
+++ b/models/cv/face/facenet/ixrt/README.md
@@ -15,19 +15,7 @@ yum install -y mesa-libGL
 ## Ubuntu
 apt install -y libgl1-mesa-glx
 
-pip3 install tensorflow
-pip3 install onnxsim
-pip3 install scikit-learn
-pip3 install tf_slim
-pip3 install tqdm
-pip3 install pycuda
-pip3 install onnx
-pip3 install tabulate
-pip3 install cv2
-pip3 install scipy==1.8.0
-pip3 install pycocotools
-pip3 install opencv-python==4.6.0.66
-pip3 install simplejson
+pip3 install -r requirements.txt
 ```
 
 ### Download
@@ -49,8 +37,9 @@ unzip 20180408-102900.zip
 mkdir -p checkpoints
 mkdir -p facenet_weights
 git clone https://github.com/timesler/facenet-pytorch
+# facenet-pytorch/dependencies/facenet is a submodule; please make sure it has been cloned, or clone it directly from https://github.com/davidsandberg/facenet/tree/096ed770f163957c1e56efa7feeb194773920f6e
 mv /Path/facenet/ixrt/tensorflow2pytorch.py facenet-pytorch
-python3 /facenet-pytorch/tensorflow2pytorch.py \
+python3 ./facenet-pytorch/tensorflow2pytorch.py \
     --facenet_weights_path ./facenet_weights \
     --facenet_pb_path ./20180408-102900 \
     --onnx_save_name facenet_export.onnx
diff --git a/models/cv/face/facenet/ixrt/ci/prepare.sh b/models/cv/face/facenet/ixrt/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..9d7081e7f053f6117e91bd5df10c9ce25e61c04d
--- /dev/null
+++ b/models/cv/face/facenet/ixrt/ci/prepare.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip3 install -r requirements.txt +unzip -q /root/data/checkpoints/20180408-102900.zip -d ./ +unzip -q /root/data/datasets/facenet_datasets.zip -d ./ +mkdir -p checkpoints +mkdir -p facenet_weights +cp -r /root/data/3rd_party/facenet-pytorch ./ +cp ./tensorflow2pytorch.py facenet-pytorch +python3 ./facenet-pytorch/tensorflow2pytorch.py \ + --facenet_weights_path ./facenet_weights \ + --facenet_pb_path ./20180408-102900 \ + --onnx_save_name facenet_export.onnx +mv facenet_export.onnx ./facenet_weights + +sed -i -e 's#/last_bn/BatchNormalization_output_0#1187#g' -e 's#/avgpool_1a/GlobalAveragePool_output_0#1178#g' deploy.py build_engine.py \ No newline at end of file diff --git a/models/cv/face/facenet/ixrt/inference.py b/models/cv/face/facenet/ixrt/inference.py index ec9876e33c800206003d4d5e2c2d165929ba6591..eaed8b27ca70fb1628c8e5b3351b9e72692150fd 100644 --- a/models/cv/face/facenet/ixrt/inference.py +++ b/models/cv/face/facenet/ixrt/inference.py @@ -58,6 +58,7 @@ def main(config): print("Warm Done.") # Inference + metricResult = {"metricResult": {}} if config.test_mode == "FPS": torch.cuda.synchronize() start_time = time.time() @@ -73,6 +74,7 @@ def main(config): print("FPS : ", fps) print(f"Performance Check : Test {fps} >= target {config.fps_target}") + metricResult["metricResult"]["FPS"] = round(fps, 3) if fps >= config.fps_target: print("pass!") exit() @@ -84,7 +86,7 @@ def main(config): classes = [] embeddings = [] - + start_time = time.time() for xb, yb in tqdm(embed_loader): output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) @@ -102,7 +104,8 @@ def main(config): classes.extend(yb[0:current_imgs_num].numpy()) embeddings.extend(output) - + e2e_time = time.time() - start_time + print(f"E2E time: {e2e_time:.3f} seconds") embeddings_dict = dict(zip(crop_paths,embeddings)) pairs = read_pairs(config.datasets_dir + config.pairs_name) @@ -119,6 +122,9 @@ def main(config): #eer = brentq(lambda x: 1. - x - interpolate.interp1d(fpr, tpr, fill_value="extrapolate")(x), 0., 1.) 
#print('Equal Error Rate (EER): %1.3f' % eer) + metricResult["metricResult"]["E2E time"] = round(e2e_time, 3) + metricResult["metricResult"]["AUC"] = round(auc, 3) + metricResult["metricResult"]["Acc"] = round(np.mean(accuracy), 3) acc = np.mean(accuracy) print(f"Accuracy Check : Test {acc} >= target {config.acc_target}") if acc >= config.acc_target: @@ -127,6 +133,7 @@ def main(config): else: print("failed!") exit(1) + print(metricResult) def parse_config(): parser = argparse.ArgumentParser() diff --git a/models/cv/face/facenet/ixrt/requirements.txt b/models/cv/face/facenet/ixrt/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..b1b549a88d296c1f16d6eeb65bc28b9ddefcaea8 --- /dev/null +++ b/models/cv/face/facenet/ixrt/requirements.txt @@ -0,0 +1,12 @@ +tensorflow +onnxsim +scikit-learn +tf_slim +tqdm +pycuda +onnx +tabulate +scipy==1.8.0 +pycocotools +opencv-python==4.6.0.66 +simplejson \ No newline at end of file diff --git a/models/cv/pose_estimation/lightweightopenpose/ixrt/README.md b/models/cv/pose_estimation/lightweight_openpose/ixrt/README.md similarity index 85% rename from models/cv/pose_estimation/lightweightopenpose/ixrt/README.md rename to models/cv/pose_estimation/lightweight_openpose/ixrt/README.md index cf25c1a887fd5ef8b5e788a57e0a214c31effc98..ca18417a413f950ceba19860c42cf7bd671c8117 100644 --- a/models/cv/pose_estimation/lightweightopenpose/ixrt/README.md +++ b/models/cv/pose_estimation/lightweight_openpose/ixrt/README.md @@ -15,13 +15,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install onnx -pip3 install tqdm -pip3 install onnxsim -pip3 install simplejson -pip3 install opencv-python==4.6.0.66 -pip3 install mmcv==1.5.3 -pip3 install pycocotools +pip3 install -r requirements.txt ``` ### Download @@ -37,15 +31,15 @@ cd lightweight-human-pose-estimation.pytorch mv scripts/convert_to_onnx.py . python3 convert_to_onnx.py --checkpoint-path /Path/to/checkpoint_iter_370000.pth cd .. -mkdir lightweight_openpose -onnxsim ./lightweight-human-pose-estimation.pytorch/human-pose-estimation.onnx ./lightweight_openpose/lightweight_openpose.onnx +mkdir -p checkpoints +onnxsim ./lightweight-human-pose-estimation.pytorch/human-pose-estimation.onnx ./checkpoints/lightweight_openpose.onnx ``` ## Inference ```bash export DATASETS_DIR=/Path/to/coco_pose/ -export CHECKPOINTS_DIR=/Path/to/lightweight_openpose/ +export CHECKPOINTS_DIR=/Path/to/checkpoints/ ``` ### FP16 diff --git a/models/cv/pose_estimation/lightweightopenpose/ixrt/build_engine.py b/models/cv/pose_estimation/lightweight_openpose/ixrt/build_engine.py similarity index 100% rename from models/cv/pose_estimation/lightweightopenpose/ixrt/build_engine.py rename to models/cv/pose_estimation/lightweight_openpose/ixrt/build_engine.py diff --git a/models/cv/pose_estimation/lightweight_openpose/ixrt/ci/prepare.sh b/models/cv/pose_estimation/lightweight_openpose/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..b7c493f5ddbe3b224e94c76951a127ddef100d63 --- /dev/null +++ b/models/cv/pose_estimation/lightweight_openpose/ixrt/ci/prepare.sh @@ -0,0 +1,37 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r requirements.txt + +cp -r /root/data/3rd_party/lightweight-human-pose-estimation.pytorch ./ +cd lightweight-human-pose-estimation.pytorch +mv scripts/convert_to_onnx.py . +ln -s /root/data/checkpoints/checkpoint_iter_370000.pth ./ +python3 convert_to_onnx.py --checkpoint-path checkpoint_iter_370000.pth +cd .. +mkdir -p checkpoints +onnxsim ./lightweight-human-pose-estimation.pytorch/human-pose-estimation.onnx ./checkpoints/lightweight_openpose.onnx \ No newline at end of file diff --git a/models/cv/pose_estimation/lightweightopenpose/ixrt/common.py b/models/cv/pose_estimation/lightweight_openpose/ixrt/common.py similarity index 100% rename from models/cv/pose_estimation/lightweightopenpose/ixrt/common.py rename to models/cv/pose_estimation/lightweight_openpose/ixrt/common.py diff --git a/models/cv/pose_estimation/lightweightopenpose/ixrt/datasets/__init__.py b/models/cv/pose_estimation/lightweight_openpose/ixrt/datasets/__init__.py similarity index 100% rename from models/cv/pose_estimation/lightweightopenpose/ixrt/datasets/__init__.py rename to models/cv/pose_estimation/lightweight_openpose/ixrt/datasets/__init__.py diff --git a/models/cv/pose_estimation/lightweightopenpose/ixrt/datasets/coco.py b/models/cv/pose_estimation/lightweight_openpose/ixrt/datasets/coco.py similarity index 100% rename from models/cv/pose_estimation/lightweightopenpose/ixrt/datasets/coco.py rename to models/cv/pose_estimation/lightweight_openpose/ixrt/datasets/coco.py diff --git a/models/cv/pose_estimation/lightweightopenpose/ixrt/datasets/transformations.py b/models/cv/pose_estimation/lightweight_openpose/ixrt/datasets/transformations.py similarity index 100% rename from models/cv/pose_estimation/lightweightopenpose/ixrt/datasets/transformations.py rename to models/cv/pose_estimation/lightweight_openpose/ixrt/datasets/transformations.py diff --git a/models/cv/pose_estimation/lightweightopenpose/ixrt/inference_accuracy.py b/models/cv/pose_estimation/lightweight_openpose/ixrt/inference_accuracy.py old mode 100755 new mode 100644 similarity index 99% rename from models/cv/pose_estimation/lightweightopenpose/ixrt/inference_accuracy.py rename to models/cv/pose_estimation/lightweight_openpose/ixrt/inference_accuracy.py index ccb1dab002e7eef24288879e7130b744e7b3e9a1..57118a768d7b6cd9644c85464dd62fba274d736a --- a/models/cv/pose_estimation/lightweightopenpose/ixrt/inference_accuracy.py +++ b/models/cv/pose_estimation/lightweight_openpose/ixrt/inference_accuracy.py @@ -296,7 +296,10 @@ def evaluate(labels, output_name, images_folder, engine, context, config, multis def main(config): engine, context = openpose_trtapi_ixrt(config) print(" config and load model ok...") + start_time = time.time() evaluate(config.labels, config.output_name, config.images_folder, engine, context, config) + e2e_time = time.time() - start_time + print(F"E2E time : {e2e_time:.3f} seconds") 
print(" done ...") diff --git a/models/cv/pose_estimation/lightweightopenpose/ixrt/inference_performance.py b/models/cv/pose_estimation/lightweight_openpose/ixrt/inference_performance.py old mode 100755 new mode 100644 similarity index 99% rename from models/cv/pose_estimation/lightweightopenpose/ixrt/inference_performance.py rename to models/cv/pose_estimation/lightweight_openpose/ixrt/inference_performance.py index d472d6d64ecdee4d92c11083d9bdc75c729d1a6e..f22c6ab47cc04ee1765c50442139fbd6c7dc40c6 --- a/models/cv/pose_estimation/lightweightopenpose/ixrt/inference_performance.py +++ b/models/cv/pose_estimation/lightweight_openpose/ixrt/inference_performance.py @@ -133,6 +133,7 @@ def main(config): forward_time = end_time - start_time fps = config.run_loop * config.bsz / forward_time + print("FPS : ", fps) print(f"\nCheck FPS Test : {fps} Target:{config.fps_target} State : {'Pass' if fps >= config.fps_target else 'Fail'}") diff --git a/models/cv/pose_estimation/lightweightopenpose/ixrt/modules/__init__.py b/models/cv/pose_estimation/lightweight_openpose/ixrt/modules/__init__.py similarity index 100% rename from models/cv/pose_estimation/lightweightopenpose/ixrt/modules/__init__.py rename to models/cv/pose_estimation/lightweight_openpose/ixrt/modules/__init__.py diff --git a/models/cv/pose_estimation/lightweightopenpose/ixrt/modules/keypoints.py b/models/cv/pose_estimation/lightweight_openpose/ixrt/modules/keypoints.py similarity index 100% rename from models/cv/pose_estimation/lightweightopenpose/ixrt/modules/keypoints.py rename to models/cv/pose_estimation/lightweight_openpose/ixrt/modules/keypoints.py diff --git a/models/cv/pose_estimation/lightweightopenpose/ixrt/modules/pose.py b/models/cv/pose_estimation/lightweight_openpose/ixrt/modules/pose.py similarity index 100% rename from models/cv/pose_estimation/lightweightopenpose/ixrt/modules/pose.py rename to models/cv/pose_estimation/lightweight_openpose/ixrt/modules/pose.py diff --git a/models/cv/pose_estimation/lightweight_openpose/ixrt/requirements.txt b/models/cv/pose_estimation/lightweight_openpose/ixrt/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..34aca0513ef69adc544e15a8976428b07dfc712c --- /dev/null +++ b/models/cv/pose_estimation/lightweight_openpose/ixrt/requirements.txt @@ -0,0 +1,7 @@ +onnx +tqdm +onnxsim +simplejson +opencv-python==4.6.0.66 +mmcv==1.5.3 +pycocotools \ No newline at end of file diff --git a/models/cv/pose_estimation/lightweightopenpose/ixrt/scripts/infer_lightweight_openpose_fp16_accuracy.sh b/models/cv/pose_estimation/lightweight_openpose/ixrt/scripts/infer_lightweight_openpose_fp16_accuracy.sh similarity index 100% rename from models/cv/pose_estimation/lightweightopenpose/ixrt/scripts/infer_lightweight_openpose_fp16_accuracy.sh rename to models/cv/pose_estimation/lightweight_openpose/ixrt/scripts/infer_lightweight_openpose_fp16_accuracy.sh diff --git a/models/cv/pose_estimation/lightweightopenpose/ixrt/scripts/infer_lightweight_openpose_fp16_performance.sh b/models/cv/pose_estimation/lightweight_openpose/ixrt/scripts/infer_lightweight_openpose_fp16_performance.sh similarity index 100% rename from models/cv/pose_estimation/lightweightopenpose/ixrt/scripts/infer_lightweight_openpose_fp16_performance.sh rename to models/cv/pose_estimation/lightweight_openpose/ixrt/scripts/infer_lightweight_openpose_fp16_performance.sh diff --git a/models/cv/pose_estimation/rtmpose/ixrt/README.md b/models/cv/pose_estimation/rtmpose/ixrt/README.md index 
3e6b68a47647044b2b9116ec57303bef9206f899..ea69ea32d3566bcca744e89342cafd95be05b4d7 100644 --- a/models/cv/pose_estimation/rtmpose/ixrt/README.md +++ b/models/cv/pose_estimation/rtmpose/ixrt/README.md @@ -15,13 +15,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-dev -pip3 install onnx -pip3 install tqdm -pip3 install onnxsim -pip3 install mmdet==3.3.0 -pip3 install mmpose==1.3.1 -pip3 install mmdeploy==1.3.1 -pip3 install mmengine==0.10.4 +pip3 install -r requirements.txt ``` ### Download @@ -37,7 +31,7 @@ Dataset: to download the valida mkdir -p data/rtmpose -wget -P data/rtmpose/ https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-aic-coco_pt-aic-coco_420e-256x192-63eb25f7_20230126.pth +wget -P data/rtmpose/ https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-aic-coco_pt-aic-coco_420e-256x192-63eb25f7_20230126.pth python3 export.py --weight data/rtmpose/rtmpose-m_simcc-aic-coco_pt-aic-coco_420e-256x192-63eb25f7_20230126.pth --cfg rtmpose-m_8xb256-420e_coco-256x192.py --input 1,3,256,192 --output data/rtmpose/rtmpose.onnx diff --git a/models/cv/pose_estimation/rtmpose/ixrt/ci/prepare.sh b/models/cv/pose_estimation/rtmpose/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..d2af81797bac38bbb848063a7bef36a6b0a92c08 --- /dev/null +++ b/models/cv/pose_estimation/rtmpose/ixrt/ci/prepare.sh @@ -0,0 +1,36 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r requirements.txt + +mkdir -p data/rtmpose +ln -s /root/data/checkpoints/rtmpose-m_simcc-aic-coco_pt-aic-coco_420e-256x192-63eb25f7_20230126.pth data/rtmpose/ + +python3 export.py --weight data/rtmpose/rtmpose-m_simcc-aic-coco_pt-aic-coco_420e-256x192-63eb25f7_20230126.pth --cfg rtmpose-m_8xb256-420e_coco-256x192.py --input 1,3,256,192 --output data/rtmpose/rtmpose.onnx + +# use onnxsim optimize onnx model +onnxsim data/rtmpose/rtmpose.onnx data/rtmpose/rtmpose_opt.onnx \ No newline at end of file diff --git a/models/cv/pose_estimation/rtmpose/ixrt/predict.py b/models/cv/pose_estimation/rtmpose/ixrt/predict.py index 9d11f889c2c1e4dc941eded74776cae39946f757..51cfd6c74d1f07bb35204ae01515a0ab50b26afc 100644 --- a/models/cv/pose_estimation/rtmpose/ixrt/predict.py +++ b/models/cv/pose_estimation/rtmpose/ixrt/predict.py @@ -150,6 +150,8 @@ def main(): out_file="./result.jpg") print("Results saved as result.jpg.") + metricResult = {"metricResult": {"Results": "Results saved as result.jpg"}} + print(metricResult) if __name__ == "__main__": main() \ No newline at end of file diff --git a/models/cv/pose_estimation/rtmpose/ixrt/requirements.txt b/models/cv/pose_estimation/rtmpose/ixrt/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..c7459a7a9a3bc8de7fd17408664203e020b1fdf7 --- /dev/null +++ b/models/cv/pose_estimation/rtmpose/ixrt/requirements.txt @@ -0,0 +1,7 @@ +onnx +tqdm +onnxsim +mmdet==3.3.0 +mmpose==1.3.1 +mmdeploy==1.3.1 +mmengine==0.10.4 \ No newline at end of file diff --git a/models/cv/segmentation/mask_rcnn/ixrt/ci/prepare.sh b/models/cv/segmentation/mask_rcnn/ixrt/ci/prepare.sh index 38f651f98a1ebf3d18790898b88f98d0f8ac1cf4..022d4d31a2ecbd325bd14c35fe0acab446e01f67 100644 --- a/models/cv/segmentation/mask_rcnn/ixrt/ci/prepare.sh +++ b/models/cv/segmentation/mask_rcnn/ixrt/ci/prepare.sh @@ -1,5 +1,26 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -x + ln -s /root/data/checkpoints/maskrcnn.wts ./python/ ln -s /root/data/datasets/coco ./coco +# install ixrt run +bash /root/data/3rd_party/ixrt-0.10.0+corex.4.2.0.20250115-linux_x86_64.run + if [ "$1" = "nvidia" ]; then cd scripts && bash init_nv.sh else diff --git a/models/cv/segmentation/mask_rcnn/ixrt/python/maskrcnn.py b/models/cv/segmentation/mask_rcnn/ixrt/python/maskrcnn.py index 75484195b0424a3ef852e620fb1dea2390048700..455ff850943d77cb464e3195e9529fda1b4905d3 100644 --- a/models/cv/segmentation/mask_rcnn/ixrt/python/maskrcnn.py +++ b/models/cv/segmentation/mask_rcnn/ixrt/python/maskrcnn.py @@ -186,6 +186,7 @@ def run_maskrcnn(engine_file, image_folder): def get_maskrcnn_perf(config): cuda.init() logger = trt.Logger(trt.Logger.WARNING) + metricResult = {"metricResult": {}} engine_file_buffer = open(config.engine_file, "rb") runtime = trt.Runtime(logger) assert runtime @@ -227,6 +228,8 @@ def get_maskrcnn_perf(config): output["allocation"].free() engine_file_buffer.close() + metricResult["metricResult"]["FPS"] = round(fps, 3) + print(metricResult) print("\nFPS : ", fps) print(f"Performance Check : Test {fps} >= target {config.fps_target}") if fps >= config.fps_target: @@ -237,6 +240,7 @@ def get_maskrcnn_perf(config): def get_maskrcnn_acc(config): json_result = [] + metricResult = {"metricResult": {}} class_map = coco80_to_coco91_class() # Load dataloader @@ -313,10 +317,6 @@ def get_maskrcnn_acc(config): batched_paddings[0] ) save2json(batch_img_id, bboxs_masks, json_result, class_map) - end_time = time.time() - end2end_time = end_time - start_time - - print(F"E2E time : {end2end_time:.3f} seconds") print("Forward done !") tmp_result_name = "pred_results.json" @@ -341,6 +341,10 @@ def get_maskrcnn_acc(config): print(f"==============================eval COCO segm mAP ==============================") segm_eval.summarize() + end_time = time.time() + end2end_time = end_time - start_time + + print(F"E2E time : {end2end_time:.3f} seconds") _, map50 = eval.stats[:2] print("bbox mAP@0.5 : ", map50) print(f"bbox Accuracy Check : Test {map50} >= target {config.map_target}") @@ -348,6 +352,10 @@ def get_maskrcnn_acc(config): _, segm_map50 = segm_eval.stats[:2] print("segm mAP@0.5 : ", segm_map50) print(f"segm Accuracy Check : Test {segm_map50} >= target {config.segm_map_target}") + metricResult["metricResult"]["E2E time"] = round(end2end_time, 3) + metricResult["metricResult"]["bbox mAP@0.5"] = round(map50, 3) + metricResult["metricResult"]["segm mAP@0.5"] = round(segm_map50, 3) + print(metricResult) if map50 >= config.map_target and segm_map50 >= config.segm_map_target: print("pass!") diff --git a/models/cv/segmentation/mask_rcnn/ixrt/scripts/init.sh b/models/cv/segmentation/mask_rcnn/ixrt/scripts/init.sh index bd1fe1177a797dc149968f82017ee25877667c00..3b0949ae02839276e1d1b5f53aba388cb91abdc5 100644 --- a/models/cv/segmentation/mask_rcnn/ixrt/scripts/init.sh +++ b/models/cv/segmentation/mask_rcnn/ixrt/scripts/init.sh @@ -31,6 +31,7 @@ bash prepare_system_env.sh #pip3 install opencv-python==4.6.0.66 pip3 install pycocotools==2.0.7 pip3 install tqdm +pip3 install pycuda # build engine cd ../python diff --git a/models/cv/segmentation/solov1/ixrt/README.md b/models/cv/segmentation/solov1/ixrt/README.md index d675f5494bcb969639fa84b1c2d4af78e0f075bb..45de0d380974ad938105fe249b78291a24f33cab 100644 --- a/models/cv/segmentation/solov1/ixrt/README.md +++ b/models/cv/segmentation/solov1/ixrt/README.md @@ -11,20 +11,15 @@ SOLO (Segmenting Objects by Locations) is a new instance segmentation 
method tha
 ```bash
 yum install mesa-libGL
 
-pip3 install tqdm
-pip3 install onnx
-pip3 install onnxsim
-pip3 install tabulate
-pip3 install mmdet==2.28.2
-pip3 install addict
-pip3 install yapf
-pip3 install pycuda
+pip3 install -r requirements.txt
 ```
 
 ### Dependency
 
 The inference of the Solov1 model requires a dependency on a well-adapted mmcv-v1.7.0 library. Please inquire with the staff to obtain the relevant libraries.
 
+You can follow the script here to build it: https://gitee.com/deep-spark/deepsparkhub/blob/master/toolbox/MMDetection/prepare_mmcv.sh
+
 ```bash
 cd mmcv
 sh build_mmcv.sh
diff --git a/models/cv/segmentation/solov1/ixrt/ci/prepare.sh b/models/cv/segmentation/solov1/ixrt/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..09fa1878415ca72f1fd17b9ca6e19b16926756f9
--- /dev/null
+++ b/models/cv/segmentation/solov1/ixrt/ci/prepare.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+    apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+    yum install -y mesa-libGL
+else
+    echo "Not Support Os"
+fi
+
+pip install -r requirements.txt
+
+cp -r /root/data/3rd_party/mmcv-v1.7.1 ./mmcv
+cp -r -T /root/data/repos/deepsparkhub/toolbox/MMDetection/patch/mmcv/v1.7.1 ./mmcv
+cd mmcv
+rm -rf mmcv/ops/csrc/common/cuda/spconv/ mmcv/ops/csrc/common/utils/spconv/
+rm -f mmcv/ops/csrc/pytorch/cpu/sparse_*
+rm -f mmcv/ops/csrc/pytorch/cuda/fused_spconv_ops_cuda.cu
+rm -f mmcv/ops/csrc/pytorch/cuda/spconv_ops_cuda.cu
+rm -f mmcv/ops/csrc/pytorch/cuda/sparse_*
+rm -f mmcv/ops/csrc/pytorch/sp*
+
+bash clean_mmcv.sh
+bash build_mmcv.sh
+bash install_mmcv.sh
+cd ..
+ +mkdir -p checkpoints +ln -s /root/data/checkpoints/solo_r50_fpn_3x_coco_20210901_012353-11d224d7.pth ./ +ln -s /root/data/datasets/coco ./ +python3 solo_torch2onnx.py --cfg ./solo_r50_fpn_3x_coco.py --checkpoint ./solo_r50_fpn_3x_coco_20210901_012353-11d224d7.pth --batch_size 1 +mv r50_solo_bs1_800x800.onnx ./checkpoints/r50_solo_bs1_800x800.onnx \ No newline at end of file diff --git a/models/cv/segmentation/solov1/ixrt/requirements.txt b/models/cv/segmentation/solov1/ixrt/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..0c2fa63498d94213b2add360b81c0270cbc9c474 --- /dev/null +++ b/models/cv/segmentation/solov1/ixrt/requirements.txt @@ -0,0 +1,8 @@ +tqdm +onnx +onnxsim +tabulate +mmdet==2.28.2 +addict +yapf +pycuda \ No newline at end of file diff --git a/models/cv/segmentation/solov1/ixrt/solov1_inference.py b/models/cv/segmentation/solov1/ixrt/solov1_inference.py index 473bff852f14930e7dacc20ec9a204b9222039fe..594286c05cc88ab48b540ce3897135da99ade7c7 100644 --- a/models/cv/segmentation/solov1/ixrt/solov1_inference.py +++ b/models/cv/segmentation/solov1/ixrt/solov1_inference.py @@ -139,15 +139,20 @@ def main(): # Load Engine engine, context = create_engine_context(args.engine, logger) inputs, outputs, allocations = get_io_bindings(engine) - + metricResult = {"metricResult": {}} if args.task=="precision": + start_time = time.time() segm_mAP= eval_coco(args,inputs, outputs, allocations, context) + e2e_time = time.time() - start_time + print(F"E2E time : {e2e_time:.3f} seconds") print("="*40) print("segm_mAP:{0}".format(round(segm_mAP,3))) print("="*40) print(f"Check segm_mAP Test : {round(segm_mAP,3)} Target:{args.target_map} State : {'Pass' if round(segm_mAP,3) >= args.target_map else 'Fail'}") + metricResult["metricResult"]["segm_mAP"] = round(segm_mAP, 3) status_map = check_target(segm_mAP, args.target_map) + print(metricResult) sys.exit(int(not (status_map))) else: torch.cuda.synchronize() @@ -162,7 +167,9 @@ def main(): print("fps:{0}".format(round(fps,2))) print("="*40) print(f"Check fps Test : {round(fps,3)} Target:{args.target_fps} State : {'Pass' if fps >= args.target_fps else 'Fail'}") + metricResult["metricResult"]["FPS"] = round(fps, 3) status_fps = check_target(fps, args.target_fps) + print(metricResult) sys.exit(int(not (status_fps))) if __name__ == "__main__": diff --git a/models/multimodal/text_and_image/clip/ixformer/README.md b/models/multimodal/text_and_image/clip/ixrt/README.md similarity index 91% rename from models/multimodal/text_and_image/clip/ixformer/README.md rename to models/multimodal/text_and_image/clip/ixrt/README.md index 7b5ccd67ab09feadbb3983f83753afb6507b2a5a..587b1cc27e394b434f5ca37a55b8b2c8cfd50c18 100644 --- a/models/multimodal/text_and_image/clip/ixformer/README.md +++ b/models/multimodal/text_and_image/clip/ixrt/README.md @@ -25,9 +25,9 @@ pip3 install -U transformers==4.27.1 Pretrained model: Go to the website to find the pre-trained model you need. Here, we choose clip-vit-base-patch32. ```bash -# Download model from the website and make sure the model's path is "/home/data/openai/clip-vit-base-patch32" -mkdir -p /data -unzip clip-vit-base-patch32.zip +# Download model from the website and make sure the model's path is "data/clip-vit-base-patch32" +mkdir -p data +unzip clip-vit-base-patch32.zip -d data/ ``` ## Run model @@ -37,5 +37,5 @@ unzip clip-vit-base-patch32.zip Please modify the part in the test_clip.py script that pertains to the model path. 
```bash -python3 test_clip.py +python3 inference.py ``` diff --git a/models/multimodal/text_and_image/clip/ixrt/ci/prepare.sh b/models/multimodal/text_and_image/clip/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..606a1d985caebd7046958720cfc8077996ceb7ae --- /dev/null +++ b/models/multimodal/text_and_image/clip/ixrt/ci/prepare.sh @@ -0,0 +1,31 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip3 install -U transformers==4.27.1 + +mkdir -p data +ln -s /root/data/checkpoints/clip-vit-base-patch32 data/ \ No newline at end of file diff --git a/models/multimodal/text_and_image/clip/ixformer/inference.py b/models/multimodal/text_and_image/clip/ixrt/inference.py similarity index 81% rename from models/multimodal/text_and_image/clip/ixformer/inference.py rename to models/multimodal/text_and_image/clip/ixrt/inference.py index 013c96e8cd5123312751021a3ae0ee898515826e..e690c988e82d59ddd4b7fe3e3b8a3b11a7ee00db 100644 --- a/models/multimodal/text_and_image/clip/ixformer/inference.py +++ b/models/multimodal/text_and_image/clip/ixrt/inference.py @@ -26,18 +26,20 @@ from transformers import CLIPProcessor device = torch.device("cuda" if torch.cuda.is_available() else "cpu") model = ( - CLIPModel.from_pretrained("/home/data/openai/clip-vit-base-patch32") + CLIPModel.from_pretrained("data/clip-vit-base-patch32") .to(device) .half() ) model = model.eval() -processor = CLIPProcessor.from_pretrained("/home/data/openai/clip-vit-base-patch32") +processor = CLIPProcessor.from_pretrained("data/clip-vit-base-patch32") url = "http://images.cocodataset.org/val2017/000000039769.jpg" image = Image.open(requests.get(url, stream=True).raw) +metricResult = {"metricResult": {}} batch_size_list = [32, 64, 128, 256, 512, 1024, 2048] with torch.no_grad(): + e2e_start_time = time.time() for batch_size in batch_size_list: images = [image for item in range(batch_size)] inputs = processor( @@ -67,5 +69,8 @@ with torch.no_grad(): ) # we can take the softmax to get the label probabilities print(probs[:5]) print(probs[-5:-1]) - - print("QPS: ", batch_size / (end_time - start_time)) \ No newline at end of file + metricResult["metricResult"][f"QPS-batch_size-{batch_size}"] = round(batch_size / (end_time - start_time), 3) + print("QPS: ", batch_size / (end_time - start_time)) + e2e_time = time.time() - e2e_start_time + metricResult["metricResult"]["E2E time"] = round(e2e_time, 3) + print(metricResult) \ No newline at end of file diff --git a/models/nlp/language_model/albert/ixrt/README.md b/models/nlp/language_model/albert/ixrt/README.md index a2523b43c2a268b7bfa50cce69ef6754b07a7520..2af14b2be4251270f21b3e9646861aed407abcf0 100644 --- 
a/models/nlp/language_model/albert/ixrt/README.md +++ b/models/nlp/language_model/albert/ixrt/README.md @@ -11,17 +11,7 @@ Albert (A Lite BERT) is a variant of the BERT (Bidirectional Encoder Representat ```bash apt install -y libnuma-dev -pip3 install onnxsim -pip3 install onnx_graphsurgeon -pip3 install scikit-learn -pip3 install tqdm -pip3 install pycuda -pip3 install onnx -pip3 install tabulate -pip3 install cv2 -pip3 install pycocotools -pip3 install opencv-python==4.6.0.66 -pip3 install transformers==4.33.3 +pip3 install -r requirements.txt ``` ### Download @@ -52,8 +42,10 @@ onnxsim albert-torch-fp32.onnx albert-torch-fp32-sim.onnx ## Inference ```bash +git clone https://gitee.com/deep-spark/iluvatar-corex-ixrt.git --depth=1 + export ORIGIN_ONNX_NAME=./albert-torch-fp32-sim -export OPTIMIER_FILE=./ixrt-oss/tools/optimizer/optimizer.py +export OPTIMIER_FILE=./iluvatar-corex-ixrt/tools/optimizer/optimizer.py export PROJ_PATH=./ ``` @@ -77,9 +69,6 @@ ln -s ${PROJ_ROOT}/toolbox/ByteMLPerf ./ pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/requirements.txt pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/requirements.txt -# modify perf_engine.py -mv ./perf_engine.py ./ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py - # edit madlag/albert-base-v2-squad path sed -i "s#madlag#/${MODEL_PATH}/madlag#" ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/data_loader.py @@ -92,6 +81,8 @@ cp ./general_perf/model_zoo/popular/open_albert/*.pt ./ByteMLPerf/byte_infer_per # run acc script cd ./ByteMLPerf/byte_infer_perf/general_perf +mkdir -p workloads +wget -O workloads/albert-torch-fp32.json https://raw.githubusercontent.com/bytedance/ByteMLPerf/refs/heads/main/byte_infer_perf/general_perf/workloads/albert-torch-fp32.json sed -i 's/tensorrt_legacy/tensorrt/' ./backends/ILUVATAR/common.py sed -i 's/tensorrt_legacy/tensorrt/' ./backends/ILUVATAR/compile_backend_iluvatar.py sed -i 's/tensorrt_legacy/tensorrt/' ./backends/ILUVATAR/runtime_backend_iluvatar.py diff --git a/models/nlp/language_model/albert/ixrt/ci/prepare.sh b/models/nlp/language_model/albert/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..d78865ec0c31e4dbb393d2d89b4d4ac6a2ce391d --- /dev/null +++ b/models/nlp/language_model/albert/ixrt/ci/prepare.sh @@ -0,0 +1,56 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -x + +apt install -y libnuma-dev + +pip3 install -r requirements.txt + +cp /root/data/3rd_party/albert-torch-fp32.json ./ + +python3 torch2onnx.py --model_path /root/data/checkpoints/open_albert/albert-base-squad.pt --output_path albert-torch-fp32.onnx +onnxsim albert-torch-fp32.onnx albert-torch-fp32-sim.onnx + +mkdir -p data/open_albert +mv ./albert-torch-fp32-sim.onnx data/open_albert/albert.onnx + +# link and install requirements +ln -s ../../../../../toolbox/ByteMLPerf ./ +pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/requirements.txt +pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/requirements.txt + +# edit madlag/albert-base-v2-squad path +# sed -i "s#madlag#/${MODEL_PATH}/madlag#" ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/data_loader.py +mv madlag ./ByteMLPerf/byte_infer_perf/general_perf/ + +# copy open_squad data +cp /root/data/datasets/open_squad/* ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/ + +# copy open_albert data +mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/open_albert +cp /root/data/checkpoints/open_albert/*.pt ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/open_albert + +# run acc script +cd ./ByteMLPerf/byte_infer_perf/general_perf +# wget http://files.deepspark.org.cn:880/deepspark/madlag.tar +cp /root/data/3rd_party/madlag.tar ./ +tar xvf madlag.tar +rm -f madlag.tar +cp -r /root/data/3rd_party/workloads ./ +sed -i 's/tensorrt_legacy/tensorrt/' ./backends/ILUVATAR/common.py +sed -i 's/tensorrt_legacy/tensorrt/' ./backends/ILUVATAR/compile_backend_iluvatar.py +sed -i 's/tensorrt_legacy/tensorrt/' ./backends/ILUVATAR/runtime_backend_iluvatar.py \ No newline at end of file diff --git a/models/nlp/language_model/albert/ixrt/perf_engine.py b/models/nlp/language_model/albert/ixrt/perf_engine.py deleted file mode 100644 index 089d9860f573bba7e19f84aa20fb830a8fcc22d8..0000000000000000000000000000000000000000 --- a/models/nlp/language_model/albert/ixrt/perf_engine.py +++ /dev/null @@ -1,349 +0,0 @@ -# Copyright 2023 ByteDance and/or its affiliates. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import sys -import os -import logging -import importlib -import json -import subprocess -import time - -from typing import Any, Dict, Tuple -from prompt_toolkit.shortcuts import radiolist_dialog, input_dialog, yes_no_dialog -from prompt_toolkit.styles import Style - -BYTE_MLPERF_ROOT = os.path.dirname( - os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -os.chdir(BYTE_MLPERF_ROOT) -sys.path.insert(0, BYTE_MLPERF_ROOT) - -import argparse -from general_perf.core.configs.workload_store import load_workload -from general_perf.core.configs.dataset_store import load_dataset -from general_perf.core.configs.backend_store import init_compile_backend, init_runtime_backend - -logging.basicConfig(level=logging.INFO) -log = logging.getLogger("PerfEngine") -os.environ["TF_CPP_MIN_LOG_LEVEL"] = '3' - - -def get_args(): - """Parse commandline.""" - parser = argparse.ArgumentParser() - parser.add_argument( - "--task", - default="resnet50-tf-fp32", - help="The task going to be evaluted, refs to workloads/") - parser.add_argument( - "--hardware_type", - default="GPU", - help="The backend going to be evaluted, refs to backends/") - parser.add_argument("--compile_only", - action='store_true', - help="Run compilation only") - - args = parser.parse_args() - return args - - -class PerfEngine: - def __init__(self) -> None: - super().__init__() - self.args = get_args() - self.workload = load_workload(self.args.task) - self.backend_type = self.args.hardware_type - self.compile_backend = None - self.old_os_path = os.environ['PATH'] - self.prev_sys_path = list(sys.path) - self.real_prefix = sys.prefix - self.compile_only_mode = False - - def start_engine(self) -> None: - ''' - Byte MlPerf will create an virtual env for each backend to avoid dependance conflict - ''' - success, total = 0, len(self.workload) - if total == 0: - return - log.info("******************* Backend Env Initization *******************") - status = self.activate_venv(self.backend_type) - if not status: - log.warning("Activate virtualenv Failed, Please Check...") - - self.compile_backend = init_compile_backend(self.backend_type) - self.runtime_backend = init_runtime_backend(self.backend_type) - - output_dir = os.path.abspath('general_perf/reports/' + - self.backend_type) - os.makedirs(output_dir, exist_ok=True) - - status = self.single_workload_perf(self.workload) - - def single_workload_perf( - self, workload: Dict[str, Any]) -> bool: - log.info("******************************************* Start to test model: {}. *******************************************".format(workload['model'])) - - # Check Compile Only Mode - self.compile_only_mode = False - if self.args.compile_only or workload['compile_only']: - self.compile_only_mode = True - - base_report = { - "Model": workload['model'].upper(), - "Backend": self.backend_type, - "Host Info": self.get_cpu_name() - } - - # Initalize Model Config Info - model_info = self.get_model_info(workload['model']) - pre_compile_config = {"workload": workload, 'model_info': model_info} - interact_info = self.check_interact_info(pre_compile_config) - pre_compile_config['interact_info'] = interact_info - if not model_info['dataset_name']: - model_info['dataset_name'] = 'fake_dataset' - - - ''' - Compile Backend could do some optimization like convert model format here - ''' - log.info("******************************************* Running Backend Compilation... 
*******************************************") - log.info("Running Backend Preoptimization...") - pre_compile_config = self.compile_backend.pre_optimize(pre_compile_config) - - - # Initalize dataset - dataset = load_dataset(model_info) - dataset.preprocess() - base_report['Dataset'] = model_info['dataset_name'].upper( - ) if model_info['dataset_name'] else None - - #Placeholder Only - segment_info = self.compile_backend.segment(pre_compile_config) - - best_batch_sizes = self.compile_backend.get_best_batch_size() - if isinstance(best_batch_sizes, list): - pre_compile_config['workload'][ - 'batch_sizes'] = best_batch_sizes - - log.info("Start to compile the model...") - start = time.time() - compile_info = self.compile_backend.compile(pre_compile_config, - dataset) - end = time.time() - - graph_compile_report = {} - graph_compile_report["Compile Duration"] = round(end - start, 5) - graph_compile_report["Compile Precision"] = compile_info[ - 'compile_precision'] - graph_compile_report["Subgraph Coverage"] = compile_info['sg_percent'] - if 'optimizations' in compile_info: - graph_compile_report['Optimizations'] = compile_info['optimizations'] - if 'instance_count' in compile_info: - base_report['Instance Count'] = compile_info['instance_count'] - if 'device_count' in compile_info: - base_report['Device Count'] = compile_info['device_count'] - base_report['Graph Compile'] = graph_compile_report - - # Initalize Output Dir and Reports - output_dir = os.path.abspath('general_perf/reports/' + - self.backend_type + '/' + - workload['model']) - os.makedirs(output_dir, exist_ok=True) - - # Compile only mode will stop here - if self.compile_only_mode: - base_report.pop("Backend") - return compile_info["compile_status"], base_report - - # load runtime backend - """ - Start Here - """ - batch_sizes = pre_compile_config['workload']['batch_sizes'] - self.runtime_backend.configs = compile_info - self.runtime_backend.workload = workload - self.runtime_backend.model_info = model_info - - self.runtime_backend.load(workload['batch_sizes'][0]) - # test accuracy - accuracy_report = {} - AccuracyChecker = self.get_accuracy_checker( - model_info['dataset_name'] - if model_info['dataset_name'] else 'fake_dataset') - AccuracyChecker.runtime_backend = self.runtime_backend - AccuracyChecker.dataloader = dataset - AccuracyChecker.output_dir = output_dir - AccuracyChecker.configs = compile_info - - if workload['test_accuracy']: - log.info("******************************************* Running Accuracy Checker... *******************************************") - - dataset.rebatch(self.runtime_backend.get_loaded_batch_size()) - accuracy_results = AccuracyChecker.calculate_acc( - workload['data_percent']) - - accuracy_report['Data Percent'] = workload['data_percent'] - accuracy_report.update(accuracy_results) - - # test numeric - if workload['test_numeric']: - log.info("******************************************* Running Numeric Checker... 
*******************************************") - - dataset.rebatch(self.runtime_backend.get_loaded_batch_size()) - if not workload['test_accuracy']: - accuracy_results = AccuracyChecker.calculate_acc( - workload['data_percent']) - diff_results = AccuracyChecker.calculate_diff() - accuracy_report.update(diff_results) - # accuracy_report['Diff Dist'] = compile_info['model'] + '-to-' + compile_info['compile_precision'].lower() + ".png" - - if accuracy_report: - base_report['Accuracy'] = accuracy_report - - # function to test qps and latency - if workload['test_perf']: - log.info("******************************************* Runing QPS Checker... *******************************************") - performance_reports = [] - qs_status = self.runtime_backend.is_qs_mode_supported() - if qs_status: - qs_config = self.runtime_backend.generate_qs_config() - performance_reports = self.qs_benchmark(qs_config) - else: - for bs in batch_sizes: - self.runtime_backend.load(bs) - batch_reports = self.runtime_backend.benchmark(dataset) - performance_reports.append(batch_reports) - base_report['Performance'] = performance_reports - - if "Instance Count" not in base_report: - log.warning("Vendors need to Add # of instances") - if "Device Count" not in base_report: - log.warning("Vendors need to Add # of devices") - - # write output to json file - output_report_path = output_dir + "/result-" + compile_info['compile_precision'].lower() + ".json" - with open(output_report_path, 'w') as file: - json.dump(base_report, file, indent=4) - - base_report.pop("Backend") - log.info("Testing Finish. Report is saved in path: [ {}/{} ]". - format(output_dir[output_dir.rfind('general_perf'):], - os.path.basename(output_report_path))) - - return compile_info["compile_status"] - - #WIP - def qs_benchmark(self, qs_config: Dict[str, Any]) -> list: - return [] - - def get_accuracy_checker(self, dataset_name: str): - AccuracyChecker = importlib.import_module('general_perf.datasets.' 
+ - dataset_name + - ".test_accuracy") - AccuracyChecker = getattr(AccuracyChecker, 'AccuracyChecker') - return AccuracyChecker() - - def get_model_info(self, model_name: str) -> Dict[str, Any]: - with open("general_perf/model_zoo/" + model_name + '.json', - 'r') as file: - model_info = json.load(file) - return model_info - - def get_cpu_name(self): - command = "lscpu | grep 'Model name' | awk -F: '{print $2}'" - cpu_name = subprocess.check_output(command, shell=True) - return cpu_name.decode().strip() - - def check_interact_info( - self, pre_compile_config: Dict[str, Dict]) -> Dict[str, Any]: - interact_info = self.compile_backend.get_interact_profile( - pre_compile_config) - - answer = {} - if len(interact_info) == 0: - return answer - - dialog_style = Style.from_dict({ - 'dialog': 'bg:#88b8ff', - 'dialog frame.label': 'bg:#ffffff #000000', - 'dialog.body': 'bg:#000000 #a0acde', - 'dialog shadow': 'bg:#004aaa', - }) - - input_style = Style.from_dict({ - 'dialog': 'bg:#88b8ff', - 'dialog frame.label': 'bg:#ffffff #000000', - 'dialog.body': 'bg:#000000 #a0acde', - 'dialog shadow': 'bg:#004aaa', - 'text-area.prompt': 'bg:#ffffff', - 'text-area': '#000000', - }) - - option = yes_no_dialog(title=self.backend_type + '编译配置', - text='[请选择]:是否进行编译后端配置:', - style=dialog_style).run() - if option: - sum_question = len(interact_info) - for i, question in enumerate(interact_info): - if question['depends']: - state = 0 - for title in question['depends'].split(','): - if not answer[title]: - state = 1 - if state: - continue - if question['dialog_type'] == 'Yes/No Dialog': - option = yes_no_dialog( - title=self.backend_type + '编译配置进度(' + str(i + 1) + - '/' + str(sum_question) + ')', - text="[Backend " + self.backend_type + "]: " + - question['note'], - style=dialog_style).run() - elif question['dialog_type'] == "Input Dialog": - option = input_dialog( - title=self.backend_type + '编译配置进度(' + str(i + 1) + - '/' + str(sum_question) + ')', - text="[Backend " + self.backend_type + "]: " + - question['note'], - style=input_style).run() - elif question['dialog_type'] == "Radiolist Dialog": - choice = [(i, text) - for i, text in enumerate(question['options'])] - num = radiolist_dialog( - title=self.backend_type + '编译配置进度(' + str(i + 1) + - '/' + str(sum_question) + ')', - text="[Backend " + self.backend_type + "]: " + - question['note'], - values=choice, - style=dialog_style).run() - option = question['options'][num] if num is not None else question[ - 'default'] - answer[question['name']] = option - - return answer - - def activate_venv(self, hardware_type: str) -> bool: - - return True - - def deactivate_venv(self): - sys.path[: - 0] = self.prev_sys_path #will also revert the added site-packages - sys.prefix = self.real_prefix - os.environ['PATH'] = self.old_os_path - - -if __name__ == "__main__": - engine = PerfEngine() - engine.start_engine() diff --git a/models/nlp/language_model/albert/ixrt/requirements.txt b/models/nlp/language_model/albert/ixrt/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..5b4007837b93c587033b8d324799b9abe0a323e2 --- /dev/null +++ b/models/nlp/language_model/albert/ixrt/requirements.txt @@ -0,0 +1,11 @@ +onnxsim +onnx_graphsurgeon +scikit-learn +tqdm +pycuda +onnx +tabulate +pycocotools +opencv-python==4.6.0.66 +transformers==4.33.3 +typing-extensions==4.12.2 \ No newline at end of file diff --git a/models/nlp/language_model/bert_base_squad/ixrt/ci/prepare.sh b/models/nlp/language_model/bert_base_squad/ixrt/ci/prepare.sh index 
293f93555de41555327cd370c2fdef8083e1bbff..e1d8b7f3acb4312a1bd34bae368ac9583b656815 100644 --- a/models/nlp/language_model/bert_base_squad/ixrt/ci/prepare.sh +++ b/models/nlp/language_model/bert_base_squad/ixrt/ci/prepare.sh @@ -1,3 +1,42 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + pip install -r requirements.txt + +# install ixrt run +bash /root/data/3rd_party/ixrt-0.10.0+corex.4.2.0.20250115-linux_x86_64.run + +if [ "$1" = "nvidia" ]; then + cmake -S . -B build -DUSE_TENSORRT=true + cmake --build build -j16 +else + cmake -S . -B build + cmake --build build -j16 +fi + mkdir -p ./python/data ln -s /root/data/checkpoints/bert_base_uncased_squad/ ./python/data && ln -s /root/data/datasets/squad/ ./python/data \ No newline at end of file diff --git a/models/nlp/language_model/bert_base_squad/ixrt/python/builder.py b/models/nlp/language_model/bert_base_squad/ixrt/python/builder.py deleted file mode 100644 index bd932d4822d2b3afc88dba4901161a7297be422f..0000000000000000000000000000000000000000 --- a/models/nlp/language_model/bert_base_squad/ixrt/python/builder.py +++ /dev/null @@ -1,398 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -import os -import argparse -import json -import tensorrt as trt -import time -import sys -import ctypes -import os -import numpy as np -from builder_utils import load_onnx_weights_and_quant, load_pytorch_weights_and_quant -from builder_utils import WQKV, BQKV # Attention Keys -from builder_utils import W_AOUT, B_AOUT, W_MID, B_MID, W_LOUT, B_LOUT # Transformer Keys -from builder_utils import SQD_W, SQD_B # SQuAD Output Keys - -trt_version = [int(n) for n in trt.__version__.split('.')] -plugin_lib_name = "libnvinfer_plugin.so" if os.getenv('USE_TRT') == 'True' else "libixrt_plugin.so" -print(plugin_lib_name) - -TRT_LOGGER = trt.Logger(trt.Logger.WARNING) -from load_ixrt_plugin import load_ixrt_plugin, is_nvidia_platform -load_ixrt_plugin(TRT_LOGGER) - -plg_registry = trt.get_plugin_registry() -registry_list = plg_registry.plugin_creator_list -print("registry_list: ", [registry.name + '/' + registry.plugin_version for registry in registry_list]) -emln_plg_creator = plg_registry.get_plugin_creator("CustomEmbLayerNormPluginDynamic_IxRT", "1", "") -qkv2_plg_creator = plg_registry.get_plugin_creator("CustomQKVToContextPluginDynamic_IxRT", "1", "") -skln_plg_creator = plg_registry.get_plugin_creator("CustomSkipLayerNormPluginDynamic_IxRT", "1", "") -ffn_plg_creator = plg_registry.get_plugin_creator("CustomFFNPluginDynamic_IxRT", "1", "") -gelu_plg_creator = plg_registry.get_plugin_creator("CustomGeluPluginDynamic_IxRT", "1", "") -fc_plg_creator = plg_registry.get_plugin_creator("CustomFCPluginDynamic_IxRT", "1", "") - -class BertConfig: - def __init__(self, bert_config_path, use_fp16, use_trt): - with open(bert_config_path, "r") as f: - data = json.load(f) - self.num_attention_heads = data["num_attention_heads"] - self.hidden_size = data["hidden_size"] - self.intermediate_size = data["intermediate_size"] - self.num_hidden_layers = data["num_hidden_layers"] - self.head_size = self.hidden_size // self.num_attention_heads - self.use_fp16 = use_fp16 - self.use_trt = use_trt - -def set_tensor_name(tensor, prefix, name): - tensor.name = prefix + name - -def set_output_name(layer, prefix, name, out_idx = 0): - set_tensor_name(layer.get_output(out_idx), prefix, name) - -def set_output_range(layer, maxval, out_idx = 0): - layer.get_output(out_idx).set_dynamic_range(-maxval, maxval) - -def get_mha_dtype(config): - dtype = trt.float32 - if config.use_fp16: - dtype = trt.float16 - return int(dtype) - -def custom_fc(network, input_tensor, out_dims, W, B): - pf_out_dims = trt.PluginField("out_dims", np.array(out_dims, dtype=np.int32), trt.PluginFieldType.INT32) - pf_type = trt.PluginField("type_id", np.array(int(trt.float16), dtype=np.int32), trt.PluginFieldType.INT32) - pf_W = trt.PluginField("W", W, trt.PluginFieldType.FLOAT32) - fields = [pf_out_dims, pf_type, pf_W] - if B is not None: - pf_B = trt.PluginField("B", B, trt.PluginFieldType.FLOAT32) - fields.append(pf_B) - - pfc = trt.PluginFieldCollection(fields) - fc_plugin = fc_plg_creator.create_plugin("fcplugin", pfc) - plug_inputs = [input_tensor] - out_dense = network.add_plugin_v2(plug_inputs, fc_plugin) - return out_dense - -def attention_layer_opt(prefix, config, init_dict, network, input_tensor, imask): - """ - Add the attention layer - """ - assert(len(input_tensor.shape) == 5) - B, S, hidden_size, _, _ = input_tensor.shape - num_heads = config.num_attention_heads - head_size = int(hidden_size / num_heads) - - Wall = init_dict[prefix + WQKV] - Ball = init_dict[prefix + BQKV] - - # FC_attention - if config.use_trt: - mult_all = 
network.add_fully_connected(input_tensor, 3 * hidden_size, Wall, Ball) - else: - mult_all = custom_fc(network, input_tensor, 3 * hidden_size, Wall, Ball) - - has_mask = imask is not None - # QKV2CTX - pf_type = trt.PluginField("type_id", np.array([get_mha_dtype(config)], np.int32), trt.PluginFieldType.INT32) - pf_hidden_size = trt.PluginField("hidden_size", np.array([hidden_size], np.int32), trt.PluginFieldType.INT32) - pf_num_heads = trt.PluginField("num_heads", np.array([num_heads], np.int32), trt.PluginFieldType.INT32) - pf_has_mask = trt.PluginField("has_mask", np.array([has_mask], np.int32), trt.PluginFieldType.INT32) - pfc = trt.PluginFieldCollection([pf_hidden_size, pf_num_heads, pf_has_mask, pf_type]) - qkv2ctx_plug = qkv2_plg_creator.create_plugin("qkv2ctx", pfc) - - qkv_in = [mult_all.get_output(0)] - if has_mask: - qkv_in.append(imask) - qkv2ctx = network.add_plugin_v2(qkv_in, qkv2ctx_plug) - return qkv2ctx - - -def skipln(prefix, config, init_dict, network, input_tensor, skip, bias=None): - """ - Add the skip layer - """ - idims = input_tensor.shape - assert len(idims) == 5 - hidden_size = idims[2] - - dtype = trt.float32 - if config.use_fp16: - dtype = trt.float16 - - pf_ld = trt.PluginField("ld", np.array([hidden_size], np.int32), trt.PluginFieldType.INT32) - wbeta = init_dict[prefix + "beta"] - pf_beta = trt.PluginField("beta", wbeta, trt.PluginFieldType.FLOAT32) - wgamma = init_dict[prefix + "gamma"] - pf_gamma = trt.PluginField("gamma", wgamma, trt.PluginFieldType.FLOAT32) - pf_type = trt.PluginField("type_id", np.array([int(dtype)], np.int32), trt.PluginFieldType.INT32) - - fields = [pf_ld, pf_beta, pf_gamma, pf_type ] - - if bias is not None: - pf_bias = trt.PluginField("bias", bias, trt.PluginFieldType.FLOAT32) - fields.append(pf_bias) - - pfc = trt.PluginFieldCollection(fields) - skipln_plug = skln_plg_creator.create_plugin("skipln", pfc) - - skipln_inputs = [input_tensor, skip] - layer = network.add_plugin_v2(skipln_inputs, skipln_plug) - return layer - -def ffn_trt(prefix, config, init_dict, network, input_tensor): - # FC1 + GELU - B_mid = init_dict[prefix + B_MID] - W_mid = init_dict[prefix + W_MID] - mid_dense = network.add_fully_connected(input_tensor, config.intermediate_size, W_mid, B_mid) - - dtype = trt.float32 - if config.use_fp16: - dtype = trt.float16 - pf_type = trt.PluginField("type_id", np.array([int(dtype)], np.int32), trt.PluginFieldType.INT32) - pf_ld = trt.PluginField("ld", np.array([config.hidden_size], np.int32), trt.PluginFieldType.INT32) - - pfc = trt.PluginFieldCollection([pf_type, pf_ld]) - gelu_plug = gelu_plg_creator.create_plugin("gelu", pfc) - - gelu_inputs = [mid_dense.get_output(0)] - gelu_layer = network.add_plugin_v2(gelu_inputs, gelu_plug) - - intermediate_act = gelu_layer.get_output(0) - - # FC2 - # Dense to hidden size - B_lout = init_dict[prefix + B_LOUT] - W_lout = init_dict[prefix + W_LOUT] - out_dense = network.add_fully_connected(intermediate_act, config.hidden_size, W_lout, B_lout) - B_lout = None - - out_layer = skipln(prefix + "output_layernorm_", config, init_dict, network, out_dense.get_output(0), input_tensor, B_lout) - return out_layer - -def ffn(prefix, config, init_dict, network, input_tensor): - # FC1 + GELU - B_mid = init_dict[prefix + B_MID] - W_mid = init_dict[prefix + W_MID] - B_lout = init_dict[prefix + B_LOUT] - W_lout = init_dict[prefix + W_LOUT] - pf_out_dim = trt.PluginField("out_dims", np.array(config.hidden_size, np.int32), trt.PluginFieldType.INT32) - pf_type = trt.PluginField("type_id", 
np.array(int(trt.float16), np.int32), trt.PluginFieldType.INT32) - pf_W1 = trt.PluginField("W1", W_mid, trt.PluginFieldType.FLOAT32) - pf_W2 = trt.PluginField("W2", W_lout, trt.PluginFieldType.FLOAT32) - pf_B1 = trt.PluginField("B1", B_mid, trt.PluginFieldType.FLOAT32) - pf_act_type = trt.PluginField("act_type", np.array(int(3), np.int32), trt.PluginFieldType.INT32) - pfc = trt.PluginFieldCollection([pf_out_dim, pf_type, pf_W1, pf_W2, pf_B1, pf_act_type]) - ffn_plug = ffn_plg_creator.create_plugin("ffn", pfc) - - ffn_inputs = [input_tensor] - ffn_layer = network.add_plugin_v2(ffn_inputs, ffn_plug) - - out_layer = skipln(prefix + "output_layernorm_", config, init_dict, network, ffn_layer.get_output(0), input_tensor, B_lout) - return out_layer - -def transformer_layer_opt(prefix, config, init_dict, network, input_tensor, imask): - """ - Add the transformer layer - """ - idims = input_tensor.shape - assert len(idims) == 5 - hidden_size = idims[2] - - context_transposed = attention_layer_opt(prefix + "attention_", config, init_dict, network, input_tensor, imask) - attention_heads = context_transposed.get_output(0) - - # FC0 - B_aout = init_dict[prefix + B_AOUT] - W_aout = init_dict[prefix + W_AOUT] - if config.use_trt: - attention_out_fc = network.add_fully_connected(attention_heads, hidden_size, W_aout, B_aout) - else: - attention_out_fc = custom_fc(network, attention_heads, hidden_size, W_aout, B_aout) - B_aout = None - - skiplayer = skipln(prefix + "attention_output_layernorm_",config, init_dict, network, attention_out_fc.get_output(0), input_tensor, B_aout) - attention_ln = skiplayer.get_output(0) - - if config.use_trt: - ffn_layer = ffn_trt(prefix, config, init_dict, network, attention_ln) - else: - ffn_layer = ffn(prefix, config, init_dict, network, attention_ln) - return ffn_layer - -def bert_model(config, init_dict, network, input_tensor, input_mask): - """ - Create the bert model - """ - prev_input = input_tensor - for layer in range(0, config.num_hidden_layers): - ss = "l{}_".format(layer) - out_layer = transformer_layer_opt(ss, config, init_dict, network, prev_input, input_mask) - prev_input = out_layer.get_output(0) - return prev_input - -def squad_output(prefix, config, init_dict, network, input_tensor): - """ - Create the squad output - """ - - idims = input_tensor.shape - assert len(idims) == 5 - B, S, hidden_size, _, _ = idims - - W_out = init_dict[prefix + SQD_W] - B_out = init_dict[prefix + SQD_B] - - if config.use_trt: - dense = network.add_fully_connected(input_tensor, 2, W_out, B_out) - else: - dense = custom_fc(network, input_tensor, 2, W_out, B_out) - - return dense - -def emb_layernorm(builder, network, config, weights_dict, builder_config, sequence_lengths, batch_sizes): - input_ids = network.add_input(name="input_ids", dtype=trt.int32, shape=(-1 if len(batch_sizes) > 1 else batch_sizes[0], -1 if len(sequence_lengths) > 1 else sequence_lengths[0])) - segment_ids = network.add_input(name="segment_ids", dtype=trt.int32, shape=(-1 if len(batch_sizes) > 1 else batch_sizes[0], -1 if len(sequence_lengths) > 1 else sequence_lengths[0])) - input_mask = network.add_input(name="input_mask", dtype=trt.int32, shape=(-1 if len(batch_sizes) > 1 else batch_sizes[0], -1 if len(sequence_lengths) > 1 else sequence_lengths[0])) - - if len(sequence_lengths) > 1: - profile = builder.create_optimization_profile() - min_shape = (batch_sizes[0], sequence_lengths[0]) - opt_shape = (batch_sizes[1], sequence_lengths[1]) - max_shape = (batch_sizes[2], sequence_lengths[2]) - 
assert(sequence_lengths[0] <= sequence_lengths[1] and sequence_lengths[1] <= sequence_lengths[2]) - - print('set dynamic shape -> ', min_shape, opt_shape, max_shape) - profile.set_shape("input_ids", min_shape, opt_shape, max_shape) - profile.set_shape("segment_ids", min_shape, opt_shape, max_shape) - profile.set_shape("input_mask", min_shape, opt_shape, max_shape) - builder_config.add_optimization_profile(profile) - - wbeta = trt.PluginField("bert_embeddings_layernorm_beta", weights_dict["bert_embeddings_layernorm_beta"], trt.PluginFieldType.FLOAT32) - wgamma = trt.PluginField("bert_embeddings_layernorm_gamma", weights_dict["bert_embeddings_layernorm_gamma"], trt.PluginFieldType.FLOAT32) - wwordemb = trt.PluginField("bert_embeddings_word_embeddings", weights_dict["bert_embeddings_word_embeddings"], trt.PluginFieldType.FLOAT32) - wtokemb = trt.PluginField("bert_embeddings_token_type_embeddings", weights_dict["bert_embeddings_token_type_embeddings"], trt.PluginFieldType.FLOAT32) - wposemb = trt.PluginField("bert_embeddings_position_embeddings", weights_dict["bert_embeddings_position_embeddings"], trt.PluginFieldType.FLOAT32) - - output_fp16 = trt.PluginField("output_fp16", np.array([1 if config.use_fp16 else 0]).astype(np.int32), trt.PluginFieldType.INT32) - mha_type = trt.PluginField("mha_type_id", np.array([get_mha_dtype(config)], np.int32), trt.PluginFieldType.INT32) - - pfc = trt.PluginFieldCollection([wbeta, wgamma, wwordemb, wtokemb, wposemb, output_fp16, mha_type]) - fn = emln_plg_creator.create_plugin("embeddings", pfc) - - inputs = [input_ids, segment_ids, input_mask] - emb_layer = network.add_plugin_v2(inputs, fn) - return emb_layer - -def build_engine(batch_sizes, sequence_lengths, config, weights_dict): - explicit_batch_flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - - builder = trt.Builder(TRT_LOGGER) - with builder.create_network(explicit_batch_flag) as network, builder.create_builder_config() as builder_config: - if config.use_fp16: - builder_config.set_flag(trt.BuilderFlag.FP16) - - # Create the network - emb_layer = emb_layernorm(builder, network, config, weights_dict, builder_config, sequence_lengths, batch_sizes) - embeddings = emb_layer.get_output(0) - mask_idx = emb_layer.get_output(1) - - bert_out = bert_model(config, weights_dict, network, embeddings, mask_idx) - - squad_logits = squad_output("cls_", config, weights_dict, network, bert_out) - squad_logits_out = squad_logits.get_output(0) - - network.mark_output(squad_logits_out) - - build_start_time = time.time() - engine = builder.build_engine(network, builder_config) - build_time_elapsed = (time.time() - build_start_time) - TRT_LOGGER.log(TRT_LOGGER.INFO, "build engine in {:.3f} Sec".format(build_time_elapsed)) - return engine - -def str2bool(v): - return v.lower() in ('yes', 'true') - -def main(): - parser = argparse.ArgumentParser(description="TensorRT BERT Sample", formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument("-z", "--use_trt", type=str2bool, default=False, help = "Whether to use tensorRT or IxRT") - parser.add_argument("-x", "--onnx", required=False, help="The ONNX model file path.") - parser.add_argument("-pt", "--pytorch", required=False, help="The PyTorch checkpoint file path.") - parser.add_argument("-o", "--output", required=True, default="bert_base_384.engine", help="The bert engine file, ex bert.engine") - parser.add_argument("-b", "--batch-size", nargs='+', help="Batch size(s) to optimize for. 
The engine will be usable with any batch size below this, but may not be optimal for smaller sizes. Can be specified multiple times to optimize for more than one batch size.", type=int) - parser.add_argument("-s", "--sequence-length", nargs='+', help="Sequence length of the BERT model", type=int) - parser.add_argument("-c", "--config-dir", required=True, - help="The folder containing the bert_config.json, which can be downloaded e.g. from https://github.com/google-research/bert#pre-trained-models or by running download_models.py in dle/TensorFlow/LanguageModeling/BERT/data/pretrained_models_google") - parser.add_argument("-f", "--fp16", action="store_true", help="Indicates that inference should be run in FP16 precision", required=False) - parser.add_argument("-j", "--squad-json", default="squad/dev-v1.1.json", help="squad json dataset used for int8 calibration", required=False) - parser.add_argument("-v", "--vocab-file", default="./pre-trained_model/uncased_L-24_H-1024_A-16/vocab.txt", help="Path to file containing entire understandable vocab", required=False) - parser.add_argument("--verbose", action="store_true", help="Turn on verbose logger and set profiling verbosity to DETAILED", required=False) - - args, _ = parser.parse_known_args() - args.batch_size = args.batch_size or [1] - args.sequence_length = args.sequence_length or [128] - args.use_trt = is_nvidia_platform() - - if len(args.sequence_length) not in [1, 3]: - print("Error: You must provide either one or three integers.") - sys.exit(1) - - if len(args.batch_size) not in [1, 3]: - print("Error: You must provide either one or three integers.") - sys.exit(1) - - if args.verbose: - TRT_LOGGER.min_severity = TRT_LOGGER.VERBOSE - - bert_config_path = args.config_dir - TRT_LOGGER.log(TRT_LOGGER.INFO, "Using configuration file: {:}".format(bert_config_path)) - - config = BertConfig(bert_config_path, args.fp16, args.use_trt) - - if args.onnx != None: - weights_dict = load_onnx_weights_and_quant(args.onnx, config) - elif args.pytorch != None: - weights_dict = load_pytorch_weights_and_quant(args.pytorch, config) - else: - raise RuntimeError("You need either specify TF checkpoint using option --ckpt or ONNX using option --onnx to build TRT BERT model.") - - with build_engine(args.batch_size, args.sequence_length, config, weights_dict) as engine: - TRT_LOGGER.log(TRT_LOGGER.VERBOSE, "Serializing Engine...") - serialized_engine = engine.serialize() - TRT_LOGGER.log(TRT_LOGGER.INFO, "Saving Engine to {:}".format(args.output)) - with open(args.output, "wb") as fout: - fout.write(serialized_engine) - TRT_LOGGER.log(TRT_LOGGER.INFO, "Done.") - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/models/nlp/language_model/bert_base_squad/ixrt/python/builder_int8.py b/models/nlp/language_model/bert_base_squad/ixrt/python/builder_int8.py deleted file mode 100644 index e51d7c40d5fd0a9d79514b0367b446058ddec14f..0000000000000000000000000000000000000000 --- a/models/nlp/language_model/bert_base_squad/ixrt/python/builder_int8.py +++ /dev/null @@ -1,415 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import os -import argparse -import json -import tensorrt as trt -import time -import sys -import ctypes -import os -import numpy as np -from builder_utils_int8 import load_pytorch_weights_and_quant -from builder_utils_int8 import WQKV, BQKV # Attention Keys -from builder_utils_int8 import W_AOUT, B_AOUT, W_MID, B_MID, W_LOUT, B_LOUT # Transformer Keys -from builder_utils_int8 import SQD_W, SQD_B # SQuAD Output Keys - -trt_version = [int(n) for n in trt.__version__.split('.')] - -TRT_LOGGER = trt.Logger(trt.Logger.ERROR) -from load_ixrt_plugin import load_ixrt_plugin -load_ixrt_plugin(TRT_LOGGER) - -plg_registry = trt.get_plugin_registry() -registry_list = plg_registry.plugin_creator_list -print("registry_list: ", [registry.name + '/' + registry.plugin_version for registry in registry_list]) -emln_plg_creator = plg_registry.get_plugin_creator("CustomEmbLayerNormPluginDynamic_IxRT", "2", "") -qkv2_plg_creator = plg_registry.get_plugin_creator("CustomQKVToContextPluginDynamic_IxRT", "3", "") -skln_plg_creator = plg_registry.get_plugin_creator("CustomSkipLayerNormPluginDynamic_IxRT", "3", "") -gelu_plg_creator = plg_registry.get_plugin_creator("CustomGeluPluginDynamic_IxRT", "1", "") -fc_plg_creator = plg_registry.get_plugin_creator("CustomFCPluginDynamic_IxRT", "2", "") - -# -class BertConfig: - def __init__(self, bert_config_path, use_int8): - with open(bert_config_path, "r") as f: - data = json.load(f) - self.num_attention_heads = data["num_attention_heads"] - self.hidden_size = data["hidden_size"] - self.intermediate_size = data["intermediate_size"] - self.num_hidden_layers = data["num_hidden_layers"] - self.head_size = self.hidden_size // self.num_attention_heads - self.use_int8 = use_int8 - -def set_tensor_name(tensor, prefix, name): - tensor.name = prefix + name - -def set_output_name(layer, prefix, name, out_idx = 0): - set_tensor_name(layer.get_output(out_idx), prefix, name) - -def set_output_range(layer, maxval, out_idx = 0): - layer.get_output(out_idx).set_dynamic_range(-maxval, maxval) - -def get_mha_dtype(config): - dtype = trt.float32 - if config.use_int8: - dtype = trt.int8 - return int(dtype) - -def custom_fc(prefix, config, init_dict, network, input_tensor, out_dims, W, B): - pf_out_dims = trt.PluginField("out_dims", np.array([out_dims], dtype=np.int32), trt.PluginFieldType.INT32) - pf_W = trt.PluginField("W", W, trt.PluginFieldType.FLOAT32) - - fields = [pf_out_dims, pf_W] - 
- if config.use_int8: - amax_vec = [init_dict[prefix + "wei_amax"]] - if B is not None: - pf_B = trt.PluginField("Bias", B, trt.PluginFieldType.FLOAT32) - amax_vec.append(init_dict[prefix + "out_amax"]) - pf_amax = trt.PluginField("fc_amax", np.array(amax_vec, np.float32), trt.PluginFieldType.FLOAT32) - fields.append(pf_B) - fields.append(pf_amax) - else: - pf_amax = trt.PluginField("fc_amax", np.array(amax_vec, np.float32), trt.PluginFieldType.FLOAT32) - fields.append(pf_amax) - - pfc = trt.PluginFieldCollection(fields) - fc_plugin = fc_plg_creator.create_plugin("fcplugin", pfc) - plug_inputs = [input_tensor] - out_dense = network.add_plugin_v2(plug_inputs, fc_plugin) - return out_dense - -def attention_layer_opt(prefix, config, init_dict, network, input_tensor, imask): - """ - Add the attention layer - """ - assert(len(input_tensor.shape) == 5) - B, S, hidden_size, _, _ = input_tensor.shape - num_heads = config.num_attention_heads - head_size = int(hidden_size / num_heads) - - Wall = init_dict[prefix + WQKV] - Ball = init_dict[prefix + BQKV] - - # FC_attention - mult_all = custom_fc(prefix + "self_qkv_", config, init_dict, network, input_tensor, 3*hidden_size, Wall, Ball) - set_output_range(mult_all, init_dict[prefix + "self_qkv_out_amax"]) - - has_mask = imask is not None - - # QKV2CTX - pf_hidden_size = trt.PluginField("hidden_size", np.array([hidden_size], np.int32), trt.PluginFieldType.INT32) - pf_num_heads = trt.PluginField("num_heads", np.array([num_heads], np.int32), trt.PluginFieldType.INT32) - fields = [pf_hidden_size, pf_num_heads] - dq_probs = [ - init_dict[prefix + "arrange_qkv_amax"], - init_dict[prefix + "softmax_in_amax"], - init_dict[prefix + "softmax_out_amax"] - ] - pf_dq = trt.PluginField("dq_probs", np.array(dq_probs, np.float32), trt.PluginFieldType.FLOAT32) - fields.append(pf_dq) - - pfc = trt.PluginFieldCollection(fields) - qkv2ctx_plug = qkv2_plg_creator.create_plugin("qkv2ctx", pfc) - - qkv_in = [mult_all.get_output(0)] - if has_mask: - qkv_in.append(imask) - qkv2ctx = network.add_plugin_v2(qkv_in, qkv2ctx_plug) - if config.use_int8: - set_output_range(qkv2ctx, init_dict[prefix + "output_dense_in_amax"]) - return qkv2ctx - - -def skipln(prefix, config, init_dict, network, input_tensor, skip, residual, is_last_layer, bias=None): - """ - Add the skip layer - """ - idims = input_tensor.shape - assert len(idims) == 5 - hidden_size = idims[2] - - dtype = trt.float32 - if config.use_int8: - dtype = trt.int8 - - wbeta = init_dict[prefix + "beta"] - wgamma = init_dict[prefix + "gamma"] - - pf_ld = trt.PluginField("ld", np.array([hidden_size], np.int32), trt.PluginFieldType.INT32) - pf_beta = trt.PluginField("beta", wbeta, trt.PluginFieldType.FLOAT32) - pf_gamma = trt.PluginField("gamma", wgamma, trt.PluginFieldType.FLOAT32) - pf_type = trt.PluginField("type_id", np.array([int(dtype)], np.int32), trt.PluginFieldType.INT32) - - fields = [pf_ld, pf_beta, pf_gamma, pf_type ] - if bias is not None: - pf_bias = trt.PluginField("bias", bias, trt.PluginFieldType.FLOAT32) - fields.append(pf_bias) - if is_last_layer: - pf_fp32 = trt.PluginField("output_fp32", np.array([1], np.int32), trt.PluginFieldType.INT32) - fields.append(pf_fp32) - - pfc = trt.PluginFieldCollection(fields) - skipln_plug = skln_plg_creator.create_plugin("skipln", pfc) - - skipln_inputs = [input_tensor, skip] - if config.use_int8: - skipln_inputs.append(residual) - layer = network.add_plugin_v2(skipln_inputs, skipln_plug) - return layer - -def ffn(prefix, config, init_dict, network, input_tensor, residual, 
is_last_layer): - # FC1 + GELU - B_mid = init_dict[prefix + B_MID] - W_mid = init_dict[prefix + W_MID] - - mid_dense = custom_fc(prefix + "intermediate_dense_", config, init_dict, network, input_tensor, config.intermediate_size, W_mid, None) - set_output_range(mid_dense, init_dict[prefix + "intermediate_dense_out_amax"]) - - dtype = trt.float32 - - if config.use_int8: - dtype = trt.int8 - - pf_type = trt.PluginField("type_id", np.array([int(dtype)], np.int32), trt.PluginFieldType.INT32) - pf_ld = trt.PluginField("ld", np.array([int(config.intermediate_size)], np.int32), trt.PluginFieldType.INT32) - fields = [pf_type, pf_ld] - if config.use_int8: - pf_bias = trt.PluginField("bias", B_mid, trt.PluginFieldType.FLOAT32) - fields.append(pf_bias) - - pfc = trt.PluginFieldCollection(fields) - gelu_plug = gelu_plg_creator.create_plugin("gelu", pfc) - - gelu_inputs = [mid_dense.get_output(0)] - gelu_layer = network.add_plugin_v2(gelu_inputs, gelu_plug) - - if config.use_int8: - set_output_range(gelu_layer, init_dict[prefix + "output_dense_in_amax"]) - - intermediate_act = gelu_layer.get_output(0) - # set_tensor_name(intermediate_act, prefix, "gelu") - - # FC2 - # Dense to hidden size - B_lout = init_dict[prefix + B_LOUT] - W_lout = init_dict[prefix + W_LOUT] - out_dense = custom_fc(prefix + "output_dense_", config, init_dict, network, intermediate_act, config.hidden_size, W_lout, None) - set_output_range(out_dense, init_dict[prefix + "output_dense_out_amax"]) - - out_layer = skipln(prefix + "output_layernorm_", config, init_dict, network, out_dense.get_output(0), input_tensor, residual, is_last_layer, B_lout) - return out_layer - -def transformer_layer_opt(prefix, config, init_dict, network, input_tensor, imask, residual, is_last_layer): - """ - Add the transformer layer - """ - idims = input_tensor.shape - assert len(idims) == 5 - hidden_size = idims[2] - - context_transposed = attention_layer_opt(prefix + "attention_", config, init_dict, network, input_tensor, imask) - attention_heads = context_transposed.get_output(0) - - # FC0 - B_aout = init_dict[prefix + B_AOUT] - W_aout = init_dict[prefix + W_AOUT] - attention_out_fc = custom_fc(prefix + "attention_output_dense_", config, init_dict, network, attention_heads, hidden_size, W_aout, None) - set_output_range(attention_out_fc, init_dict[prefix + "attention_output_dense_out_amax"]) - - skiplayer = skipln(prefix + "attention_output_layernorm_", config, init_dict, network, attention_out_fc.get_output(0), input_tensor, residual, False, B_aout) - if config.use_int8: - set_output_range(skiplayer, init_dict[prefix + "intermediate_dense_in_amax"]) - - ffn_layer = ffn(prefix, config, init_dict, network, skiplayer.get_output(0), skiplayer.get_output(1), is_last_layer) - return ffn_layer - -def bert_model(config, init_dict, network, input_tensor, input_mask, residual): - """ - Create the bert model - """ - prev_input = input_tensor - for layer in range(0, config.num_hidden_layers): - ss = "l{}_".format(layer) - out_layer = transformer_layer_opt(ss, config, init_dict, network, prev_input, input_mask, residual, - True if config.use_int8 and layer == config.num_hidden_layers - 1 else False) - prev_input = out_layer.get_output(0) - residual = None - if config.use_int8: - residual = out_layer.get_output(1) - if layer < config.num_hidden_layers - 1: - set_output_range(out_layer, init_dict["l{}_".format(layer+1) + "attention_self_qkv_in_amax"]) - else: - set_output_range(out_layer, 1) - - return prev_input - -def squad_output(prefix, config, init_dict, network, 
input_tensor): - """ - Create the squad output - """ - - idims = input_tensor.shape - assert len(idims) == 5 - B, S, hidden_size, _, _ = idims - - W_out = init_dict[prefix + SQD_W] - B_out = init_dict[prefix + SQD_B] - - dense = network.add_fully_connected(input_tensor, 2, W_out, B_out) - return dense - -def emb_layernorm(builder, network, config, weights_dict, builder_config, sequence_lengths, batch_sizes): - input_ids = network.add_input(name="input_ids", dtype=trt.int32, shape=(-1 if len(batch_sizes) > 1 else batch_sizes[0], -1 if len(sequence_lengths) > 1 else sequence_lengths[0])) - segment_ids = network.add_input(name="segment_ids", dtype=trt.int32, shape=(-1 if len(batch_sizes) > 1 else batch_sizes[0], -1 if len(sequence_lengths) > 1 else sequence_lengths[0])) - input_mask = network.add_input(name="input_mask", dtype=trt.int32, shape=(-1 if len(batch_sizes) > 1 else batch_sizes[0], -1 if len(sequence_lengths) > 1 else sequence_lengths[0])) - - if len(sequence_lengths) > 1: - profile = builder.create_optimization_profile() - min_shape = (batch_sizes[0], sequence_lengths[0]) - opt_shape = (batch_sizes[1], sequence_lengths[1]) - max_shape = (batch_sizes[2], sequence_lengths[2]) - assert(sequence_lengths[0] <= sequence_lengths[1] and sequence_lengths[1] <= sequence_lengths[2]) - - print('set dynamic shape -> ', min_shape, opt_shape, max_shape) - profile.set_shape("input_ids", min_shape, opt_shape, max_shape) - profile.set_shape("segment_ids", min_shape, opt_shape, max_shape) - profile.set_shape("input_mask", min_shape, opt_shape, max_shape) - builder_config.add_optimization_profile(profile) - - wbeta = trt.PluginField("bert_embeddings_layernorm_beta", weights_dict["bert_embeddings_layernorm_beta"], trt.PluginFieldType.FLOAT32) - wgamma = trt.PluginField("bert_embeddings_layernorm_gamma", weights_dict["bert_embeddings_layernorm_gamma"], trt.PluginFieldType.FLOAT32) - wwordemb = trt.PluginField("bert_embeddings_word_embeddings", weights_dict["bert_embeddings_word_embeddings"], trt.PluginFieldType.FLOAT32) - wtokemb = trt.PluginField("bert_embeddings_token_type_embeddings", weights_dict["bert_embeddings_token_type_embeddings"], trt.PluginFieldType.FLOAT32) - wposemb = trt.PluginField("bert_embeddings_position_embeddings", weights_dict["bert_embeddings_position_embeddings"], trt.PluginFieldType.FLOAT32) - - output_fp16 = trt.PluginField("output_fp16", np.array([0]).astype(np.int32), trt.PluginFieldType.INT32) - mha_type = trt.PluginField("mha_type_id", np.array([get_mha_dtype(config)], np.int32), trt.PluginFieldType.INT32) - - pfc = trt.PluginFieldCollection([wbeta, wgamma, wwordemb, wtokemb, wposemb, output_fp16, mha_type]) - fn = emln_plg_creator.create_plugin("embeddings", pfc) - - inputs = [input_ids, segment_ids, input_mask] - emb_layer = network.add_plugin_v2(inputs, fn) - - if config.use_int8: - set_output_range(emb_layer, weights_dict["l0_attention_self_qkv_in_amax"]) - set_output_range(emb_layer, 1.0, 1) - return emb_layer - -def build_engine(batch_sizes, sequence_lengths, config, weights_dict): - explicit_batch_flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - - builder = trt.Builder(TRT_LOGGER) - with builder.create_network(explicit_batch_flag) as network, builder.create_builder_config() as builder_config: - network = builder.create_network(explicit_batch_flag) - builder_config = builder.create_builder_config() - builder_config.set_flag(trt.BuilderFlag.INT8) - - # Create the network - emb_layer = emb_layernorm(builder, network, config, weights_dict, 
builder_config, sequence_lengths, batch_sizes) - embeddings = emb_layer.get_output(0) - mask_idx = emb_layer.get_output(1) - - residual_buffer = None - if config.use_int8: - residual_buffer = emb_layer.get_output(2) - - bert_out = bert_model(config, weights_dict, network, embeddings, mask_idx, residual_buffer) - - squad_logits = squad_output("cls_", config, weights_dict, network, bert_out) - squad_logits_out = squad_logits.get_output(0) - - network.mark_output(squad_logits_out) - - build_start_time = time.time() - engine = builder.build_engine(network, builder_config) - build_time_elapsed = (time.time() - build_start_time) - TRT_LOGGER.log(TRT_LOGGER.INFO, "build engine in {:.3f} Sec".format(build_time_elapsed)) - return engine - -def main(): - parser = argparse.ArgumentParser(description="TensorRT BERT Sample", formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument("-x", "--onnx", required=False, help="The ONNX model file path.") - parser.add_argument("-pt", "--pytorch", required=False, help="The PyTorch checkpoint file path.") - parser.add_argument("-o", "--output", required=True, default="bert_base_384.engine", help="The bert engine file, ex bert.engine") - parser.add_argument("-b", "--batch-size", nargs='+', help="Batch size(s) to optimize for. The engine will be usable with any batch size below this, but may not be optimal for smaller sizes. Can be specified multiple times to optimize for more than one batch size.", type=int) - parser.add_argument("-s", "--sequence-length", nargs='+', help="Sequence length of the BERT model", type=int) - parser.add_argument("-c", "--config-dir", required=True, - help="The folder containing the bert_config.json, which can be downloaded e.g. from https://github.com/google-research/bert#pre-trained-models or by running download_models.py in dle/TensorFlow/LanguageModeling/BERT/data/pretrained_models_google") - parser.add_argument("-f", "--fp16", action="store_true", help="Indicates that inference should be run in FP16 precision", required=False) - parser.add_argument("-i", "--int8", action="store_true", help="Indicates that inference should be run in INT8 precision", required=False) - parser.add_argument("-j", "--squad-json", default="squad/dev-v1.1.json", help="squad json dataset used for int8 calibration", required=False) - parser.add_argument("-v", "--vocab-file", default="./pre-trained_model/uncased_L-24_H-1024_A-16/vocab.txt", help="Path to file containing entire understandable vocab", required=False) - parser.add_argument("--verbose", action="store_true", help="Turn on verbose logger and set profiling verbosity to DETAILED", required=False) - - args, _ = parser.parse_known_args() - args.batch_size = args.batch_size or [1] - args.sequence_length = args.sequence_length or [128] - - if len(args.sequence_length) not in [1, 3]: - print("Error: You must provide either one or three integers.") - sys.exit(1) - - if len(args.batch_size) not in [1, 3]: - print("Error: You must provide either one or three integers.") - sys.exit(1) - - if args.verbose: - TRT_LOGGER.min_severity = TRT_LOGGER.VERBOSE - - bert_config_path = args.config_dir - TRT_LOGGER.log(TRT_LOGGER.INFO, "Using configuration file: {:}".format(bert_config_path)) - - config = BertConfig(bert_config_path, args.int8) - - if args.onnx != None: - if args.int8: - raise RuntimeError("int8 onnx not supported now!!!") - elif args.pytorch != None: - weights_dict = load_pytorch_weights_and_quant(args.pytorch, config) - else: - raise RuntimeError("You need either specify TF checkpoint 
using option --ckpt or ONNX using option --onnx to build TRT BERT model.") - - # engine = build_engine(args.batch_size, args.workspace_size, args.sequence_length, config, weights_dict, args.squad_json, args.vocab_file, None, args.calib_num, args.verbose) - with build_engine(args.batch_size, args.sequence_length, config, weights_dict) as engine: - TRT_LOGGER.log(TRT_LOGGER.VERBOSE, "Serializing Engine...") - serialized_engine = engine.serialize() - TRT_LOGGER.log(TRT_LOGGER.INFO, "Saving Engine to {:}".format(args.output)) - with open(args.output, "wb") as fout: - fout.write(serialized_engine) - TRT_LOGGER.log(TRT_LOGGER.INFO, "Done.") - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/models/nlp/language_model/bert_base_squad/ixrt/python/builder_utils.py b/models/nlp/language_model/bert_base_squad/ixrt/python/builder_utils.py deleted file mode 100644 index 767379778633cafe889a4df414d8cc487495559b..0000000000000000000000000000000000000000 --- a/models/nlp/language_model/bert_base_squad/ixrt/python/builder_utils.py +++ /dev/null @@ -1,219 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -import onnx -import numpy as np -import tensorrt as trt -import json -import struct -import torch - -TRT_LOGGER = trt.Logger(trt.Logger.INFO) - -""" -Attentions Keys -""" -WQ = "self_query_kernel" -BQ = "self_query_bias" -WK = "self_key_kernel" -BK = "self_key_bias" -WV = "self_value_kernel" -BV = "self_value_bias" -WQKV = "self_qkv_kernel" -BQKV = "self_qkv_bias" - -""" -Transformer Keys -""" -W_AOUT = "attention_output_dense_kernel" -B_AOUT = "attention_output_dense_bias" -AOUT_LN_BETA = "attention_output_layernorm_beta" -AOUT_LN_GAMMA = "attention_output_layernorm_gamma" -W_MID = "intermediate_dense_kernel" -B_MID = "intermediate_dense_bias" -W_LOUT = "output_dense_kernel" -B_LOUT = "output_dense_bias" -LOUT_LN_BETA = "output_layernorm_beta" -LOUT_LN_GAMMA = "output_layernorm_gamma" - -""" -Squad Output Keys -""" -SQD_W = "squad_output_weights" -SQD_B = "squad_output_bias" - - -def get_onnx_weight_dict(tensor_dict, config): - N = config.num_attention_heads - H = config.head_size - hidden_size = config.hidden_size - - weights_dict = dict() - for outname, tensor in tensor_dict.items(): - if outname.find("_amax") != -1: - weights_dict[outname] = tensor.flatten() - elif outname.find(BQ) != -1: - prefix = outname[:outname.find(BQ)] - - Wqkv = np.zeros((3, hidden_size, hidden_size), np.float32) - Bqkv = np.zeros((3, hidden_size), np.float32) - - Wqkv[0,:,:] = tensor_dict[prefix + WQ] - Wqkv[1,:,:] = tensor_dict[prefix + WK] - Wqkv[2,:,:] = tensor_dict[prefix + WV] - Bqkv[0,:] = tensor - Bqkv[1,:] = tensor_dict[prefix + BK] - Bqkv[2,:] = tensor_dict[prefix + BV] - - weights_dict[prefix + WQKV] = Wqkv.flatten() - weights_dict[prefix + BQKV] = Bqkv.flatten() - weights_dict[prefix + WQKV + "_notrans"] = np.ascontiguousarray(Wqkv.T).flatten() - - elif outname.find(BK) != -1 or outname.find(BV) != -1 or outname.find(WQ) != -1 or outname.find(WK) != -1 or outname.find(WV) != -1: - pass - else: - flat_tensor = np.ascontiguousarray(tensor).flatten() - weights_dict[outname] = flat_tensor - - return weights_dict - -def onnx_to_trt_name(onnx_name): - """ - Converting variables in the onnx checkpoint to names corresponding to the naming convention used in the TF version, expected by the builder - """ - qkv_strings = {'key', 'value', 'query', 'query_key_value'} - onnx_name = onnx_name.lower() - toks = [t.strip('_') for t in onnx_name.split('.')] - if toks[0] == 'bert': #embeddings or encoder - if toks[1] == 'encoder': #transformer - # Token conversions for sparse checkpoints - if toks[-2] == 'dense_act': - toks[-2] = 'dense' - elif toks[-3] == 'dense_act': - if toks[-2] == 'input_quantizer': - toks[-2] = 'input' - elif toks[-2] == 'weight_quantizer': - toks[-2] = 'kernel' - toks[-3] = 'dense' - elif toks[-2].startswith('matmul'): - toks[-2] = { - 'matmul_q_quantizer': 'qv_a_input_quantizer', - 'matmul_k_quantizer': 'qv_b_input_quantizer', - 'matmul_v_quantizer': 'av_b_input_quantizer', - 'matmul_a_quantizer': 'av_a_input_quantizer', - }[toks[-2].replace('input_', '')] - - # Token conversions for all checkpoints - if toks[-2] == 'layernorm': #bias->beta, weight->gamma - toks[-1] = 'beta' if toks[-1] == 'bias' else 'gamma' - elif (toks[-2] == 'dense' or toks[-2] in qkv_strings) and toks[-1] == 'weight': - toks[-1] = 'kernel' - elif (toks[-3] == 'dense' or toks[-3] in qkv_strings) and toks[-1] == 'amax': - if toks[-2] == 'weight_quantizer': - toks[-2] = 'kernel' - elif toks[-2] == 'input_quantizer': - toks[-2] = 'input' - - if 'final_input_quantizer' not in toks[2]: - ind = toks.index('layers')+1 if 
'layers' in toks else 3 - toks = toks[ind:] - toks[0] = 'l{}'.format(int(toks[0])) - else: - if toks[-2] == 'layernorm': #bias->beta, weight->gamma - toks[-1] = 'beta' if toks[-1] == 'bias' else 'gamma' - else: #embeddings: drop "_weight" suffix - if toks[-1] == 'amax': - toks[-2] = 'amax' - toks = toks[:-1] - elif 'qa' in onnx_name: - name = 'cls_squad_output_bias' if toks[-1] == 'bias' else 'cls_squad_output_weights' - return name - else: - print("Encountered unknown case:", onnx_name) - assert(False) - parsed = '_'.join(toks) - return parsed - -def load_onnx_weights_and_quant(path, config): - """ - Load the weights from the onnx checkpoint - """ - model = onnx.load(path) - weights = model.graph.initializer - tensor_dict = dict((onnx_to_trt_name(w.name), np.frombuffer(w.raw_data, np.int8).reshape(w.dims)) - if w.name.split('_')[-1] == 'mask' else - (onnx_to_trt_name(w.name), np.frombuffer(w.raw_data, np.float32).reshape(w.dims)) - for w in weights) - return get_onnx_weight_dict(tensor_dict, config) - -def load_pytorch_weights_and_quant(path, config): - """ - Load the weights from the pytorch checkpoint - """ - state_dict = torch.load(path, map_location='cpu')["model"] - tensor_dict = {onnx_to_trt_name(name):val.numpy() for name, val in state_dict.items()} - return get_onnx_weight_dict(tensor_dict, config) - -class BertConfig: - def __init__(self, bert_config_path, use_fp16, use_int8=False): - with open(bert_config_path, "r") as f: - data = json.load(f) - self.num_attention_heads = data["num_attention_heads"] - self.hidden_size = data["hidden_size"] - self.intermediate_size = data["intermediate_size"] - self.num_hidden_layers = data["num_hidden_layers"] - self.head_size = self.hidden_size // self.num_attention_heads - self.use_fp16 = use_fp16 - self.use_int8 = use_int8 - -if __name__ == '__main__': - bert_config_path = '../bert-large-uncased/bert_config.json' - onnx_model_path = '../bert-large-uncased/bert_large_v1_1_fake_quant.onnx' - weight_save_path = "../bert-large-uncased/bert_large_v1_1.wts" - config = config = BertConfig(bert_config_path, True) - weights_dict = load_onnx_weights_and_quant(onnx_model_path, config) - f = open(weight_save_path, "w") - num = 0 - for key, value in weights_dict.items(): - if key.find('_amax') == -1: - num += 1 - - f.write('{}\n'.format(num)) - for key, value in weights_dict.items(): - print('key: ', key) - if key.find('_amax') != -1: - continue - f.write("{} {}".format(key, len(value))) - print(len(value)) - for v in value: - f.write(" ") - f.write(struct.pack('>f', float(v)).hex()) - f.write("\n") diff --git a/models/nlp/language_model/bert_base_squad/ixrt/python/builder_utils_int8.py b/models/nlp/language_model/bert_base_squad/ixrt/python/builder_utils_int8.py deleted file mode 100644 index 56ac8d1889912cb98817d5960767d94522441030..0000000000000000000000000000000000000000 --- a/models/nlp/language_model/bert_base_squad/ixrt/python/builder_utils_int8.py +++ /dev/null @@ -1,209 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the -# License for the specific language governing permissions and limitations -# under the License. -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import numpy as np -import tensorrt as trt -import json -import struct -import torch - -TRT_LOGGER = trt.Logger(trt.Logger.INFO) - -""" -Attentions Keys -""" -WQ = "self_query_kernel" -BQ = "self_query_bias" -WK = "self_key_kernel" -BK = "self_key_bias" -WV = "self_value_kernel" -BV = "self_value_bias" -WQKV = "self_qkv_kernel" -BQKV = "self_qkv_bias" - -""" -Transformer Keys -""" -W_AOUT = "attention_output_dense_kernel" -B_AOUT = "attention_output_dense_bias" -AOUT_LN_BETA = "attention_output_layernorm_beta" -AOUT_LN_GAMMA = "attention_output_layernorm_gamma" -W_MID = "intermediate_dense_kernel" -B_MID = "intermediate_dense_bias" -W_LOUT = "output_dense_kernel" -B_LOUT = "output_dense_bias" -LOUT_LN_BETA = "output_layernorm_beta" -LOUT_LN_GAMMA = "output_layernorm_gamma" - -""" -Squad Output Keys -""" -SQD_W = "squad_output_weights" -SQD_B = "squad_output_bias" - -ixrt_name_map = { - "bert.embeddings.LayerNorm.bias": "bert_embeddings_layernorm_beta", - "bert.embeddings.LayerNorm.weight" : "bert_embeddings_layernorm_gamma", - "bert.embeddings.word_embeddings.weight" : "bert_embeddings_word_embeddings", - "bert.embeddings.token_type_embeddings.weight" : "bert_embeddings_token_type_embeddings", - "bert.embeddings.position_embeddings.weight" : "bert_embeddings_position_embeddings", - "qa_outputs.weight" : "cls_squad_output_weights", - "qa_outputs.bias" : "cls_squad_output_bias" -} - -ixrt_atten_name_map = { - "bert.encoder.layer.{}.self_attn.qkv_proj.weight" : "l{}_attention_self_qkv_kernel", - "bert.encoder.layer.{}.self_attn.qkv_proj.bias" : "l{}_attention_self_qkv_bias", - "bert.encoder.layer.{}.self_attn.out_proj.bias" : "l{}_attention_output_dense_bias", - "bert.encoder.layer.{}.self_attn.out_proj.weight" : "l{}_attention_output_dense_kernel", - "bert.encoder.layer.{}.fc1.weight" : "l{}_intermediate_dense_kernel", - "bert.encoder.layer.{}.fc1.bias" : "l{}_intermediate_dense_bias", - "bert.encoder.layer.{}.fc2.weight" : "l{}_output_dense_kernel", - "bert.encoder.layer.{}.fc2.bias" : "l{}_output_dense_bias", - "bert.encoder.layer.{}.self_attn_layer_norm.weight" : "l{}_attention_output_layernorm_gamma", - "bert.encoder.layer.{}.self_attn_layer_norm.bias" : "l{}_attention_output_layernorm_beta", - "bert.encoder.layer.{}.final_layer_norm.weight" : "l{}_output_layernorm_gamma", - "bert.encoder.layer.{}.final_layer_norm.bias" : "l{}_output_layernorm_beta", - "bert.encoder.layer.{}.self_attn.qkv_proj.weight_quant.clip.clip_value_max" : "l{}_attention_self_qkv_wei_amax", - "bert.encoder.layer.{}.self_attn.qkv_proj.input_quant.clip.clip_value_max" : "l{}_attention_self_qkv_in_amax", - "bert.encoder.layer.{}.self_attn.qkv_proj.output_quant.clip.clip_value_max" : "l{}_attention_self_qkv_out_amax", - 
"bert.encoder.layer.{}.self_attn.attention_quant.clip.clip_value_max" : "l{}_attention_arrange_qkv_amax", - "bert.encoder.layer.{}.self_attn.softmax_in_quant.clip.clip_value_max" : "l{}_attention_softmax_in_amax", - "bert.encoder.layer.{}.self_attn.atten_score_out_quant.clip.clip_value_max" : "l{}_attention_softmax_out_amax", - "bert.encoder.layer.{}.self_attn.out_proj.input_quant.clip.clip_value_max" : "l{}_attention_output_dense_in_amax", - "bert.encoder.layer.{}.self_attn.out_proj.output_quant.clip.clip_value_max" : "l{}_attention_output_dense_out_amax", - "bert.encoder.layer.{}.self_attn.out_proj.weight_quant.clip.clip_value_max" : "l{}_attention_output_dense_wei_amax", - "bert.encoder.layer.{}.fc1.input_quant.clip.clip_value_max" : "l{}_intermediate_dense_in_amax", - "bert.encoder.layer.{}.fc1.output_quant.clip.clip_value_max" : "l{}_intermediate_dense_out_amax", - "bert.encoder.layer.{}.fc1.weight_quant.clip.clip_value_max" : "l{}_intermediate_dense_wei_amax", - "bert.encoder.layer.{}.fc2.input_quant.clip.clip_value_max" : "l{}_output_dense_in_amax", - "bert.encoder.layer.{}.fc2_out_quant.clip.clip_value_max" : "l{}_output_dense_out_amax", - "bert.encoder.layer.{}.fc2.weight_quant.clip.clip_value_max" : "l{}_output_dense_wei_amax" -} - -def get_weight_dict(tensor_dict, config): - N = config.num_attention_heads - H = config.head_size - hidden_size = config.hidden_size - - weights_dict = dict() - for outname, tensor in tensor_dict.items(): - if outname.find("_amax") != -1: - weights_dict[outname] = tensor.item() - elif outname.find(BQ) != -1: - prefix = outname[:outname.find(BQ)] - - Wqkv = np.zeros((3, hidden_size, hidden_size), np.float32) - Bqkv = np.zeros((3, hidden_size), np.float32) - - Wqkv[0,:,:] = tensor_dict[prefix + WQ] - Wqkv[1,:,:] = tensor_dict[prefix + WK] - Wqkv[2,:,:] = tensor_dict[prefix + WV] - Bqkv[0,:] = tensor - Bqkv[1,:] = tensor_dict[prefix + BK] - Bqkv[2,:] = tensor_dict[prefix + BV] - - weights_dict[prefix + WQKV] = Wqkv.flatten() - weights_dict[prefix + BQKV] = Bqkv.flatten() - elif outname.find(BK) != -1 or outname.find(BV) != -1 or outname.find(WQ) != -1 or outname.find(WK) != -1 or outname.find(WV) != -1: - pass - else: - flat_tensor = np.ascontiguousarray(tensor).flatten() - weights_dict[outname] = flat_tensor - - return weights_dict - -def pytorch_to_trt_name(state_dict, num_layer): - tensor_dict = {} - for name in ixrt_name_map.keys(): - tensor_dict[ixrt_name_map[name]] = state_dict[name] - - for name in ixrt_atten_name_map.keys(): - for layer_id in range(num_layer): - key_name = name.format(layer_id) - value_name = ixrt_atten_name_map[name].format(layer_id) - tensor_dict[value_name] = state_dict[key_name] - return tensor_dict - -def load_pytorch_weights_and_quant(path, config): - """ - Load the weights from the pytorch checkpoint - """ - state_dict = torch.load(path, map_location='cpu') - tensor_dict = pytorch_to_trt_name(state_dict, config.num_hidden_layers) - return get_weight_dict(tensor_dict, config) - -class BertConfig: - def __init__(self, bert_config_path, use_fp16, use_int8=False, use_trt=False): - with open(bert_config_path, "r") as f: - data = json.load(f) - self.num_attention_heads = data["num_attention_heads"] - self.hidden_size = data["hidden_size"] - self.intermediate_size = data["intermediate_size"] - self.num_hidden_layers = data["num_hidden_layers"] - self.head_size = self.hidden_size // self.num_attention_heads - self.use_fp16 = use_fp16 - self.use_int8 = use_int8 - self.use_trt = use_trt - -if __name__ == '__main__': - 
bert_config_path = './data/bert-large-uncased/bert_config.json' - pytorch_model_path = './data/bert-large-uncased/bert_large_int8_qat.bin' - weight_save_path = "./data/bert-large-uncased/bert_large_v1_1_int8.wts" - config = BertConfig(bert_config_path, True) - weights_dict = load_pytorch_weights_and_quant(pytorch_model_path, config) - f = open(weight_save_path, "w") - num = 0 - for key, value in weights_dict.items(): - if key.find('_amax') == -1: - num += 1 - - f.write('{}\n'.format(num)) - for key, value in weights_dict.items(): - if key.find('_amax') != -1: - continue - print('key: ', key) - f.write("{} {}".format(key, len(value))) - print(len(value)) - for v in value: - f.write(" ") - f.write(struct.pack('>f', float(v)).hex()) - f.write("\n") - - f.write('{}\n'.format(len(weights_dict) - num)) - for key, value in weights_dict.items(): - if key.find('_amax') == -1: - continue - print('key: ', key) - print('value: ', value) - f.write('{} '.format(key)) - f.write(struct.pack('>f', float(weights_dict[key])).hex()) - f.write('\n') diff --git a/models/nlp/language_model/bert_base_squad/ixrt/python/inference.py b/models/nlp/language_model/bert_base_squad/ixrt/python/inference.py deleted file mode 100644 index a509f0713867980168192ae9ed884821072c4ecf..0000000000000000000000000000000000000000 --- a/models/nlp/language_model/bert_base_squad/ixrt/python/inference.py +++ /dev/null @@ -1,413 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -import sys -import time -import json -import ctypes -import argparse -import collections -import numpy as np -import tensorrt as trt -import pycuda.driver as cuda -import pycuda.autoinit - -import helpers.tokenization as tokenization -import helpers.data_processing as dp -from tqdm import tqdm -import math - -from load_ixrt_plugin import load_ixrt_plugin -TRT_LOGGER = trt.Logger(trt.Logger.ERROR) - -def parse_args(): - """ - Parse command line arguments - """ - parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument('-e', '--engine', - help='Path to BERT TensorRT engine') - parser.add_argument("-b", "--batch-size", default=1, help="Batch size for inference.", type=int) - parser.add_argument('-p', '--passage', nargs='*', - help='Text for paragraph/passage for BERT QA', - default='') - parser.add_argument('-pf', '--passage-file', - help='File containing input passage', - default='') - parser.add_argument('-q', '--question', nargs='*', - help='Text for query/question for BERT QA', - default='') - parser.add_argument('-qf', '--question-file', - help='File containing input question', - default='') - parser.add_argument('-sq', '--squad-json', - help='SQuAD json file', - default='') - parser.add_argument('-o', '--output-prediction-file', - help='Output prediction file for SQuAD evaluation', - default='./predictions.json') - parser.add_argument('-v', '--vocab-file', - help='Path to file containing entire understandable vocab') - parser.add_argument('-s', '--sequence-length', - help='The sequence length to use. Defaults to 128', - default=128, type=int) - parser.add_argument('--max-query-length', - help='The maximum length of a query in number of tokens. Queries longer than this will be truncated', - default=64, type=int) - parser.add_argument('--max-answer-length', - help='The maximum length of an answer that can be generated', - default=30, type=int) - parser.add_argument('--n-best-size', - help='Total number of n-best predictions to generate in the nbest_predictions.json output file', - default=20, type=int) - parser.add_argument('--doc-stride', - help='When splitting up a long document into chunks, what stride to take between chunks', - default=128, type=int) - parser.add_argument('--target_qps', - help="target qps metric", required=False, type=int) - parser.add_argument("-i", "--int8", action="store_true", help="Indicates that inference should be run in INT8 precision", required=False) - args, _ = parser.parse_known_args() - return args - -if __name__ == '__main__': - args = parse_args() - - paragraph_text = None - squad_examples = None - output_prediction_file = None - - if not args.passage == '': - paragraph_text = ' '.join(args.passage) - elif not args.passage_file == '': - f = open(args.passage_file, 'r') - paragraph_text = f.read() - elif not args.squad_json == '': - squad_examples = dp.read_squad_json(args.squad_json) - output_prediction_file = args.output_prediction_file - else: - paragraph_text = input("Paragraph: ") - - question_text = None - if not args.question == '': - question_text = ' '.join(args.question) - elif not args.question_file == '': - f = open(args.question_file, 'r') - question_text = f.read() - - tokenizer = tokenization.FullTokenizer(vocab_file=args.vocab_file, do_lower_case=True) - # When splitting up a long document into chunks, how much stride to take between chunks. - doc_stride = args.doc_stride - # The maximum total input sequence length after WordPiece tokenization. 
- # Sequences longer than this will be truncated, and sequences shorter - max_seq_length = args.sequence_length - - def question_features(tokens, question): - # Extract features from the paragraph and question - return dp.convert_example_to_features(tokens, question, tokenizer, max_seq_length, doc_stride, args.max_query_length) - - load_ixrt_plugin(TRT_LOGGER) - - # The first context created will use the 0th profile. A new context must be created - # for each additional profile needed. Here, we only use batch size 1, thus we only need the first profile. - with open(args.engine, 'rb') as f: - runtime = trt.Runtime(TRT_LOGGER) - engine = runtime.deserialize_cuda_engine(f.read()) - context = engine.create_execution_context() - - # select engine profile - selected_profile = -1 - num_binding_per_profile = engine.num_bindings // engine.num_optimization_profiles - for idx in range(engine.num_optimization_profiles): - profile_shape = engine.get_profile_shape(profile_index = idx, binding = idx * num_binding_per_profile) - if profile_shape[0][0] <= args.batch_size and profile_shape[2][0] >= args.batch_size and profile_shape[0][1] <= max_seq_length and profile_shape[2][1] >= max_seq_length: - selected_profile = idx - break - if selected_profile == -1: - raise RuntimeError("Could not find any profile that can run batch size {}.".format(args.batch_size)) - - # Create a stream in which to copy inputs/outputs and run inference. - stream = cuda.Stream() - - # if args.use_trt: - # context.active_optimization_profile = selected_profile - # else: - context.set_optimization_profile_async(selected_profile, stream.handle) - binding_idx_offset = selected_profile * num_binding_per_profile - - input_shape = (args.batch_size, max_seq_length) - input_nbytes = trt.volume(input_shape) * 4 - for binding in range(3): - context.set_binding_shape(binding, input_shape) - assert context.all_binding_shapes_specified - - # Allocate device memory for inputs. - d_inputs = [cuda.mem_alloc(input_nbytes) for binding in range(3)] - - # Allocate output buffer by querying the size from the context. This may be different for different input shapes. 
- h_output = cuda.pagelocked_empty(tuple(context.get_binding_shape(binding_idx_offset + 3)), dtype=np.float32) - d_output = cuda.mem_alloc(h_output.nbytes) - - def inference(features, tokens): - global h_output - - _NetworkOutput = collections.namedtuple( # pylint: disable=invalid-name - "NetworkOutput", - ["start_logits", "end_logits", "feature_index"]) - networkOutputs = [] - - eval_time_elapsed = 0 - for feature_index, feature in enumerate(features): - # Copy inputs - input_ids_batch = np.repeat(np.expand_dims(feature.input_ids, 0), args.batch_size, axis=0) - segment_ids_batch = np.repeat(np.expand_dims(feature.segment_ids, 0), args.batch_size, axis=0) - input_mask_batch = np.repeat(np.expand_dims(feature.input_mask, 0), args.batch_size, axis=0) - - input_ids = cuda.register_host_memory(np.ascontiguousarray(input_ids_batch.ravel())) - segment_ids = cuda.register_host_memory(np.ascontiguousarray(segment_ids_batch.ravel())) - input_mask = cuda.register_host_memory(np.ascontiguousarray(input_mask_batch.ravel())) - - eval_start_time = time.time() - cuda.memcpy_htod_async(d_inputs[0], input_ids, stream) - cuda.memcpy_htod_async(d_inputs[1], segment_ids, stream) - cuda.memcpy_htod_async(d_inputs[2], input_mask, stream) - - # Run inference - context.execute_async_v2(bindings=[0 for i in range(binding_idx_offset)] +[int(d_inp) for d_inp in d_inputs] + [int(d_output)], stream_handle=stream.handle) - # Synchronize the stream - stream.synchronize() - eval_time_elapsed += (time.time() - eval_start_time) - - # Transfer predictions back from GPU - cuda.memcpy_dtoh_async(h_output, d_output, stream) - stream.synchronize() - - # Only retrieve and post-process the first batch - batch = h_output[0] - - networkOutputs.append(_NetworkOutput( - start_logits = np.array(batch.squeeze()[:, 0]), - end_logits = np.array(batch.squeeze()[:, 1]), - feature_index = feature_index - )) - - eval_time_elapsed /= len(features) - - # Total number of n-best predictions to generate in the nbest_predictions.json output file - n_best_size = 20 - - # The maximum length of an answer that can be generated. 
This is needed - # because the start and end predictions are not conditioned on one another - max_answer_length = 30 - - prediction, nbest_json, scores_diff_json = dp.get_predictions(tokens, features, - networkOutputs, args.n_best_size, args.max_answer_length) - - return eval_time_elapsed, prediction, nbest_json - - def print_single_query(eval_time_elapsed, prediction, nbest_json): - print("------------------------") - print("Running inference in {:.3f} Sentences/Sec".format(args.batch_size/eval_time_elapsed)) - print("------------------------") - - print("Answer: '{}'".format(prediction)) - print("With probability: {:.3f}".format(nbest_json[0]['probability'] * 100.0)) - - def inference_all_dynamic(features_list, squad_examples, sort_index, all_precision): - # h_output = torch.tensor((args.batch_size, max_seq_length, 2)) - global h_output - _NetworkOutput = collections.namedtuple( # pylint: disable=invalid-name - "NetworkOutput", - ["start_logits", "end_logits", "feature_index"]) - networkOutputs = [] - - batch_input_ids = [] - batch_segment_ids = [] - all_token_ids = [] - batch_example_list = [] - batch_feature_list = [] - batch_feature = [] - batch_example = [] - max_batch_length = 0 - seq_length_list = [] - for index in tqdm(sort_index): - batch_feature.append(features_list[index]) - batch_example.append(squad_examples[index]) - max_batch_length = max(max_batch_length, len(features_list[index].input_ids)) - if args.int8: - max_batch_length = max_seq_length - else: - max_batch_length = math.ceil(max_batch_length / 2) * 2 - seq_length_list.append(len(features_list[index].input_ids)) - if len(batch_feature) == args.batch_size: - batch_input_ids = [ - np.pad(bf.input_ids, (0, max_batch_length - bf.input_ids.shape[0]), 'constant',constant_values = (0)).reshape(1, -1) - for bf in batch_feature - ] - batch_input_ids = np.concatenate(batch_input_ids, axis=0) - batch_segment_ids = [ - np.pad(bf.segment_ids, (0, max_batch_length - bf.segment_ids.shape[0]), 'constant',constant_values = (0)).reshape(1, -1) - for bf in batch_feature - ] - batch_segment_ids = np.concatenate(batch_segment_ids, axis=0) - all_token_ids.append( - [ - batch_input_ids.astype(np.int32), - batch_segment_ids.astype(np.int32) - ] - ) - batch_example_list.append(batch_example) - batch_feature_list.append(batch_feature) - batch_input_ids = [] - batch_segment_ids = [] - batch_feature = [] - batch_example = [] - max_batch_length = 0 - - if len(batch_feature): - batch_input_ids = [ - np.pad(bf.input_ids, (0, max_batch_length - bf.input_ids.shape[0]), 'constant',constant_values = (0)).reshape(1, -1) - for bf in batch_feature - ] - batch_input_ids = np.concatenate(batch_input_ids, axis=0) - batch_segment_ids = [ - np.pad(bf.segment_ids, (0, max_batch_length - bf.segment_ids.shape[0]), 'constant',constant_values = (0)).reshape(1, -1) - for bf in batch_feature - ] - batch_segment_ids = np.concatenate(batch_segment_ids, axis=0) - all_token_ids.append( - [ - batch_input_ids.astype(np.int32), - batch_segment_ids.astype(np.int32) - ] - ) - batch_input_ids = [] - batch_segment_ids = [] - batch_example_list.append(batch_example) - batch_feature_list.append(batch_feature) - - # warm up - for i in range(20): - for binding in range(3): - context.set_binding_shape(binding, (args.batch_size, max_seq_length)) - assert context.all_binding_shapes_specified - cuda.memcpy_htod_async(d_inputs[0], np.zeros((args.batch_size, max_seq_length), dtype=np.int32).ravel(), stream) - cuda.memcpy_htod_async(d_inputs[1], np.zeros((args.batch_size, max_seq_length), 
dtype=np.int32).ravel(), stream) - context.execute_async_v2(bindings=[0 for i in range(binding_idx_offset)] +[int(d_inp) for d_inp in d_inputs] + [int(d_output)], stream_handle=stream.handle) - stream.synchronize() - - start_time = time.time() - output_index = 0 - for input_ids, segment_ids in tqdm(all_token_ids): - for binding in range(3): - context.set_binding_shape(binding, input_ids.shape) - assert context.all_binding_shapes_specified - - cuda.memcpy_htod_async(d_inputs[0], input_ids.ravel(), stream) - cuda.memcpy_htod_async(d_inputs[1], segment_ids.ravel(), stream) - stream.synchronize() - - context.execute_async_v2(bindings=[0 for i in range(binding_idx_offset)] +[int(d_inp) for d_inp in d_inputs] + [int(d_output)], stream_handle=stream.handle) - stream.synchronize() - - cuda.memcpy_dtoh_async(h_output, d_output, stream) - stream.synchronize() - - new_h_output = np.array(h_output.reshape(-1)[:input_ids.shape[0]*input_ids.shape[1]*2]).reshape(input_ids.shape[0], input_ids.shape[1], 2) - for index in range(input_ids.shape[0]): - networkOutputs.append(_NetworkOutput( - start_logits = new_h_output[index, :seq_length_list[output_index], 0], - end_logits = new_h_output[index, :seq_length_list[output_index], 1], - feature_index = index - )) - output_index += 1 - infer_time = time.time() - start_time - output_index = 0 - for (be, bf) in zip(batch_example_list, batch_feature_list): - for index in range(len(bf)): - prediction, nbest_json, scores_diff_json = dp.get_predictions(be[index].doc_tokens, bf, - [networkOutputs[output_index]], args.n_best_size, args.max_answer_length) - output_index += 1 - all_precision[be[index].id] = prediction - return infer_time, all_precision - - status = 0 - if squad_examples: - all_predictions = collections.OrderedDict() - - features_list = [] - lengths = [] - - for example_index, example in tqdm(enumerate(squad_examples)): - features = question_features(example.doc_tokens, example.question_text) - features_list.append(features[0]) - lengths.append(len(features[0].input_ids)) - - sort_index = np.argsort(lengths) - infer_time, all_predictions = inference_all_dynamic(features_list, squad_examples, sort_index, all_predictions) - print(F"E2E time : {infer_time:.3f} seconds") - qps = len(squad_examples)/infer_time - print(f"Latency QPS: {qps} sentences/s") - - with open(output_prediction_file, "w") as f: - f.write(json.dumps(all_predictions, indent=4)) - print("\nOutput dump to {}".format(output_prediction_file)) - - if args.target_qps: - if qps >= args.target_qps: - print(f"target qps: {args.target_qps}, qps: {qps}, pass.") - else: - print(f"target qps: {args.target_qps}, qps: {qps}, failed.") - status = 1 - else: - # Extract tokecs from the paragraph - doc_tokens = dp.convert_doc_tokens(paragraph_text) - - if question_text: - print("\nPassage: {}".format(paragraph_text)) - print("\nQuestion: {}".format(question_text)) - - features = question_features(doc_tokens, question_text) - eval_time_elapsed, prediction, nbest_json = inference(features, doc_tokens) - print_single_query(eval_time_elapsed, prediction, nbest_json) - else: - # If no question text is provided, loop until the question is 'exit' - EXIT_CMDS = ["exit", "quit"] - question_text = input("Question (to exit, type one of {:}): ".format(EXIT_CMDS)) - - while question_text.strip() not in EXIT_CMDS: - features = question_features(doc_tokens, question_text) - eval_time_elapsed, prediction, nbest_json = inference(features, doc_tokens) - # print_single_query(eval_time_elapsed, prediction, nbest_json) - # 
question_text = input("Question (to exit, type one of {:}): ".format(EXIT_CMDS)) - del context - del engine - sys.exit(status) \ No newline at end of file diff --git a/models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/evaluate-v1.1.py b/models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/evaluate-v1.1.py index 67d6c18245b7eec0c8a995fc2a7284715429b498..05b91c9be79d25880e0bbcb1b9c3e24be83b0581 100644 --- a/models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/evaluate-v1.1.py +++ b/models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/evaluate-v1.1.py @@ -96,7 +96,10 @@ def evaluate(dataset, predictions, f1_acc): status = 0 else: print("&&&& PASSED TensorRT BERT Squad Accuracy matches reference.") - + metricResult = {"metricResult": {}} + metricResult["metricResult"]["exact_match"] = round(exact_match, 3) + metricResult["metricResult"]["f1"] = round(f1, 3) + print(metricResult) return {'exact_match': exact_match, 'f1': f1, "status": status} if __name__ == '__main__': diff --git a/models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/inference.py b/models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/inference.py index b6af06dcf683496128bbbdb9e5458f8b2753885d..920d5b809c974a21f3644e5109a01fac676a4a99 100644 --- a/models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/inference.py +++ b/models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/inference.py @@ -385,6 +385,11 @@ if __name__ == '__main__': qps = math.ceil(len(squad_examples)/args.batch_size)*args.batch_size/infer_time print(f"Latency QPS: {qps} sentences/s") + metricResult = {"metricResult": {}} + metricResult["metricResult"]["E2E time"] = round(infer_time, 3) + metricResult["metricResult"]["Latency QPS"] = round(qps, 3) + print(metricResult) + with open(output_prediction_file, "w") as f: f.write(json.dumps(all_predictions, indent=4)) print("\nOutput dump to {}".format(output_prediction_file)) diff --git a/models/nlp/language_model/bert_base_squad/ixrt/python/perf.py b/models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/perf.py similarity index 100% rename from models/nlp/language_model/bert_base_squad/ixrt/python/perf.py rename to models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/perf.py diff --git a/models/nlp/language_model/bert_base_squad/ixrt/python/load_ixrt_plugin.py b/models/nlp/language_model/bert_base_squad/ixrt/python/load_ixrt_plugin.py deleted file mode 100644 index 8e04b80718c7113e1d9ae383e6e6e39f64980153..0000000000000000000000000000000000000000 --- a/models/nlp/language_model/bert_base_squad/ixrt/python/load_ixrt_plugin.py +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -from os.path import join, dirname, exists, abspath -import tensorrt as trt -import ctypes -import os -import subprocess - -def is_nvidia_platform(): - try: - # 尝试运行 nvidia-smi - subprocess.check_output(['nvidia-smi']) - return True - except (subprocess.CalledProcessError, FileNotFoundError): - return False - -def load_ixrt_plugin(logger=trt.Logger(trt.Logger.WARNING), namespace="", dynamic_path=""): - if not dynamic_path: - if is_nvidia_platform(): - dynamic_path = join(dirname(abspath(__file__)), "..", "build", "libixrt_plugin.so") - else: - dynamic_path = join(dirname(trt.__file__), "lib", "libixrt_plugin.so") - - if not exists(dynamic_path): - raise FileNotFoundError( - f"The ixrt_plugin lib {dynamic_path} is not existed, please provided effective plugin path!") - handle = ctypes.CDLL(dynamic_path, mode=ctypes.RTLD_GLOBAL) - handle.initLibNvInferPlugins.argtypes = [ctypes.c_void_p, ctypes.c_char_p] - handle.initLibNvInferPlugins.restype = ctypes.c_bool - handle.initLibNvInferPlugins(None, namespace.encode('utf-8')) - print(f"Loaded plugin from {dynamic_path}") \ No newline at end of file diff --git a/models/nlp/language_model/bert_base_squad/ixrt/src/qkv_to_context/qkvToContextInt8Plugin.cu b/models/nlp/language_model/bert_base_squad/ixrt/src/qkv_to_context/qkvToContextInt8Plugin.cu index 5c4d5c5331077a490b1f6feb32d30596500fa23c..2330debf3e1bee647c70336b35729699b90ad06e 100644 --- a/models/nlp/language_model/bert_base_squad/ixrt/src/qkv_to_context/qkvToContextInt8Plugin.cu +++ b/models/nlp/language_model/bert_base_squad/ixrt/src/qkv_to_context/qkvToContextInt8Plugin.cu @@ -284,7 +284,7 @@ cudaError_t fused_multihead_attetion_int8(int8_t* qkv_buffer, int8_t* mask, int8 case 64: case 128: case 192: - case 256: + case 256: { cuinferFlashAttnConfigInfo flashAttnInfo; flashAttnInfo.scaling = sqrt(1.f / (head_dim * 1.0)); flashAttnInfo.quantParam.q_amax = arrange_qkv_amax; @@ -318,7 +318,8 @@ cudaError_t fused_multihead_attetion_int8(int8_t* qkv_buffer, int8_t* mask, int8 CUINFER_CHECK(cuinferFMHAForwardEx(cuinfer_handle, flashAttnInfo, qDesc, q_buffer, kDesc, k_buffer, vDesc, v_buffer, maskDesc, mask, oDesc, qk_buffer)); break; - default: + } + default: { cuinfer_i8_gemm(k_buffer, q_buffer, nullptr, qkv_buffer, batch_size * head_num, batch_seq_len, batch_seq_len, head_dim, batch_seq_len * head_dim, batch_seq_len * head_dim, batch_seq_len * batch_seq_len, scaleBmm1, 0.0, 0, cuinfer_handle, stream); @@ -330,6 +331,7 @@ cudaError_t fused_multihead_attetion_int8(int8_t* qkv_buffer, int8_t* mask, int8 batch_seq_len, batch_seq_len * head_dim, batch_seq_len * batch_seq_len, batch_seq_len * head_dim, scaleBmm2, cuinfer_handle, stream); break; + } } IxinferArrangeAttenOutputI8II8O(batch_token_num, hidden_size, stream, qk_buffer, qkv_out, batch_seq_len, head_dim, diff --git a/models/nlp/language_model/bert_large_squad/ixrt/ci/prepare.sh b/models/nlp/language_model/bert_large_squad/ixrt/ci/prepare.sh index 19e3e8a8cecd694e7558272ddec690171dc7b872..979f9050b91f1b5d2eace93840935329ee20fed3 100644 --- a/models/nlp/language_model/bert_large_squad/ixrt/ci/prepare.sh +++ b/models/nlp/language_model/bert_large_squad/ixrt/ci/prepare.sh @@ -1,3 +1,41 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +# install ixrt run +bash /root/data/3rd_party/ixrt-0.10.0+corex.4.2.0.20250115-linux_x86_64.run + +if [ "$1" = "nvidia" ]; then + cmake -S . -B build -DUSE_TENSORRT=true + cmake --build build -j16 +else + cmake -S . -B build + cmake --build build -j16 +fi + pip install -r requirements.txt mkdir -p ./python/data ln -s /root/data/checkpoints/bert-large-uncased/ ./python/data && ln -s /root/data/datasets/squad/ ./python/data \ No newline at end of file diff --git a/models/nlp/language_model/bert_large_squad/ixrt/python/evaluate-v1.1.py b/models/nlp/language_model/bert_large_squad/ixrt/python/evaluate-v1.1.py index ba4ee19094e492b5caecef414da90adf2dba8514..ce5bb98df7f60176ac5def72f4c2a5d1d54f990e 100644 --- a/models/nlp/language_model/bert_large_squad/ixrt/python/evaluate-v1.1.py +++ b/models/nlp/language_model/bert_large_squad/ixrt/python/evaluate-v1.1.py @@ -107,6 +107,10 @@ def evaluate(dataset, predictions, f1_acc): print("&&&& FAILED TensorRT BERT Squad Accuracy matches reference.") else: print("&&&& PASSED TensorRT BERT Squad Accuracy matches reference.") + metricResult = {"metricResult": {}} + metricResult["metricResult"]["exact_match"] = round(exact_match, 3) + metricResult["metricResult"]["f1"] = round(f1, 3) + print(metricResult) return {'exact_match': exact_match, 'f1': f1} if __name__ == '__main__': diff --git a/models/nlp/language_model/bert_large_squad/ixrt/python/inference.py b/models/nlp/language_model/bert_large_squad/ixrt/python/inference.py index 860322c3ed5873e0002d9aa24a011394dd92e570..ec93972d295cc3fa777ab60cf82d12401b99f7c3 100644 --- a/models/nlp/language_model/bert_large_squad/ixrt/python/inference.py +++ b/models/nlp/language_model/bert_large_squad/ixrt/python/inference.py @@ -377,6 +377,10 @@ if __name__ == '__main__': qps = len(squad_examples)/infer_time print(f"Latency QPS: {qps} sentences/s") + metricResult = {"metricResult": {}} + metricResult["metricResult"]["E2E time"] = round(infer_time, 3) + metricResult["metricResult"]["Latency QPS"] = round(qps, 3) + print(metricResult) with open(output_prediction_file, "w") as f: f.write(json.dumps(all_predictions, indent=4)) diff --git a/models/nlp/language_model/bert_large_squad/ixrt/src/qkv_to_context/qkvToContextInt8Plugin.cu b/models/nlp/language_model/bert_large_squad/ixrt/src/qkv_to_context/qkvToContextInt8Plugin.cu index 5c4d5c5331077a490b1f6feb32d30596500fa23c..2330debf3e1bee647c70336b35729699b90ad06e 100644 --- a/models/nlp/language_model/bert_large_squad/ixrt/src/qkv_to_context/qkvToContextInt8Plugin.cu +++ b/models/nlp/language_model/bert_large_squad/ixrt/src/qkv_to_context/qkvToContextInt8Plugin.cu @@ -284,7 +284,7 @@ cudaError_t fused_multihead_attetion_int8(int8_t* qkv_buffer, int8_t* mask, int8 case 64: case 128: case 192: - case 256: + case 256: { cuinferFlashAttnConfigInfo flashAttnInfo; flashAttnInfo.scaling = sqrt(1.f / (head_dim * 1.0)); 
flashAttnInfo.quantParam.q_amax = arrange_qkv_amax; @@ -318,7 +318,8 @@ cudaError_t fused_multihead_attetion_int8(int8_t* qkv_buffer, int8_t* mask, int8 CUINFER_CHECK(cuinferFMHAForwardEx(cuinfer_handle, flashAttnInfo, qDesc, q_buffer, kDesc, k_buffer, vDesc, v_buffer, maskDesc, mask, oDesc, qk_buffer)); break; - default: + } + default: { cuinfer_i8_gemm(k_buffer, q_buffer, nullptr, qkv_buffer, batch_size * head_num, batch_seq_len, batch_seq_len, head_dim, batch_seq_len * head_dim, batch_seq_len * head_dim, batch_seq_len * batch_seq_len, scaleBmm1, 0.0, 0, cuinfer_handle, stream); @@ -330,6 +331,7 @@ cudaError_t fused_multihead_attetion_int8(int8_t* qkv_buffer, int8_t* mask, int8 batch_seq_len, batch_seq_len * head_dim, batch_seq_len * batch_seq_len, batch_seq_len * head_dim, scaleBmm2, cuinfer_handle, stream); break; + } } IxinferArrangeAttenOutputI8II8O(batch_token_num, hidden_size, stream, qk_buffer, qkv_out, batch_seq_len, head_dim, diff --git a/models/nlp/language_model/deberta/ixrt/README.md b/models/nlp/language_model/deberta/ixrt/README.md index 221a33a895d476b2d73672ab6c26420528d0a33a..fd1757cd9235cf28fca87ff7ccd60371a43a6b8e 100644 --- a/models/nlp/language_model/deberta/ixrt/README.md +++ b/models/nlp/language_model/deberta/ixrt/README.md @@ -15,18 +15,7 @@ cd ${MODEL_PATH} apt install -y libnuma-dev -pip3 install onnxsim -pip3 install onnx_graphsurgeon -pip3 install scikit-learn -pip3 install tqdm -pip3 install pycuda -pip3 install onnx -pip3 install tabulate -pip3 install cv2 -pip3 install pycocotools -pip3 install opencv-python==4.6.0.66 -pip3 install tf2onnx -pip3 install transformers==4.33.3 +pip3 install -r requirements.txt ``` ### Download @@ -52,8 +41,10 @@ python3 remove_clip_and_cast.py ## Inference ```bash +git clone https://gitee.com/deep-spark/iluvatar-corex-ixrt.git --depth=1 + export ORIGIN_ONNX_NAME=./deberta-sim-drop-clip-drop-invaild-cast -export OPTIMIER_FILE=/Path/ixrt/oss/tools/optimizer/optimizer.py +export OPTIMIER_FILE=./iluvatar-corex-ixrt/tools/optimizer/optimizer.py export PROJ_PATH=./ ``` @@ -78,13 +69,14 @@ pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/requirements.txt pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/requirements.txt # setup -mv perf_engine.py ./ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py cp ./datasets/open_squad/* ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/ mv ./deberta-sim-drop-clip-drop-invaild-cast.onnx general_perf/model_zoo/popular/open_deberta/ mv ./general_perf/model_zoo/popular/ ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/ cd ./ByteMLPerf/byte_infer_perf/general_perf +mkdir -p workloads +wget -O workloads/deberta-torch-fp32.json https://raw.githubusercontent.com/bytedance/ByteMLPerf/refs/heads/main/byte_infer_perf/general_perf/workloads/deberta-torch-fp32.json wget http://files.deepspark.org.cn:880/deepspark/Palak.tar tar -zxvf Palak.tar diff --git a/models/nlp/language_model/deberta/ixrt/ci/prepare.sh b/models/nlp/language_model/deberta/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..d440393e7ed913ae6a92fc0ab043a5744086f8c1 --- /dev/null +++ b/models/nlp/language_model/deberta/ixrt/ci/prepare.sh @@ -0,0 +1,53 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+
+apt install -y libnuma-dev
+
+pip install -r requirements.txt
+
+cp /root/data/3rd_party/deberta-torch-fp32.json ./
+
+python3 torch2onnx.py --model_path /root/data/checkpoints/open_deberta/deberta-base-squad.pt --output_path deberta-torch-fp32.onnx
+onnxsim deberta-torch-fp32.onnx deberta-torch-fp32-sim.onnx
+python3 remove_clip_and_cast.py
+
+mkdir -p data/open_deberta
+cp ./deberta-sim-drop-clip-drop-invaild-cast.onnx data/open_deberta/deberta.onnx
+
+ln -s ../../../../../toolbox/ByteMLPerf ./
+
+pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/requirements.txt
+pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/requirements.txt
+
+# setup
+cp /root/data/datasets/open_squad/* ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/
+
+mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular
+cp -r /root/data/checkpoints/open_deberta ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/
+cp ./deberta-sim-drop-clip-drop-invaild-cast.onnx ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/open_deberta/
+
+cd ./ByteMLPerf/byte_infer_perf/general_perf
+cp -r /root/data/3rd_party/workloads ./
+# wget http://files.deepspark.org.cn:880/deepspark/Palak.tar
+cp /root/data/3rd_party/Palak.tar ./
+tar -zxvf Palak.tar
+
+# Next, modify the code in ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/data_loader.py: change AutoTokenizer.from_pretrained("Palak/microsoft_deberta-base_squad") => AutoTokenizer.from_pretrained("/Your/Path/Palak/microsoft_deberta-base_squad")
+
+# run acc perf
+sed -i 's/tensorrt_legacy/tensorrt/g' backends/ILUVATAR/common.py
\ No newline at end of file
diff --git a/models/nlp/language_model/deberta/ixrt/perf_engine.py b/models/nlp/language_model/deberta/ixrt/perf_engine.py
deleted file mode 100644
index 089d9860f573bba7e19f84aa20fb830a8fcc22d8..0000000000000000000000000000000000000000
--- a/models/nlp/language_model/deberta/ixrt/perf_engine.py
+++ /dev/null
@@ -1,349 +0,0 @@
-# Copyright 2023 ByteDance and/or its affiliates.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import sys -import os -import logging -import importlib -import json -import subprocess -import time - -from typing import Any, Dict, Tuple -from prompt_toolkit.shortcuts import radiolist_dialog, input_dialog, yes_no_dialog -from prompt_toolkit.styles import Style - -BYTE_MLPERF_ROOT = os.path.dirname( - os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -os.chdir(BYTE_MLPERF_ROOT) -sys.path.insert(0, BYTE_MLPERF_ROOT) - -import argparse -from general_perf.core.configs.workload_store import load_workload -from general_perf.core.configs.dataset_store import load_dataset -from general_perf.core.configs.backend_store import init_compile_backend, init_runtime_backend - -logging.basicConfig(level=logging.INFO) -log = logging.getLogger("PerfEngine") -os.environ["TF_CPP_MIN_LOG_LEVEL"] = '3' - - -def get_args(): - """Parse commandline.""" - parser = argparse.ArgumentParser() - parser.add_argument( - "--task", - default="resnet50-tf-fp32", - help="The task going to be evaluted, refs to workloads/") - parser.add_argument( - "--hardware_type", - default="GPU", - help="The backend going to be evaluted, refs to backends/") - parser.add_argument("--compile_only", - action='store_true', - help="Run compilation only") - - args = parser.parse_args() - return args - - -class PerfEngine: - def __init__(self) -> None: - super().__init__() - self.args = get_args() - self.workload = load_workload(self.args.task) - self.backend_type = self.args.hardware_type - self.compile_backend = None - self.old_os_path = os.environ['PATH'] - self.prev_sys_path = list(sys.path) - self.real_prefix = sys.prefix - self.compile_only_mode = False - - def start_engine(self) -> None: - ''' - Byte MlPerf will create an virtual env for each backend to avoid dependance conflict - ''' - success, total = 0, len(self.workload) - if total == 0: - return - log.info("******************* Backend Env Initization *******************") - status = self.activate_venv(self.backend_type) - if not status: - log.warning("Activate virtualenv Failed, Please Check...") - - self.compile_backend = init_compile_backend(self.backend_type) - self.runtime_backend = init_runtime_backend(self.backend_type) - - output_dir = os.path.abspath('general_perf/reports/' + - self.backend_type) - os.makedirs(output_dir, exist_ok=True) - - status = self.single_workload_perf(self.workload) - - def single_workload_perf( - self, workload: Dict[str, Any]) -> bool: - log.info("******************************************* Start to test model: {}. *******************************************".format(workload['model'])) - - # Check Compile Only Mode - self.compile_only_mode = False - if self.args.compile_only or workload['compile_only']: - self.compile_only_mode = True - - base_report = { - "Model": workload['model'].upper(), - "Backend": self.backend_type, - "Host Info": self.get_cpu_name() - } - - # Initalize Model Config Info - model_info = self.get_model_info(workload['model']) - pre_compile_config = {"workload": workload, 'model_info': model_info} - interact_info = self.check_interact_info(pre_compile_config) - pre_compile_config['interact_info'] = interact_info - if not model_info['dataset_name']: - model_info['dataset_name'] = 'fake_dataset' - - - ''' - Compile Backend could do some optimization like convert model format here - ''' - log.info("******************************************* Running Backend Compilation... 
*******************************************") - log.info("Running Backend Preoptimization...") - pre_compile_config = self.compile_backend.pre_optimize(pre_compile_config) - - - # Initalize dataset - dataset = load_dataset(model_info) - dataset.preprocess() - base_report['Dataset'] = model_info['dataset_name'].upper( - ) if model_info['dataset_name'] else None - - #Placeholder Only - segment_info = self.compile_backend.segment(pre_compile_config) - - best_batch_sizes = self.compile_backend.get_best_batch_size() - if isinstance(best_batch_sizes, list): - pre_compile_config['workload'][ - 'batch_sizes'] = best_batch_sizes - - log.info("Start to compile the model...") - start = time.time() - compile_info = self.compile_backend.compile(pre_compile_config, - dataset) - end = time.time() - - graph_compile_report = {} - graph_compile_report["Compile Duration"] = round(end - start, 5) - graph_compile_report["Compile Precision"] = compile_info[ - 'compile_precision'] - graph_compile_report["Subgraph Coverage"] = compile_info['sg_percent'] - if 'optimizations' in compile_info: - graph_compile_report['Optimizations'] = compile_info['optimizations'] - if 'instance_count' in compile_info: - base_report['Instance Count'] = compile_info['instance_count'] - if 'device_count' in compile_info: - base_report['Device Count'] = compile_info['device_count'] - base_report['Graph Compile'] = graph_compile_report - - # Initalize Output Dir and Reports - output_dir = os.path.abspath('general_perf/reports/' + - self.backend_type + '/' + - workload['model']) - os.makedirs(output_dir, exist_ok=True) - - # Compile only mode will stop here - if self.compile_only_mode: - base_report.pop("Backend") - return compile_info["compile_status"], base_report - - # load runtime backend - """ - Start Here - """ - batch_sizes = pre_compile_config['workload']['batch_sizes'] - self.runtime_backend.configs = compile_info - self.runtime_backend.workload = workload - self.runtime_backend.model_info = model_info - - self.runtime_backend.load(workload['batch_sizes'][0]) - # test accuracy - accuracy_report = {} - AccuracyChecker = self.get_accuracy_checker( - model_info['dataset_name'] - if model_info['dataset_name'] else 'fake_dataset') - AccuracyChecker.runtime_backend = self.runtime_backend - AccuracyChecker.dataloader = dataset - AccuracyChecker.output_dir = output_dir - AccuracyChecker.configs = compile_info - - if workload['test_accuracy']: - log.info("******************************************* Running Accuracy Checker... *******************************************") - - dataset.rebatch(self.runtime_backend.get_loaded_batch_size()) - accuracy_results = AccuracyChecker.calculate_acc( - workload['data_percent']) - - accuracy_report['Data Percent'] = workload['data_percent'] - accuracy_report.update(accuracy_results) - - # test numeric - if workload['test_numeric']: - log.info("******************************************* Running Numeric Checker... 
*******************************************") - - dataset.rebatch(self.runtime_backend.get_loaded_batch_size()) - if not workload['test_accuracy']: - accuracy_results = AccuracyChecker.calculate_acc( - workload['data_percent']) - diff_results = AccuracyChecker.calculate_diff() - accuracy_report.update(diff_results) - # accuracy_report['Diff Dist'] = compile_info['model'] + '-to-' + compile_info['compile_precision'].lower() + ".png" - - if accuracy_report: - base_report['Accuracy'] = accuracy_report - - # function to test qps and latency - if workload['test_perf']: - log.info("******************************************* Runing QPS Checker... *******************************************") - performance_reports = [] - qs_status = self.runtime_backend.is_qs_mode_supported() - if qs_status: - qs_config = self.runtime_backend.generate_qs_config() - performance_reports = self.qs_benchmark(qs_config) - else: - for bs in batch_sizes: - self.runtime_backend.load(bs) - batch_reports = self.runtime_backend.benchmark(dataset) - performance_reports.append(batch_reports) - base_report['Performance'] = performance_reports - - if "Instance Count" not in base_report: - log.warning("Vendors need to Add # of instances") - if "Device Count" not in base_report: - log.warning("Vendors need to Add # of devices") - - # write output to json file - output_report_path = output_dir + "/result-" + compile_info['compile_precision'].lower() + ".json" - with open(output_report_path, 'w') as file: - json.dump(base_report, file, indent=4) - - base_report.pop("Backend") - log.info("Testing Finish. Report is saved in path: [ {}/{} ]". - format(output_dir[output_dir.rfind('general_perf'):], - os.path.basename(output_report_path))) - - return compile_info["compile_status"] - - #WIP - def qs_benchmark(self, qs_config: Dict[str, Any]) -> list: - return [] - - def get_accuracy_checker(self, dataset_name: str): - AccuracyChecker = importlib.import_module('general_perf.datasets.' 
+ - dataset_name + - ".test_accuracy") - AccuracyChecker = getattr(AccuracyChecker, 'AccuracyChecker') - return AccuracyChecker() - - def get_model_info(self, model_name: str) -> Dict[str, Any]: - with open("general_perf/model_zoo/" + model_name + '.json', - 'r') as file: - model_info = json.load(file) - return model_info - - def get_cpu_name(self): - command = "lscpu | grep 'Model name' | awk -F: '{print $2}'" - cpu_name = subprocess.check_output(command, shell=True) - return cpu_name.decode().strip() - - def check_interact_info( - self, pre_compile_config: Dict[str, Dict]) -> Dict[str, Any]: - interact_info = self.compile_backend.get_interact_profile( - pre_compile_config) - - answer = {} - if len(interact_info) == 0: - return answer - - dialog_style = Style.from_dict({ - 'dialog': 'bg:#88b8ff', - 'dialog frame.label': 'bg:#ffffff #000000', - 'dialog.body': 'bg:#000000 #a0acde', - 'dialog shadow': 'bg:#004aaa', - }) - - input_style = Style.from_dict({ - 'dialog': 'bg:#88b8ff', - 'dialog frame.label': 'bg:#ffffff #000000', - 'dialog.body': 'bg:#000000 #a0acde', - 'dialog shadow': 'bg:#004aaa', - 'text-area.prompt': 'bg:#ffffff', - 'text-area': '#000000', - }) - - option = yes_no_dialog(title=self.backend_type + '编译配置', - text='[请选择]:是否进行编译后端配置:', - style=dialog_style).run() - if option: - sum_question = len(interact_info) - for i, question in enumerate(interact_info): - if question['depends']: - state = 0 - for title in question['depends'].split(','): - if not answer[title]: - state = 1 - if state: - continue - if question['dialog_type'] == 'Yes/No Dialog': - option = yes_no_dialog( - title=self.backend_type + '编译配置进度(' + str(i + 1) + - '/' + str(sum_question) + ')', - text="[Backend " + self.backend_type + "]: " + - question['note'], - style=dialog_style).run() - elif question['dialog_type'] == "Input Dialog": - option = input_dialog( - title=self.backend_type + '编译配置进度(' + str(i + 1) + - '/' + str(sum_question) + ')', - text="[Backend " + self.backend_type + "]: " + - question['note'], - style=input_style).run() - elif question['dialog_type'] == "Radiolist Dialog": - choice = [(i, text) - for i, text in enumerate(question['options'])] - num = radiolist_dialog( - title=self.backend_type + '编译配置进度(' + str(i + 1) + - '/' + str(sum_question) + ')', - text="[Backend " + self.backend_type + "]: " + - question['note'], - values=choice, - style=dialog_style).run() - option = question['options'][num] if num is not None else question[ - 'default'] - answer[question['name']] = option - - return answer - - def activate_venv(self, hardware_type: str) -> bool: - - return True - - def deactivate_venv(self): - sys.path[: - 0] = self.prev_sys_path #will also revert the added site-packages - sys.prefix = self.real_prefix - os.environ['PATH'] = self.old_os_path - - -if __name__ == "__main__": - engine = PerfEngine() - engine.start_engine() diff --git a/models/nlp/language_model/deberta/ixrt/requirements.txt b/models/nlp/language_model/deberta/ixrt/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..f3f7ae708e22b8d1840e3c25c6893c44d85af410 --- /dev/null +++ b/models/nlp/language_model/deberta/ixrt/requirements.txt @@ -0,0 +1,12 @@ +onnxsim +onnx_graphsurgeon +scikit-learn +tqdm +pycuda +onnx +tabulate +pycocotools +opencv-python==4.6.0.66 +tf2onnx +transformers==4.33.3 +typing-extensions==4.12.2 \ No newline at end of file diff --git a/models/nlp/language_model/roberta/ixrt/README.md b/models/nlp/language_model/roberta/ixrt/README.md index 
0588c797f1f8bf147fe5d37607cf34c2821e7f6d..f37d5f3c0caa21984c992f3d4ca04367b567f2d5 100644 --- a/models/nlp/language_model/roberta/ixrt/README.md +++ b/models/nlp/language_model/roberta/ixrt/README.md @@ -13,11 +13,7 @@ export PROJ_ROOT=/PATH/TO/DEEPSPARKINFERENCE export MODEL_PATH=${PROJ_ROOT}/models/nlp/language_model/roberta/ixrt cd ${MODEL_PATH} -pip3 install onnxsim -pip3 install py-libnuma==1.2 -pip3 install bert -pip3 install pycuda -pip3 install transformers==4.33.3 +pip3 install -r requirements.txt ``` ### Download @@ -48,8 +44,10 @@ onnxsim open_roberta/roberta-torch-fp32.onnx open_roberta/roberta-torch-fp32_sim ## Inference ```bash +git clone https://gitee.com/deep-spark/iluvatar-corex-ixrt.git --depth=1 + export ORIGIN_ONNX_NAME=./open_roberta/roberta-torch-fp32_sim -export OPTIMIER_FILE=${IXRT_OSS_ROOT}/tools/optimizer/optimizer.py +export OPTIMIER_FILE=./iluvatar-corex-ixrt/tools/optimizer/optimizer.py export PROJ_PATH=./ ``` @@ -70,7 +68,6 @@ For detailed steps regarding this model, please refer to this document: None: - super().__init__() - self.args = get_args() - self.workload = load_workload(self.args.task) - self.backend_type = self.args.hardware_type - self.compile_backend = None - self.old_os_path = os.environ['PATH'] - self.prev_sys_path = list(sys.path) - self.real_prefix = sys.prefix - self.compile_only_mode = False - - def start_engine(self) -> None: - ''' - Byte MlPerf will create an virtual env for each backend to avoid dependance conflict - ''' - success, total = 0, len(self.workload) - if total == 0: - return - log.info("******************* Backend Env Initization *******************") - status = self.activate_venv(self.backend_type) - if not status: - log.warning("Activate virtualenv Failed, Please Check...") - - self.compile_backend = init_compile_backend(self.backend_type) - self.runtime_backend = init_runtime_backend(self.backend_type) - - output_dir = os.path.abspath('general_perf/reports/' + - self.backend_type) - os.makedirs(output_dir, exist_ok=True) - - status = self.single_workload_perf(self.workload) - - def single_workload_perf( - self, workload: Dict[str, Any]) -> bool: - log.info("******************************************* Start to test model: {}. *******************************************".format(workload['model'])) - - # Check Compile Only Mode - self.compile_only_mode = False - if self.args.compile_only or workload['compile_only']: - self.compile_only_mode = True - - base_report = { - "Model": workload['model'].upper(), - "Backend": self.backend_type, - "Host Info": self.get_cpu_name() - } - - # Initalize Model Config Info - model_info = self.get_model_info(workload['model']) - pre_compile_config = {"workload": workload, 'model_info': model_info} - interact_info = self.check_interact_info(pre_compile_config) - pre_compile_config['interact_info'] = interact_info - if not model_info['dataset_name']: - model_info['dataset_name'] = 'fake_dataset' - - - ''' - Compile Backend could do some optimization like convert model format here - ''' - log.info("******************************************* Running Backend Compilation... 
*******************************************") - log.info("Running Backend Preoptimization...") - pre_compile_config = self.compile_backend.pre_optimize(pre_compile_config) - - - # Initalize dataset - dataset = load_dataset(model_info) - dataset.preprocess() - base_report['Dataset'] = model_info['dataset_name'].upper( - ) if model_info['dataset_name'] else None - - #Placeholder Only - segment_info = self.compile_backend.segment(pre_compile_config) - - best_batch_sizes = self.compile_backend.get_best_batch_size() - if isinstance(best_batch_sizes, list): - pre_compile_config['workload'][ - 'batch_sizes'] = best_batch_sizes - - log.info("Start to compile the model...") - start = time.time() - compile_info = self.compile_backend.compile(pre_compile_config, - dataset) - end = time.time() - - graph_compile_report = {} - graph_compile_report["Compile Duration"] = round(end - start, 5) - graph_compile_report["Compile Precision"] = compile_info[ - 'compile_precision'] - graph_compile_report["Subgraph Coverage"] = compile_info['sg_percent'] - if 'optimizations' in compile_info: - graph_compile_report['Optimizations'] = compile_info['optimizations'] - if 'instance_count' in compile_info: - base_report['Instance Count'] = compile_info['instance_count'] - if 'device_count' in compile_info: - base_report['Device Count'] = compile_info['device_count'] - base_report['Graph Compile'] = graph_compile_report - - # Initalize Output Dir and Reports - output_dir = os.path.abspath('general_perf/reports/' + - self.backend_type + '/' + - workload['model']) - os.makedirs(output_dir, exist_ok=True) - - # Compile only mode will stop here - if self.compile_only_mode: - base_report.pop("Backend") - return compile_info["compile_status"], base_report - - # load runtime backend - """ - Start Here - """ - batch_sizes = pre_compile_config['workload']['batch_sizes'] - self.runtime_backend.configs = compile_info - self.runtime_backend.workload = workload - self.runtime_backend.model_info = model_info - - self.runtime_backend.load(workload['batch_sizes'][0]) - # test accuracy - accuracy_report = {} - AccuracyChecker = self.get_accuracy_checker( - model_info['dataset_name'] - if model_info['dataset_name'] else 'fake_dataset') - AccuracyChecker.runtime_backend = self.runtime_backend - AccuracyChecker.dataloader = dataset - AccuracyChecker.output_dir = output_dir - AccuracyChecker.configs = compile_info - - if workload['test_accuracy']: - log.info("******************************************* Running Accuracy Checker... *******************************************") - - dataset.rebatch(self.runtime_backend.get_loaded_batch_size()) - accuracy_results = AccuracyChecker.calculate_acc( - workload['data_percent']) - - accuracy_report['Data Percent'] = workload['data_percent'] - accuracy_report.update(accuracy_results) - - # test numeric - if workload['test_numeric']: - log.info("******************************************* Running Numeric Checker... 
*******************************************") - - dataset.rebatch(self.runtime_backend.get_loaded_batch_size()) - if not workload['test_accuracy']: - accuracy_results = AccuracyChecker.calculate_acc( - workload['data_percent']) - diff_results = AccuracyChecker.calculate_diff() - accuracy_report.update(diff_results) - # accuracy_report['Diff Dist'] = compile_info['model'] + '-to-' + compile_info['compile_precision'].lower() + ".png" - - if accuracy_report: - base_report['Accuracy'] = accuracy_report - - # function to test qps and latency - if workload['test_perf']: - log.info("******************************************* Runing QPS Checker... *******************************************") - performance_reports = [] - qs_status = self.runtime_backend.is_qs_mode_supported() - if qs_status: - qs_config = self.runtime_backend.generate_qs_config() - performance_reports = self.qs_benchmark(qs_config) - else: - for bs in batch_sizes: - self.runtime_backend.load(bs) - batch_reports = self.runtime_backend.benchmark(dataset) - performance_reports.append(batch_reports) - base_report['Performance'] = performance_reports - - if "Instance Count" not in base_report: - log.warning("Vendors need to Add # of instances") - if "Device Count" not in base_report: - log.warning("Vendors need to Add # of devices") - - # write output to json file - output_report_path = output_dir + "/result-" + compile_info['compile_precision'].lower() + ".json" - with open(output_report_path, 'w') as file: - json.dump(base_report, file, indent=4) - - base_report.pop("Backend") - log.info("Testing Finish. Report is saved in path: [ {}/{} ]". - format(output_dir[output_dir.rfind('general_perf'):], - os.path.basename(output_report_path))) - - return compile_info["compile_status"] - - #WIP - def qs_benchmark(self, qs_config: Dict[str, Any]) -> list: - return [] - - def get_accuracy_checker(self, dataset_name: str): - AccuracyChecker = importlib.import_module('general_perf.datasets.' 
+ - dataset_name + - ".test_accuracy") - AccuracyChecker = getattr(AccuracyChecker, 'AccuracyChecker') - return AccuracyChecker() - - def get_model_info(self, model_name: str) -> Dict[str, Any]: - with open("general_perf/model_zoo/" + model_name + '.json', - 'r') as file: - model_info = json.load(file) - return model_info - - def get_cpu_name(self): - command = "lscpu | grep 'Model name' | awk -F: '{print $2}'" - cpu_name = subprocess.check_output(command, shell=True) - return cpu_name.decode().strip() - - def check_interact_info( - self, pre_compile_config: Dict[str, Dict]) -> Dict[str, Any]: - interact_info = self.compile_backend.get_interact_profile( - pre_compile_config) - - answer = {} - if len(interact_info) == 0: - return answer - - dialog_style = Style.from_dict({ - 'dialog': 'bg:#88b8ff', - 'dialog frame.label': 'bg:#ffffff #000000', - 'dialog.body': 'bg:#000000 #a0acde', - 'dialog shadow': 'bg:#004aaa', - }) - - input_style = Style.from_dict({ - 'dialog': 'bg:#88b8ff', - 'dialog frame.label': 'bg:#ffffff #000000', - 'dialog.body': 'bg:#000000 #a0acde', - 'dialog shadow': 'bg:#004aaa', - 'text-area.prompt': 'bg:#ffffff', - 'text-area': '#000000', - }) - - option = yes_no_dialog(title=self.backend_type + '编译配置', - text='[请选择]:是否进行编译后端配置:', - style=dialog_style).run() - if option: - sum_question = len(interact_info) - for i, question in enumerate(interact_info): - if question['depends']: - state = 0 - for title in question['depends'].split(','): - if not answer[title]: - state = 1 - if state: - continue - if question['dialog_type'] == 'Yes/No Dialog': - option = yes_no_dialog( - title=self.backend_type + '编译配置进度(' + str(i + 1) + - '/' + str(sum_question) + ')', - text="[Backend " + self.backend_type + "]: " + - question['note'], - style=dialog_style).run() - elif question['dialog_type'] == "Input Dialog": - option = input_dialog( - title=self.backend_type + '编译配置进度(' + str(i + 1) + - '/' + str(sum_question) + ')', - text="[Backend " + self.backend_type + "]: " + - question['note'], - style=input_style).run() - elif question['dialog_type'] == "Radiolist Dialog": - choice = [(i, text) - for i, text in enumerate(question['options'])] - num = radiolist_dialog( - title=self.backend_type + '编译配置进度(' + str(i + 1) + - '/' + str(sum_question) + ')', - text="[Backend " + self.backend_type + "]: " + - question['note'], - values=choice, - style=dialog_style).run() - option = question['options'][num] if num is not None else question[ - 'default'] - answer[question['name']] = option - - return answer - - def activate_venv(self, hardware_type: str) -> bool: - - return True - - def deactivate_venv(self): - sys.path[: - 0] = self.prev_sys_path #will also revert the added site-packages - sys.prefix = self.real_prefix - os.environ['PATH'] = self.old_os_path - - -if __name__ == "__main__": - engine = PerfEngine() - engine.start_engine() \ No newline at end of file diff --git a/models/nlp/language_model/roberta/ixrt/requirements.txt b/models/nlp/language_model/roberta/ixrt/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..4a715333b30f19defffc464ba1b979fe3d519edb --- /dev/null +++ b/models/nlp/language_model/roberta/ixrt/requirements.txt @@ -0,0 +1,6 @@ +onnxsim +py-libnuma==1.2 +bert +pycuda +transformers==4.33.3 +typing-extensions==4.12.2 \ No newline at end of file diff --git a/models/nlp/language_model/roformer/ixrt/README.md b/models/nlp/language_model/roformer/ixrt/README.md index c088cf0f740821d5cc96557dbc53588f4ee5866f..59750879f55e8ac896bcbd84554f5da32271a9b4 
100644 --- a/models/nlp/language_model/roformer/ixrt/README.md +++ b/models/nlp/language_model/roformer/ixrt/README.md @@ -11,10 +11,7 @@ Position encoding recently has shown effective in the transformer architecture. ```bash apt install -y libnuma-dev -pip3 install tf2onnx -pip3 install pycuda -pip3 install onnxsim -pip3 install py-libnuma==1.2 +pip3 install -r requirements.txt ``` @@ -53,8 +50,10 @@ python3 deploy.py --model_path ./data/open_roformer/roformer-frozen.onnx --outpu ## Inference ```bash +git clone https://gitee.com/deep-spark/iluvatar-corex-ixrt.git --depth=1 + export ORIGIN_ONNX_NAME=./data/open_roformer/roformer-frozen -export OPTIMIER_FILE=${IXRT_OSS_ROOT}/tools/optimizer/optimizer.py +export OPTIMIER_FILE=./iluvatar-corex-ixrt/tools/optimizer/optimizer.py export PROJ_PATH=./ ``` @@ -75,8 +74,6 @@ For detailed steps regarding this model, please refer to this document: None: - super().__init__() - self.args = get_args() - self.workload = load_workload(self.args.task) - self.backend_type = self.args.hardware_type - self.compile_backend = None - self.old_os_path = os.environ['PATH'] - self.prev_sys_path = list(sys.path) - self.real_prefix = sys.prefix - self.compile_only_mode = False - - def start_engine(self) -> None: - ''' - Byte MlPerf will create an virtual env for each backend to avoid dependance conflict - ''' - success, total = 0, len(self.workload) - if total == 0: - return - log.info("******************* Backend Env Initization *******************") - status = self.activate_venv(self.backend_type) - if not status: - log.warning("Activate virtualenv Failed, Please Check...") - - self.compile_backend = init_compile_backend(self.backend_type) - self.runtime_backend = init_runtime_backend(self.backend_type) - - output_dir = os.path.abspath('general_perf/reports/' + - self.backend_type) - os.makedirs(output_dir, exist_ok=True) - - status = self.single_workload_perf(self.workload) - - def single_workload_perf( - self, workload: Dict[str, Any]) -> bool: - log.info("******************************************* Start to test model: {}. *******************************************".format(workload['model'])) - - # Check Compile Only Mode - self.compile_only_mode = False - if self.args.compile_only or workload['compile_only']: - self.compile_only_mode = True - - base_report = { - "Model": workload['model'].upper(), - "Backend": self.backend_type, - "Host Info": self.get_cpu_name() - } - - # Initalize Model Config Info - model_info = self.get_model_info(workload['model']) - pre_compile_config = {"workload": workload, 'model_info': model_info} - interact_info = self.check_interact_info(pre_compile_config) - pre_compile_config['interact_info'] = interact_info - if not model_info['dataset_name']: - model_info['dataset_name'] = 'fake_dataset' - - - ''' - Compile Backend could do some optimization like convert model format here - ''' - log.info("******************************************* Running Backend Compilation... 
*******************************************") - log.info("Running Backend Preoptimization...") - pre_compile_config = self.compile_backend.pre_optimize(pre_compile_config) - - - # Initalize dataset - dataset = load_dataset(model_info) - dataset.preprocess() - base_report['Dataset'] = model_info['dataset_name'].upper( - ) if model_info['dataset_name'] else None - - #Placeholder Only - segment_info = self.compile_backend.segment(pre_compile_config) - - best_batch_sizes = self.compile_backend.get_best_batch_size() - if isinstance(best_batch_sizes, list): - pre_compile_config['workload'][ - 'batch_sizes'] = best_batch_sizes - - log.info("Start to compile the model...") - start = time.time() - compile_info = self.compile_backend.compile(pre_compile_config, - dataset) - end = time.time() - - graph_compile_report = {} - graph_compile_report["Compile Duration"] = round(end - start, 5) - graph_compile_report["Compile Precision"] = compile_info[ - 'compile_precision'] - graph_compile_report["Subgraph Coverage"] = compile_info['sg_percent'] - if 'optimizations' in compile_info: - graph_compile_report['Optimizations'] = compile_info['optimizations'] - if 'instance_count' in compile_info: - base_report['Instance Count'] = compile_info['instance_count'] - if 'device_count' in compile_info: - base_report['Device Count'] = compile_info['device_count'] - base_report['Graph Compile'] = graph_compile_report - - # Initalize Output Dir and Reports - output_dir = os.path.abspath('general_perf/reports/' + - self.backend_type + '/' + - workload['model']) - os.makedirs(output_dir, exist_ok=True) - - # Compile only mode will stop here - if self.compile_only_mode: - base_report.pop("Backend") - return compile_info["compile_status"], base_report - - # load runtime backend - """ - Start Here - """ - batch_sizes = pre_compile_config['workload']['batch_sizes'] - self.runtime_backend.configs = compile_info - self.runtime_backend.workload = workload - self.runtime_backend.model_info = model_info - - self.runtime_backend.load(workload['batch_sizes'][0]) - # test accuracy - accuracy_report = {} - AccuracyChecker = self.get_accuracy_checker( - model_info['dataset_name'] - if model_info['dataset_name'] else 'fake_dataset') - AccuracyChecker.runtime_backend = self.runtime_backend - AccuracyChecker.dataloader = dataset - AccuracyChecker.output_dir = output_dir - AccuracyChecker.configs = compile_info - - if workload['test_accuracy']: - log.info("******************************************* Running Accuracy Checker... *******************************************") - - dataset.rebatch(self.runtime_backend.get_loaded_batch_size()) - accuracy_results = AccuracyChecker.calculate_acc( - workload['data_percent']) - - accuracy_report['Data Percent'] = workload['data_percent'] - accuracy_report.update(accuracy_results) - - # test numeric - if workload['test_numeric']: - log.info("******************************************* Running Numeric Checker... 
*******************************************") - - dataset.rebatch(self.runtime_backend.get_loaded_batch_size()) - if not workload['test_accuracy']: - accuracy_results = AccuracyChecker.calculate_acc( - workload['data_percent']) - diff_results = AccuracyChecker.calculate_diff() - accuracy_report.update(diff_results) - # accuracy_report['Diff Dist'] = compile_info['model'] + '-to-' + compile_info['compile_precision'].lower() + ".png" - - if accuracy_report: - base_report['Accuracy'] = accuracy_report - - # function to test qps and latency - if workload['test_perf']: - log.info("******************************************* Runing QPS Checker... *******************************************") - performance_reports = [] - qs_status = self.runtime_backend.is_qs_mode_supported() - if qs_status: - qs_config = self.runtime_backend.generate_qs_config() - performance_reports = self.qs_benchmark(qs_config) - else: - for bs in batch_sizes: - self.runtime_backend.load(bs) - batch_reports = self.runtime_backend.benchmark(dataset) - performance_reports.append(batch_reports) - base_report['Performance'] = performance_reports - - if "Instance Count" not in base_report: - log.warning("Vendors need to Add # of instances") - if "Device Count" not in base_report: - log.warning("Vendors need to Add # of devices") - - # write output to json file - output_report_path = output_dir + "/result-" + compile_info['compile_precision'].lower() + ".json" - with open(output_report_path, 'w') as file: - json.dump(base_report, file, indent=4) - - base_report.pop("Backend") - log.info("Testing Finish. Report is saved in path: [ {}/{} ]". - format(output_dir[output_dir.rfind('general_perf'):], - os.path.basename(output_report_path))) - - return compile_info["compile_status"] - - #WIP - def qs_benchmark(self, qs_config: Dict[str, Any]) -> list: - return [] - - def get_accuracy_checker(self, dataset_name: str): - AccuracyChecker = importlib.import_module('general_perf.datasets.' 
+ - dataset_name + - ".test_accuracy") - AccuracyChecker = getattr(AccuracyChecker, 'AccuracyChecker') - return AccuracyChecker() - - def get_model_info(self, model_name: str) -> Dict[str, Any]: - with open("general_perf/model_zoo/" + model_name + '.json', - 'r') as file: - model_info = json.load(file) - return model_info - - def get_cpu_name(self): - command = "lscpu | grep 'Model name' | awk -F: '{print $2}'" - cpu_name = subprocess.check_output(command, shell=True) - return cpu_name.decode().strip() - - def check_interact_info( - self, pre_compile_config: Dict[str, Dict]) -> Dict[str, Any]: - interact_info = self.compile_backend.get_interact_profile( - pre_compile_config) - - answer = {} - if len(interact_info) == 0: - return answer - - dialog_style = Style.from_dict({ - 'dialog': 'bg:#88b8ff', - 'dialog frame.label': 'bg:#ffffff #000000', - 'dialog.body': 'bg:#000000 #a0acde', - 'dialog shadow': 'bg:#004aaa', - }) - - input_style = Style.from_dict({ - 'dialog': 'bg:#88b8ff', - 'dialog frame.label': 'bg:#ffffff #000000', - 'dialog.body': 'bg:#000000 #a0acde', - 'dialog shadow': 'bg:#004aaa', - 'text-area.prompt': 'bg:#ffffff', - 'text-area': '#000000', - }) - - option = yes_no_dialog(title=self.backend_type + '编译配置', - text='[请选择]:是否进行编译后端配置:', - style=dialog_style).run() - if option: - sum_question = len(interact_info) - for i, question in enumerate(interact_info): - if question['depends']: - state = 0 - for title in question['depends'].split(','): - if not answer[title]: - state = 1 - if state: - continue - if question['dialog_type'] == 'Yes/No Dialog': - option = yes_no_dialog( - title=self.backend_type + '编译配置进度(' + str(i + 1) + - '/' + str(sum_question) + ')', - text="[Backend " + self.backend_type + "]: " + - question['note'], - style=dialog_style).run() - elif question['dialog_type'] == "Input Dialog": - option = input_dialog( - title=self.backend_type + '编译配置进度(' + str(i + 1) + - '/' + str(sum_question) + ')', - text="[Backend " + self.backend_type + "]: " + - question['note'], - style=input_style).run() - elif question['dialog_type'] == "Radiolist Dialog": - choice = [(i, text) - for i, text in enumerate(question['options'])] - num = radiolist_dialog( - title=self.backend_type + '编译配置进度(' + str(i + 1) + - '/' + str(sum_question) + ')', - text="[Backend " + self.backend_type + "]: " + - question['note'], - values=choice, - style=dialog_style).run() - option = question['options'][num] if num is not None else question[ - 'default'] - answer[question['name']] = option - - return answer - - def activate_venv(self, hardware_type: str) -> bool: - - return True - - def deactivate_venv(self): - sys.path[: - 0] = self.prev_sys_path #will also revert the added site-packages - sys.prefix = self.real_prefix - os.environ['PATH'] = self.old_os_path - - -if __name__ == "__main__": - engine = PerfEngine() - engine.start_engine() \ No newline at end of file diff --git a/models/nlp/language_model/roformer/ixrt/requirements.txt b/models/nlp/language_model/roformer/ixrt/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..19b9f1aa2d6cd4784e293693a54053975a02e68d --- /dev/null +++ b/models/nlp/language_model/roformer/ixrt/requirements.txt @@ -0,0 +1,5 @@ +tf2onnx +pycuda +onnxsim +py-libnuma==1.2 +typing-extensions==4.12.2 \ No newline at end of file diff --git a/models/nlp/language_model/videobert/ixrt/README.md b/models/nlp/language_model/videobert/ixrt/README.md index d485fbe39f024ea7036fa987876307bfff02b2f5..7bfb8c92b63203a3a726b83a47f573383c36db8f 100644 --- 
a/models/nlp/language_model/videobert/ixrt/README.md +++ b/models/nlp/language_model/videobert/ixrt/README.md @@ -11,17 +11,7 @@ VideoBERT is a model designed for video understanding tasks, extending the capab ```bash apt install -y libnuma-dev -pip3 install onnxsim -pip3 install onnx_graphsurgeon -pip3 install scikit-learn -pip3 install tqdm -pip3 install pycuda -pip3 install onnx -pip3 install tabulate -pip3 install cv2 -pip3 install pycocotools -pip3 install opencv-python==4.6.0.66 -pip3 install transformers==4.33.3 +pip3 install -r requirements.txt ``` ### Download @@ -42,8 +32,10 @@ bash ./scripts/prepare_model_and_dataset.sh ## Inference ```bash +git clone https://gitee.com/deep-spark/iluvatar-corex-ixrt.git --depth=1 + export ORIGIN_ONNX_NAME=./general_perf/model_zoo/popular/open_videobert/video-bert -export OPTIMIER_FILE=./ixrt-oss/tools/optimizer/optimizer.py +export OPTIMIER_FILE=./iluvatar-corex-ixrt/tools/optimizer/optimizer.py export PROJ_PATH=./ ``` @@ -72,8 +64,9 @@ mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/open_videob cp ./general_perf/model_zoo/popular/open_videobert/video-bert.onnx ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/open_videobert/ # run acc scripts -mv perf_engine.py ./ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py cd ./ByteMLPerf/byte_infer_perf/general_perf +mkdir -p workloads +wget -O workloads/videobert-onnx-fp32.json https://raw.githubusercontent.com/bytedance/ByteMLPerf/refs/heads/main/byte_infer_perf/general_perf/workloads/videobert-onnx-fp32.json python3 core/perf_engine.py --hardware_type ILUVATAR --task videobert-onnx-fp32 ``` diff --git a/models/nlp/language_model/videobert/ixrt/ci/prepare.sh b/models/nlp/language_model/videobert/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..0d46c6c023fc58658a230714d3a1b06cc9430c2b --- /dev/null +++ b/models/nlp/language_model/videobert/ixrt/ci/prepare.sh @@ -0,0 +1,37 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
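+
+# What this CI prepare script does (summary of the steps below): install libnuma,
+# install the Python requirements, stage the open_videobert checkpoint under ./data,
+# link the ByteMLPerf toolbox and install its requirements, then copy the cifar-100
+# dataset, the video-bert.onnx model and the workload configs into ByteMLPerf.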
+ +set -x + +apt install -y libnuma-dev + +pip install -r requirements.txt + +mkdir -p data +cp -r /root/data/checkpoints/open_videobert data/ + +# link and install requirements +ln -s ../../../../../toolbox/ByteMLPerf ./ +pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/requirements.txt +pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/requirements.txt + +# copy data +mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cifar/ +cp -r /root/data/datasets/open_cifar/cifar-100-python/ ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cifar/ +mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/open_videobert/ +cp /root/data/checkpoints/open_videobert/videobert.onnx ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/open_videobert/video-bert.onnx +cd ./ByteMLPerf/byte_infer_perf/general_perf +cp -r /root/data/3rd_party/workloads ./ diff --git a/models/nlp/language_model/videobert/ixrt/perf_engine.py b/models/nlp/language_model/videobert/ixrt/perf_engine.py deleted file mode 100644 index 089d9860f573bba7e19f84aa20fb830a8fcc22d8..0000000000000000000000000000000000000000 --- a/models/nlp/language_model/videobert/ixrt/perf_engine.py +++ /dev/null @@ -1,349 +0,0 @@ -# Copyright 2023 ByteDance and/or its affiliates. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import sys -import os -import logging -import importlib -import json -import subprocess -import time - -from typing import Any, Dict, Tuple -from prompt_toolkit.shortcuts import radiolist_dialog, input_dialog, yes_no_dialog -from prompt_toolkit.styles import Style - -BYTE_MLPERF_ROOT = os.path.dirname( - os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -os.chdir(BYTE_MLPERF_ROOT) -sys.path.insert(0, BYTE_MLPERF_ROOT) - -import argparse -from general_perf.core.configs.workload_store import load_workload -from general_perf.core.configs.dataset_store import load_dataset -from general_perf.core.configs.backend_store import init_compile_backend, init_runtime_backend - -logging.basicConfig(level=logging.INFO) -log = logging.getLogger("PerfEngine") -os.environ["TF_CPP_MIN_LOG_LEVEL"] = '3' - - -def get_args(): - """Parse commandline.""" - parser = argparse.ArgumentParser() - parser.add_argument( - "--task", - default="resnet50-tf-fp32", - help="The task going to be evaluted, refs to workloads/") - parser.add_argument( - "--hardware_type", - default="GPU", - help="The backend going to be evaluted, refs to backends/") - parser.add_argument("--compile_only", - action='store_true', - help="Run compilation only") - - args = parser.parse_args() - return args - - -class PerfEngine: - def __init__(self) -> None: - super().__init__() - self.args = get_args() - self.workload = load_workload(self.args.task) - self.backend_type = self.args.hardware_type - self.compile_backend = None - self.old_os_path = os.environ['PATH'] - self.prev_sys_path = list(sys.path) - self.real_prefix = sys.prefix - self.compile_only_mode = False - - def start_engine(self) -> None: - ''' - Byte MlPerf will create an virtual env for each backend to avoid dependance conflict - ''' - success, total = 0, len(self.workload) - if total == 0: - return - log.info("******************* Backend Env Initization *******************") - status = self.activate_venv(self.backend_type) - if not status: - log.warning("Activate virtualenv Failed, Please Check...") - - self.compile_backend = init_compile_backend(self.backend_type) - self.runtime_backend = init_runtime_backend(self.backend_type) - - output_dir = os.path.abspath('general_perf/reports/' + - self.backend_type) - os.makedirs(output_dir, exist_ok=True) - - status = self.single_workload_perf(self.workload) - - def single_workload_perf( - self, workload: Dict[str, Any]) -> bool: - log.info("******************************************* Start to test model: {}. *******************************************".format(workload['model'])) - - # Check Compile Only Mode - self.compile_only_mode = False - if self.args.compile_only or workload['compile_only']: - self.compile_only_mode = True - - base_report = { - "Model": workload['model'].upper(), - "Backend": self.backend_type, - "Host Info": self.get_cpu_name() - } - - # Initalize Model Config Info - model_info = self.get_model_info(workload['model']) - pre_compile_config = {"workload": workload, 'model_info': model_info} - interact_info = self.check_interact_info(pre_compile_config) - pre_compile_config['interact_info'] = interact_info - if not model_info['dataset_name']: - model_info['dataset_name'] = 'fake_dataset' - - - ''' - Compile Backend could do some optimization like convert model format here - ''' - log.info("******************************************* Running Backend Compilation... 
*******************************************") - log.info("Running Backend Preoptimization...") - pre_compile_config = self.compile_backend.pre_optimize(pre_compile_config) - - - # Initalize dataset - dataset = load_dataset(model_info) - dataset.preprocess() - base_report['Dataset'] = model_info['dataset_name'].upper( - ) if model_info['dataset_name'] else None - - #Placeholder Only - segment_info = self.compile_backend.segment(pre_compile_config) - - best_batch_sizes = self.compile_backend.get_best_batch_size() - if isinstance(best_batch_sizes, list): - pre_compile_config['workload'][ - 'batch_sizes'] = best_batch_sizes - - log.info("Start to compile the model...") - start = time.time() - compile_info = self.compile_backend.compile(pre_compile_config, - dataset) - end = time.time() - - graph_compile_report = {} - graph_compile_report["Compile Duration"] = round(end - start, 5) - graph_compile_report["Compile Precision"] = compile_info[ - 'compile_precision'] - graph_compile_report["Subgraph Coverage"] = compile_info['sg_percent'] - if 'optimizations' in compile_info: - graph_compile_report['Optimizations'] = compile_info['optimizations'] - if 'instance_count' in compile_info: - base_report['Instance Count'] = compile_info['instance_count'] - if 'device_count' in compile_info: - base_report['Device Count'] = compile_info['device_count'] - base_report['Graph Compile'] = graph_compile_report - - # Initalize Output Dir and Reports - output_dir = os.path.abspath('general_perf/reports/' + - self.backend_type + '/' + - workload['model']) - os.makedirs(output_dir, exist_ok=True) - - # Compile only mode will stop here - if self.compile_only_mode: - base_report.pop("Backend") - return compile_info["compile_status"], base_report - - # load runtime backend - """ - Start Here - """ - batch_sizes = pre_compile_config['workload']['batch_sizes'] - self.runtime_backend.configs = compile_info - self.runtime_backend.workload = workload - self.runtime_backend.model_info = model_info - - self.runtime_backend.load(workload['batch_sizes'][0]) - # test accuracy - accuracy_report = {} - AccuracyChecker = self.get_accuracy_checker( - model_info['dataset_name'] - if model_info['dataset_name'] else 'fake_dataset') - AccuracyChecker.runtime_backend = self.runtime_backend - AccuracyChecker.dataloader = dataset - AccuracyChecker.output_dir = output_dir - AccuracyChecker.configs = compile_info - - if workload['test_accuracy']: - log.info("******************************************* Running Accuracy Checker... *******************************************") - - dataset.rebatch(self.runtime_backend.get_loaded_batch_size()) - accuracy_results = AccuracyChecker.calculate_acc( - workload['data_percent']) - - accuracy_report['Data Percent'] = workload['data_percent'] - accuracy_report.update(accuracy_results) - - # test numeric - if workload['test_numeric']: - log.info("******************************************* Running Numeric Checker... 
*******************************************") - - dataset.rebatch(self.runtime_backend.get_loaded_batch_size()) - if not workload['test_accuracy']: - accuracy_results = AccuracyChecker.calculate_acc( - workload['data_percent']) - diff_results = AccuracyChecker.calculate_diff() - accuracy_report.update(diff_results) - # accuracy_report['Diff Dist'] = compile_info['model'] + '-to-' + compile_info['compile_precision'].lower() + ".png" - - if accuracy_report: - base_report['Accuracy'] = accuracy_report - - # function to test qps and latency - if workload['test_perf']: - log.info("******************************************* Runing QPS Checker... *******************************************") - performance_reports = [] - qs_status = self.runtime_backend.is_qs_mode_supported() - if qs_status: - qs_config = self.runtime_backend.generate_qs_config() - performance_reports = self.qs_benchmark(qs_config) - else: - for bs in batch_sizes: - self.runtime_backend.load(bs) - batch_reports = self.runtime_backend.benchmark(dataset) - performance_reports.append(batch_reports) - base_report['Performance'] = performance_reports - - if "Instance Count" not in base_report: - log.warning("Vendors need to Add # of instances") - if "Device Count" not in base_report: - log.warning("Vendors need to Add # of devices") - - # write output to json file - output_report_path = output_dir + "/result-" + compile_info['compile_precision'].lower() + ".json" - with open(output_report_path, 'w') as file: - json.dump(base_report, file, indent=4) - - base_report.pop("Backend") - log.info("Testing Finish. Report is saved in path: [ {}/{} ]". - format(output_dir[output_dir.rfind('general_perf'):], - os.path.basename(output_report_path))) - - return compile_info["compile_status"] - - #WIP - def qs_benchmark(self, qs_config: Dict[str, Any]) -> list: - return [] - - def get_accuracy_checker(self, dataset_name: str): - AccuracyChecker = importlib.import_module('general_perf.datasets.' 
+ - dataset_name + - ".test_accuracy") - AccuracyChecker = getattr(AccuracyChecker, 'AccuracyChecker') - return AccuracyChecker() - - def get_model_info(self, model_name: str) -> Dict[str, Any]: - with open("general_perf/model_zoo/" + model_name + '.json', - 'r') as file: - model_info = json.load(file) - return model_info - - def get_cpu_name(self): - command = "lscpu | grep 'Model name' | awk -F: '{print $2}'" - cpu_name = subprocess.check_output(command, shell=True) - return cpu_name.decode().strip() - - def check_interact_info( - self, pre_compile_config: Dict[str, Dict]) -> Dict[str, Any]: - interact_info = self.compile_backend.get_interact_profile( - pre_compile_config) - - answer = {} - if len(interact_info) == 0: - return answer - - dialog_style = Style.from_dict({ - 'dialog': 'bg:#88b8ff', - 'dialog frame.label': 'bg:#ffffff #000000', - 'dialog.body': 'bg:#000000 #a0acde', - 'dialog shadow': 'bg:#004aaa', - }) - - input_style = Style.from_dict({ - 'dialog': 'bg:#88b8ff', - 'dialog frame.label': 'bg:#ffffff #000000', - 'dialog.body': 'bg:#000000 #a0acde', - 'dialog shadow': 'bg:#004aaa', - 'text-area.prompt': 'bg:#ffffff', - 'text-area': '#000000', - }) - - option = yes_no_dialog(title=self.backend_type + '编译配置', - text='[请选择]:是否进行编译后端配置:', - style=dialog_style).run() - if option: - sum_question = len(interact_info) - for i, question in enumerate(interact_info): - if question['depends']: - state = 0 - for title in question['depends'].split(','): - if not answer[title]: - state = 1 - if state: - continue - if question['dialog_type'] == 'Yes/No Dialog': - option = yes_no_dialog( - title=self.backend_type + '编译配置进度(' + str(i + 1) + - '/' + str(sum_question) + ')', - text="[Backend " + self.backend_type + "]: " + - question['note'], - style=dialog_style).run() - elif question['dialog_type'] == "Input Dialog": - option = input_dialog( - title=self.backend_type + '编译配置进度(' + str(i + 1) + - '/' + str(sum_question) + ')', - text="[Backend " + self.backend_type + "]: " + - question['note'], - style=input_style).run() - elif question['dialog_type'] == "Radiolist Dialog": - choice = [(i, text) - for i, text in enumerate(question['options'])] - num = radiolist_dialog( - title=self.backend_type + '编译配置进度(' + str(i + 1) + - '/' + str(sum_question) + ')', - text="[Backend " + self.backend_type + "]: " + - question['note'], - values=choice, - style=dialog_style).run() - option = question['options'][num] if num is not None else question[ - 'default'] - answer[question['name']] = option - - return answer - - def activate_venv(self, hardware_type: str) -> bool: - - return True - - def deactivate_venv(self): - sys.path[: - 0] = self.prev_sys_path #will also revert the added site-packages - sys.prefix = self.real_prefix - os.environ['PATH'] = self.old_os_path - - -if __name__ == "__main__": - engine = PerfEngine() - engine.start_engine() diff --git a/models/nlp/language_model/videobert/ixrt/requirements.txt b/models/nlp/language_model/videobert/ixrt/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..5b4007837b93c587033b8d324799b9abe0a323e2 --- /dev/null +++ b/models/nlp/language_model/videobert/ixrt/requirements.txt @@ -0,0 +1,11 @@ +onnxsim +onnx_graphsurgeon +scikit-learn +tqdm +pycuda +onnx +tabulate +pycocotools +opencv-python==4.6.0.66 +transformers==4.33.3 +typing-extensions==4.12.2 \ No newline at end of file diff --git a/models/recommendation/ctr-prediction/widedeep/ixrt/README.md b/models/recommendation/ctr-prediction/widedeep/ixrt/README.md index 
94954558ef2e09e20beebb85d90cc0daca9556fa..c6653cabd3e38107513cb66ac01ca7e7120b05e7 100644 --- a/models/recommendation/ctr-prediction/widedeep/ixrt/README.md +++ b/models/recommendation/ctr-prediction/widedeep/ixrt/README.md @@ -9,10 +9,9 @@ Generalized linear models with nonlinear feature transformations are widely used ### Install ```bash -pip3 install tf2onnx -pip3 install pycuda -pip3 install onnxsim -pip3 install py-libnuma==1.2 +apt install -y libnuma-dev + +pip3 install -r requirements.txt ``` ### Download @@ -62,7 +61,6 @@ For detailed steps regarding this model, please refer to this document: = target_qps: print("pass!") exit() diff --git a/models/speech/speech_recognition/conformer/ixrt/requirements.txt b/models/speech/speech_recognition/conformer/ixrt/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..dd4788cf7291642c165bfd61f31399f2e24213e9 --- /dev/null +++ b/models/speech/speech_recognition/conformer/ixrt/requirements.txt @@ -0,0 +1,5 @@ +tqdm +onnx +typeguard==2.13.3 +onnxsim +pycuda \ No newline at end of file diff --git a/models/speech/speech_recognition/conformer/ixrt/scripts/infer_conformer_fp16_accuracy_ixrt.sh b/models/speech/speech_recognition/conformer/ixrt/scripts/infer_conformer_fp16_accuracy.sh similarity index 100% rename from models/speech/speech_recognition/conformer/ixrt/scripts/infer_conformer_fp16_accuracy_ixrt.sh rename to models/speech/speech_recognition/conformer/ixrt/scripts/infer_conformer_fp16_accuracy.sh diff --git a/models/speech/speech_recognition/conformer/ixrt/scripts/infer_conformer_fp16_performance_ixrt.sh b/models/speech/speech_recognition/conformer/ixrt/scripts/infer_conformer_fp16_performance.sh similarity index 100% rename from models/speech/speech_recognition/conformer/ixrt/scripts/infer_conformer_fp16_performance_ixrt.sh rename to models/speech/speech_recognition/conformer/ixrt/scripts/infer_conformer_fp16_performance.sh diff --git a/models/speech/speech_recognition/transformer_asr/ixrt/README.md b/models/speech/speech_recognition/transformer_asr/ixrt/README.md index 0c2e1b456d5fe38efdda736439c1361a14dcedcd..9d809a7dc8c2216faf0a0791942aa4d93eebce32 100644 --- a/models/speech/speech_recognition/transformer_asr/ixrt/README.md +++ b/models/speech/speech_recognition/transformer_asr/ixrt/README.md @@ -9,7 +9,7 @@ Beam search allows us to exert control over the output of text generation. This ### Install ```bash -pip3 install speechbrain==0.5.13 +pip3 install -r requirements.txt ``` ### Download diff --git a/models/speech/speech_recognition/transformer_asr/ixrt/ci/prepare.sh b/models/speech/speech_recognition/transformer_asr/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..5a1f966836c58193331ab4d43411a5622c04ad79 --- /dev/null +++ b/models/speech/speech_recognition/transformer_asr/ixrt/ci/prepare.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
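+
+# What this CI prepare script does (summary of the steps below): install the GL
+# runtime for the detected OS, install the Python requirements, stage the pretrained
+# checkpoint under results/transformer/8886, link the AISHELL data expected by
+# speechbrain, run build.sh, and build transformer.engine with builder.py.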
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip3 install -r requirements.txt + +mkdir -p results/transformer +cp -r /root/data/checkpoints/8886 results/transformer/ +mkdir -p results/transformer/8886/save +mkdir -p /home/data/speechbrain/aishell/csv_data +ln -s /root/data/datasets/AISHELL/data_aishell /home/data/speechbrain/aishell/ +cp results/transformer/8886/*.csv /home/data/speechbrain/aishell/csv_data + +bash build.sh + +python3 builder.py \ +--ckpt_path results/transformer/8886/save \ +--head_num 4 \ +--max_batch_size 64 \ +--max_seq_len 1024 \ +--engine_path transformer.engine \ No newline at end of file diff --git a/models/speech/speech_recognition/transformer_asr/ixrt/requirements.txt b/models/speech/speech_recognition/transformer_asr/ixrt/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..4aa5b041597a8aa8d1cd4cd4ec2cc72ef81d621a --- /dev/null +++ b/models/speech/speech_recognition/transformer_asr/ixrt/requirements.txt @@ -0,0 +1 @@ +speechbrain==0.5.13 \ No newline at end of file diff --git a/tests/models_ixrt.yaml b/tests/models_ixrt.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c4bc981dd654cb985b3689fffd596b30eb8bad0b --- /dev/null +++ b/tests/models_ixrt.yaml @@ -0,0 +1,502 @@ +--- +- datasets: https://www.image-net.org/download.php + download_url: https://download.pytorch.org/models/alexnet-owt-7be5be79.pth + name: alexnet + need_third_part: false + precisions: + - fp16 + - int8 + relative_path: models/cv/classification/alexnet/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://download.pytorch.org/models/convnext_small-0c510722.pth + name: convnext_small + need_third_part: false + precisions: + - fp16 + relative_path: models/cv/classification/convnext_small/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://local/cspdarknet53_3rdparty_8xb32_in1k_20220329-bd275287.pth + name: cspdarknet53 + need_third_part: true + precisions: + - fp16 + - int8 + relative_path: models/cv/classification/cspdarknet53/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://download.openmmlab.com/mmclassification/v0/cspnet/cspresnet50_3rdparty_8xb32_in1k_20220329-dd6dddfb.pth + name: cspresnet50 + need_third_part: true + precisions: + - fp16 + - int8 + relative_path: models/cv/classification/cspresnet50/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://local/densenet121.pth + name: densenet121 + need_third_part: false + precisions: + - fp16 + relative_path: models/cv/classification/densenet121/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://download.pytorch.org/models/densenet161-8d451a50.pth + name: densenet161 + need_third_part: false + precisions: + - fp16 + relative_path: models/cv/classification/densenet161/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://download.pytorch.org/models/densenet169-b2777c0a.pth + name: densenet169 + need_third_part: false + precisions: + - fp16 + relative_path: models/cv/classification/densenet169/ixrt + task_type: cv/classification +- datasets: 
https://www.image-net.org/download.php + download_url: https://download.pytorch.org/models/efficientnet_b0_rwightman-3dd342df.pth + name: efficientnet_b0 + need_third_part: false + precisions: + - fp16 + - int8 + relative_path: models/cv/classification/efficientnet_b0/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://download.pytorch.org/models/efficientnet_b1_rwightman-bac287d4.pth + name: efficientnet_b1 + need_third_part: false + precisions: + - fp16 + - int8 + relative_path: models/cv/classification/efficientnet_b1/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://download.pytorch.org/models/efficientnet_b2_rwightman-c35c1473.pth + name: efficientnet_b2 + need_third_part: false + precisions: + - fp16 + relative_path: models/cv/classification/efficientnet_b2/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://local/efficientnet_v2.pth + name: efficientnet_v2 + need_third_part: false + precisions: + - fp16 + - int8 + relative_path: models/cv/classification/efficientnet_v2/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://download.pytorch.org/models/googlenet-1378be20.pth + name: googlenet + need_third_part: false + precisions: + - fp16 + - int8 + relative_path: models/cv/classification/googlenet/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://download.openmmlab.com/mmclassification/v0/hrnet/hrnet-w18_3rdparty_8xb32_in1k_20220120-0c10b180.pth + name: hrnet_w18 + need_third_part: false + precisions: + - fp16 + - int8 + relative_path: models/cv/classification/hrnet_w18/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://download.pytorch.org/models/inception_v3_google-0cc3c7bd.pth + name: inception_v3 + need_third_part: false + precisions: + - fp16 + - int8 + relative_path: models/cv/classification/inception_v3/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: http://data.lip6.fr/cadene/pretrainedmodels/inceptionresnetv2-520b38e4.pth + name: inceptionresnetv2 + need_third_part: false + precisions: + - fp16 + - int8 + relative_path: models/cv/classification/inceptionresnetv2/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://download.pytorch.org/models/mobilenet_v2-b0353104.pth + name: mobilenet_v2 + need_third_part: false + precisions: + - fp16 + - int8 + relative_path: models/cv/classification/mobilenet_v2/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://download.pytorch.org/models/mobilenet_v3_small-047dcff4.pth + name: mobilenet_v3 + need_third_part: false + precisions: + - fp16 + relative_path: models/cv/classification/mobilenet_v3/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-A0_3rdparty_4xb64-coslr-120e_in1k_20210909-883ab98c.pth + name: repvgg + need_third_part: true + precisions: + - fp16 + relative_path: models/cv/classification/repvgg/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://shanghuagao.oss-cn-beijing.aliyuncs.com/res2net/res2net50_14w_8s-6527dddc.pth + 
name: res2net50 + need_third_part: true + precisions: + - fp16 + - int8 + relative_path: models/cv/classification/res2net50/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://download.pytorch.org/models/resnet101.pth + name: resnet101 + need_third_part: false + precisions: + - fp16 + - int8 + relative_path: models/cv/classification/resnet101/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://download.pytorch.org/models/resnet18-f37072fd.pth + name: resnet18 + need_third_part: false + precisions: + - fp16 + - int8 + relative_path: models/cv/classification/resnet18/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://download.pytorch.org/models/resnet34-b627a593.pth + name: resnet34 + need_third_part: false + precisions: + - fp16 + - int8 + relative_path: models/cv/classification/resnet34/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://download.pytorch.org/models/resnet50-0676ba61.pth + name: resnet50 + need_third_part: false + precisions: + - fp16 + - int8 + relative_path: models/cv/classification/resnet50/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d50_b32x8_imagenet_20210531-db14775a.pth + name: resnetv1d50 + need_third_part: false + precisions: + - fp16 + - int8 + relative_path: models/cv/classification/resnetv1d50/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth + name: resnext50_32x4d + need_third_part: false + precisions: + - fp16 + relative_path: models/cv/classification/resnext50_32x4d/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://download.openmmlab.com/mmclassification/v0/shufflenet_v1/shufflenet_v1.pth + name: shufflenet_v1 + need_third_part: true + precisions: + - fp16 + relative_path: models/cv/classification/shufflenet_v1/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://download.pytorch.org/models/squeezenet1_0-b66bff10.pth + name: squeezenet_v1_0 + need_third_part: false + precisions: + - fp16 + - int8 + relative_path: models/cv/classification/squeezenet_v1_0/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://download.pytorch.org/models/squeezenet1_1-b8a52dc0.pth + name: squeezenet_v1_1 + need_third_part: false + precisions: + - fp16 + - int8 + relative_path: models/cv/classification/squeezenet_v1_1/ixrt + task_type: cv/classification +- datasets: https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open_imagenet.tar + download_url: https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open-swin-large.tar + name: swin_transformer_large + need_comfirm: true + need_third_part: false + precisions: + - fp16 + relative_path: models/cv/classification/swin_transformer_large/ixrt + task_type: cv/classification +- datasets: https://www.image-net.org/download.php + download_url: https://download.pytorch.org/models/vgg16-397923af.pth + name: vgg16 + need_third_part: false + precisions: + - fp16 + - int8 + relative_path: models/cv/classification/vgg16/ixrt + task_type: cv/classification +- datasets: 
https://www.image-net.org/download.php + download_url: https://download.pytorch.org/models/wide_resnet50_2-95faca4d.pth + name: wide_resnet50 + need_third_part: false + precisions: + - fp16 + - int8 + relative_path: models/cv/classification/wide_resnet50/ixrt + task_type: cv/classification +- datasets: local/coco + download_url: https://download.openmmlab.com/mmdetection/v2.0/centernet/centernet_resnet18_140e_coco/centernet_resnet18_140e_coco_20210705_093630-bb5b3bf7.pth + name: centernet + need_third_part: false + precisions: + - fp16 + relative_path: models/cv/detection/centernet/ixrt + task_type: cv/detection +- datasets: local/coco + download_url: hhttps://download.openmmlab.com/mmdetection/v3.0/detr/detr_r50_8xb2-150e_coco/detr_r50_8xb2-150e_coco_20221023_153551-436d03e8.pth + name: detr + need_third_part: false + precisions: + - fp16 + relative_path: models/cv/detection/detr/ixrt + task_type: cv/detection +- datasets: local/coco + download_url: https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco-0a0d75a8.pth + name: fcos + need_third_part: true + precisions: + - fp16 + relative_path: models/cv/detection/fcos/ixrt + task_type: cv/detection +- datasets: local/coco + download_url: https://pjreddie.com/media/files/yolov3.weights + name: yolov3 + need_third_part: true + precisions: + - fp16 + - int8 + relative_path: models/cv/detection/yolov3/ixrt + task_type: cv/detection +- datasets: local/coco + download_url: https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights + name: yolov4 + need_third_part: true + precisions: + - fp16 + - int8 + relative_path: models/cv/detection/yolov4/ixrt + task_type: cv/detection +- datasets: local/coco + download_url: https://github.com/ultralytics/yolov5/releases/download/v6.1/yolov5m.pt + name: yolov5 + need_third_part: true + precisions: + - fp16 + - int8 + relative_path: models/cv/detection/yolov5/ixrt + task_type: cv/detection +- datasets: local/coco + download_url: https://github.com/ultralytics/yolov5/releases/download/v6.1/yolov5s.pt + name: yolov5s + need_third_part: true + precisions: + - fp16 + - int8 + relative_path: models/cv/detection/yolov5s/ixrt + task_type: cv/detection +- 3rd_party_repo: YOLOv6 + datasets: local/coco + download_url: https://github.com/meituan/YOLOv6/releases/download/0.4.0/yolov6s.pt + name: yolov6 + need_third_part: true + precisions: + - fp16 + - int8 + relative_path: models/cv/detection/yolov6/ixrt + task_type: cv/detection +- 3rd_party_repo: yolov7 + datasets: local/coco + download_url: https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7.pt + name: yolov7 + need_third_part: true + precisions: + - fp16 + - int8 + relative_path: models/cv/detection/yolov7/ixrt + task_type: cv/detection +- datasets: local/coco + download_url: https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n.pt + name: yolov8 + need_third_part: false + precisions: + - fp16 + - int8 + relative_path: models/cv/detection/yolov8/ixrt + task_type: cv/detection +- datasets: local/coco + download_url: https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_m.pth + name: yolox + need_third_part: true + precisions: + - fp16 + - int8 + relative_path: models/cv/detection/yolox/ixrt + task_type: cv/detection +- datasets: https://raw.githubusercontent.com/lanrax/Project_dataset/master/facenet_datasets.zip + download_url: 
https://drive.google.com/open?id=1R77HmFADxe87GmoLwzfgMu_HY0IhcyBz + name: facenet + need_third_part: true + precisions: + - fp16 + - int8 + relative_path: models/cv/face/facenet/ixrt + task_type: cv/face +- datasets: local/coco + download_url: https://download.01.org/opencv/openvino_training_extensions/models/human_pose_estimation/checkpoint_iter_370000.pth + name: lightweight_openpose + need_third_part: true + precisions: + - fp16 + relative_path: models/cv/pose_estimation/lightweight_openpose/ixrt + task_type: cv/pose_estimation +- datasets: local/coco + download_url: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-aic-coco_pt-aic-coco_420e-256x192-63eb25f7_20230126.pth + name: rtmpose + need_third_part: false + precisions: + - fp16 + relative_path: models/cv/pose_estimation/rtmpose/ixrt + task_type: cv/pose_estimation +- datasets: local/coco + download_url: https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x/137259246/model_final_9243eb.pkl + name: mask_rcnn + need_third_part: false + precisions: + - fp16 + relative_path: models/cv/segmentation/mask_rcnn/ixrt + task_type: cv/segmentation +- datasets: local/coco + download_url: https://download.openmmlab.com/mmdetection/v2.0/solo/solo_r50_fpn_3x_coco/solo_r50_fpn_3x_coco_20210901_012353-11d224d7.pth + name: solov1 + need_third_part: true + precisions: + - fp16 + relative_path: models/cv/segmentation/solov1/ixrt + task_type: cv/segmentation +- datasets: local/coco + need_comfirm: true + download_url: clip-vit-base-patch32.zip + name: clip + need_third_part: false + precisions: + - fp16 + relative_path: models/multimodal/text_and_image/clip/ixrt + task_type: multimodal/text_and_image +- datasets: https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open_squad.tar + download_url: https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open_albert.tar + name: albert + need_third_part: true + precisions: + - fp16 + relative_path: models/nlp/language_model/albert/ixrt + task_type: nlp/language_model +- datasets: local/SQuAD + download_url: https://huggingface.co/csarron/bert-base-uncased-squad-v1 + name: bert_base_squad + need_third_part: false + precisions: + - fp16 + - int8 + relative_path: models/nlp/language_model/bert_base_squad/ixrt + task_type: nlp/language_model +- datasets: local/SQuAD + download_url: https://huggingface.co/neuralmagic/bert-large-uncased-finetuned-squadv1 + name: bert_large_squad + need_third_part: false + precisions: + - fp16 + - int8 + relative_path: models/nlp/language_model/bert_large_squad/ixrt + task_type: nlp/language_model +- datasets: local/SQuAD + download_url: https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open_deberta.tar + name: deberta + need_third_part: false + precisions: + - fp16 + relative_path: models/nlp/language_model/deberta/ixrt + task_type: nlp/language_model +- datasets: local/SQuAD + download_url: https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open_roberta.tar + name: roberta + need_third_part: false + precisions: + - fp16 + relative_path: models/nlp/language_model/roberta/ixrt + task_type: nlp/language_model +- datasets: local/SQuAD + download_url: https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open_roformer.tar + name: roformer + need_third_part: false + precisions: + - fp16 + relative_path: models/nlp/language_model/roformer/ixrt + task_type: nlp/language_model +- datasets: https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/cifar-100-python.tar + download_url: https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open_videobert.tar + 
name: videobert + need_third_part: false + precisions: + - fp16 + relative_path: models/nlp/language_model/videobert/ixrt + task_type: nlp/language_model +- datasets: https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/eval.csv + need_comfirm: true + download_url: https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open_wide_deep_saved_model.tar + name: widedeep + need_third_part: false + precisions: + - fp16 + relative_path: models/recommendation/ctr-prediction/widedeep/ixrt + task_type: recommendation/ctr-prediction +- datasets: https://www.openslr.org/33/aishell.tar.gz + download_url: http://mobvoi-speech-public.ufile.ucloud.cn/public/wenet/aishell/20211025_conformer_exp.tar.gz + name: conformer + need_third_part: true + precisions: + - fp16 + relative_path: models/speech/speech_recognition/conformer/ixrt + task_type: speech/speech_recognition +- datasets: https://www.openslr.org/33/aishell.tar.gz + download_url: https://drive.google.com/drive/folders/1_2zN6lbu4zUc0-iq8XbABEm6fl9mohkv + name: transformer_asr + need_third_part: true + precisions: + - fp16 + relative_path: models/speech/speech_recognition/transformer_asr/ixrt + task_type: speech/speech_recognition diff --git a/tests/run_ixrt.py b/tests/run_ixrt.py new file mode 100644 index 0000000000000000000000000000000000000000..63573efa3468c83e5c77d4ca0ffe4d615ac10430 --- /dev/null +++ b/tests/run_ixrt.py @@ -0,0 +1,497 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
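+
+# Driver for the ixrt CI model tests: it looks up the requested model in
+# models_ixrt.yaml, runs the model's ci/prepare.sh, launches the per-precision
+# accuracy/performance scripts, and parses FPS / Acc / metricResult lines from
+# stdout to mark each precision PASS or FAIL.
+# The test case is selected via `--model <name>` or the TEST_CASE environment
+# variable (see main() below).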
+ +import yaml +import subprocess +import json +import re +import time +import logging +import os +import sys +import argparse + +import utils + +# 配置日志 +debug_level = logging.DEBUG if utils.is_debug() else logging.INFO +logging.basicConfig( + handlers=[logging.FileHandler("output.log"), logging.StreamHandler()], + level=debug_level, + format="%(asctime)s - %(levelname)s - %(message)s", +) + +METRIC_PATTERN = r"{'metricResult':.*}" + +def main(): + parser = argparse.ArgumentParser(description="") + parser.add_argument("--model", type=str, help="model name, e.g: alexnet") + args = parser.parse_args() + + if args.model: + test_model = args.model + else: + test_model = os.environ.get("TEST_CASE") + logging.info(f"Test case to run: {test_model}") + if not test_model: + logging.error("test model case is empty") + sys.exit(-1) + + model = get_model_config(test_model) + if not model: + logging.error("mode config is empty") + sys.exit(-1) + + result = {} + if model["task_type"] == "cv/classification": + logging.info(f"Start running {model['name']} test case:\n{json.dumps(model, indent=4)}") + d_url = model["download_url"] + if d_url is not None: + result = run_clf_testcase(model) + check_model_result(result) + logging.debug(f"The result of {model['name']} is\n{json.dumps(result, indent=4)}") + logging.info(f"End running {model['name']} test case.") + + # 检测模型 + if model["task_type"] in ["cv/detection", "cv/pose_estimation"]: + logging.info(f"Start running {model['name']} test case:\n{json.dumps(model, indent=4)}") + d_url = model["download_url"] + if d_url is not None: + result = run_detec_testcase(model) + check_model_result(result) + logging.debug(f"The result of {model['name']} is\n{json.dumps(result, indent=4)}") + logging.info(f"End running {model['name']} test case.") + + # Segmentation模型 + if model["task_type"] in ["cv/segmentation", "cv/face", "multimodal/text_and_image"]: + logging.info(f"Start running {model['name']} test case:\n{json.dumps(model, indent=4)}") + d_url = model["download_url"] + if d_url is not None: + result = run_segmentation_and_face_testcase(model) + check_model_result(result) + logging.debug(f"The result of {model['name']} is\n{json.dumps(result, indent=4)}") + logging.info(f"End running {model['name']} test case.") + + # Speech模型 + if model["task_type"] in ["speech/speech_recognition"]: + logging.info(f"Start running {model['name']} test case:\n{json.dumps(model, indent=4)}") + d_url = model["download_url"] + if d_url is not None: + result = run_speech_testcase(model) + check_model_result(result) + logging.debug(f"The result of {model['name']} is\n{json.dumps(result, indent=4)}") + logging.info(f"End running {model['name']} test case.") + + # NLP模型 + if model["task_type"] in ["nlp/language_model", "recommendation/ctr-prediction"]: + logging.info(f"Start running {model['name']} test case:\n{json.dumps(model, indent=4)}") + d_url = model["download_url"] + if d_url is not None: + result = run_nlp_testcase(model) + check_model_result(result) + logging.debug(f"The result of {model['name']} is\n{json.dumps(result, indent=4)}") + logging.info(f"End running {model['name']} test case.") + + logging.info(f"Full text result: {result}") + +def get_model_config(mode_name): + with open("models_ixrt.yaml", "r") as file: + models = yaml.safe_load(file) + + for model in models: + if model["name"] == mode_name.lower(): + return model + return + +def check_model_result(result): + status = "PASS" + for prec in ["fp16", "int8"]: + if prec in result["result"]: + if 
result["result"][prec]["status"] == "FAIL": + status = "FAIL" + break + result["status"] = status + +def run_clf_testcase(model): + model_name = model["name"] + result = { + "name": model_name, + "result": {}, + } + d_url = model["download_url"] + checkpoint_n = d_url.split("/")[-1] + prepare_script = f""" + cd ../{model['relative_path']} + bash ci/prepare.sh + """ + # add pip list info when in debug mode + if utils.is_debug(): + pip_list_script = "pip list | grep -E 'numpy|transformer|igie|mmcv|onnx'\n" + prepare_script = pip_list_script + prepare_script + pip_list_script + + run_script(prepare_script) + + config_name = model_name.upper() + + patterns = { + "FPS": r"FPS\s*:\s*(\d+\.?\d*)", + "Acc1": r"Acc@1\s*:\s*(\d+\.?\d*)", + "Acc5": r"Acc@5\s*:\s*(\d+\.?\d*)", + "E2E": r"E2E time\s*:\s*(\d+\.\d+)" + } + + combined_pattern = re.compile("|".join(f"(?P<{name}>{pattern})" for name, pattern in patterns.items())) + + for prec in model["precisions"]: + logging.info(f"Start running {model_name} {prec} test case") + script = f""" + cd ../{model['relative_path']} + export DATASETS_DIR=/root/data/datasets/imagenet-val + export PROJ_DIR=./ + export CHECKPOINTS_DIR=./checkpoints + export RUN_DIR=./ + export CONFIG_DIR=config/{config_name}_CONFIG + bash scripts/infer_{model_name}_{prec}_accuracy.sh + bash scripts/infer_{model_name}_{prec}_performance.sh + """ + + if model_name == "swin_transformer_large": + script = f""" + cd ../{model['relative_path']} + export ORIGIN_ONNX_NAME=./swin-large-torch-fp32 + export OPTIMIER_FILE=/root/data/3rd_party/iluvatar-corex-ixrt/tools/optimizer/optimizer.py + export PROJ_PATH=./ + bash scripts/infer_swinl_fp16_performance.sh + cd ./ByteMLPerf/byte_infer_perf/general_perf + python3 core/perf_engine.py --hardware_type ILUVATAR --task swin-large-torch-fp32 + """ + + r, t = run_script(script) + sout = r.stdout + matchs = combined_pattern.finditer(sout) + result["result"].setdefault(prec, {"status": "FAIL"}) + match_count = 0 + for match in matchs: + for name, value in match.groupdict().items(): + if value: + match_count += 1 + result["result"][prec][name] = float(f"{float(value.split(':')[1].strip()):.3f}") + break + + if match_count == len(patterns): + result["result"][prec]["status"] = "PASS" + result["result"][prec]["Cost time (s)"] = t + logging.debug(f"matchs:\n{matchs}") + return result + +def run_detec_testcase(model): + model_name = model["name"] + result = { + "name": model_name, + "result": {}, + } + d_url = model["download_url"] + checkpoint_n = d_url.split("/")[-1] + dataset_n = model["datasets"].split("/")[-1] + prepare_script = f""" + cd ../{model['relative_path']} + ln -s /root/data/datasets/{dataset_n} ./ + bash ci/prepare.sh + """ + + # add pip list info when in debug mode + if utils.is_debug(): + pip_list_script = "pip list | grep -E 'numpy|transformer|igie|mmcv|onnx'\n" + prepare_script = pip_list_script + prepare_script + pip_list_script + + run_script(prepare_script) + + config_name = model_name.upper() + + for prec in model["precisions"]: + logging.info(f"Start running {model_name} {prec} test case") + script = f""" + cd ../{model['relative_path']} + export DATASETS_DIR=./{dataset_n}/ + + export MODEL_PATH=./{model_name}.onnx + + export PROJ_DIR=./ + export CHECKPOINTS_DIR=./checkpoints + export COCO_GT=./{dataset_n}/annotations/instances_val2017.json + export EVAL_DIR=./{dataset_n}/val2017 + export RUN_DIR=./ + export CONFIG_DIR=config/{config_name}_CONFIG + + bash scripts/infer_{model_name}_{prec}_accuracy.sh + bash 
scripts/infer_{model_name}_{prec}_performance.sh + """ + + if model_name == "rtmpose": + script = f""" + cd ../{model['relative_path']} + python3 predict.py --model data/rtmpose/rtmpose_opt.onnx --precision fp16 --img_path demo/demo.jpg + """ + + r, t = run_script(script) + sout = r.stdout + fps_pattern = r"(?P<FPS>FPS\s*:\s*(\d+\.?\d*))" + e2e_pattern = r"(?P<E2E>\s*E2E time\s*:\s*(\d+\.\d+)\s)" + combined_pattern = re.compile(f"{fps_pattern}|{e2e_pattern}") + matchs = combined_pattern.finditer(sout) + for match in matchs: + result["result"].setdefault(prec, {"status": "FAIL"}) + for name, value in match.groupdict().items(): + if value: + try: + result["result"][prec][name] = float(f"{float(value.split(':')[1].strip()):.3f}") + break + except ValueError: + print("The string cannot be converted to a float.") + result["result"][prec][name] = value + pattern = r"Average Precision \(AP\) @\[ (IoU=0.50[:\d.]*)\s*\| area= all \| maxDets=\s?\d+\s?\] =\s*([\d.]+)" + matchs = re.findall(pattern, sout) + for m in matchs: + result["result"].setdefault(prec, {}) + try: + result["result"][prec][m[0]] = float(m[1]) + except ValueError: + print("The string cannot be converted to a float.") + result["result"][prec][m[0]] = m[1] + if matchs and len(matchs) == 2: + result["result"][prec]["status"] = "PASS" + else: + pattern = METRIC_PATTERN + matchs = re.findall(pattern, sout) + if matchs and len(matchs) == 1: + result["result"].setdefault(prec, {}) + result["result"][prec].update(get_metric_result(matchs[0])) + result["result"][prec]["status"] = "PASS" + result["result"][prec]["Cost time (s)"] = t + logging.debug(f"matchs:\n{matchs}") + + return result + +def run_segmentation_and_face_testcase(model): + model_name = model["name"] + result = { + "name": model_name, + "result": {}, + } + dataset_n = model["datasets"].split("/")[-1] + prepare_script = f""" + cd ../{model['relative_path']} + bash ci/prepare.sh + ls -l | grep onnx + """ + + # add pip list info when in debug mode + if utils.is_debug(): + pip_list_script = "pip list | grep -E 'numpy|transformer|igie|mmcv|onnx'\n" + prepare_script = pip_list_script + prepare_script + pip_list_script + + run_script(prepare_script) + + for prec in model["precisions"]: + logging.info(f"Start running {model_name} {prec} test case") + script = f""" + cd ../{model['relative_path']} + export DATASETS_DIR=./{dataset_n}/ + export PROJ_DIR=./ + export CHECKPOINTS_DIR=./checkpoints + export COCO_GT=./{dataset_n}/annotations/instances_val2017.json + export EVAL_DIR=./{dataset_n}/val2017 + export RUN_DIR=./ + + bash scripts/infer_{model_name}_{prec}_accuracy.sh + bash scripts/infer_{model_name}_{prec}_performance.sh + """ + + if model_name == "clip": + script = f""" + cd ../{model['relative_path']} + python3 inference.py + """ + + r, t = run_script(script) + sout = r.stdout + + pattern = METRIC_PATTERN + matchs = re.findall(pattern, sout) + result["result"].setdefault(prec, {"status": "FAIL"}) + logging.debug(f"matchs:\n{matchs}") + for m in matchs: + result["result"][prec].update(get_metric_result(m)) + if len(matchs) == 2: + result["result"][prec]["status"] = "PASS" + + result["result"][prec]["Cost time (s)"] = t + logging.debug(f"matchs:\n{matchs}") + return result + +# BERT series models +def run_nlp_testcase(model): + model_name = model["name"] + result = { + "name": model_name, + "result": {}, + } + prepare_script = f""" + set -x + cd ../{model['relative_path']} + bash ci/prepare.sh + """ + + # add pip list info when in debug mode + if utils.is_debug(): + pip_list_script = "pip
list | grep -E 'numpy|transformer|igie|mmcv|onnx'\n" + prepare_script = pip_list_script + prepare_script + pip_list_script + + run_script(prepare_script) + + for prec in model["precisions"]: + logging.info(f"Start running {model_name} {prec} test case") + script = f""" + set -x + cd ../{model['relative_path']} + export ORIGIN_ONNX_NAME=./data/open_{model_name}/{model_name} + export OPTIMIER_FILE=/root/data/3rd_party/iluvatar-corex-ixrt/tools/optimizer/optimizer.py + export PROJ_PATH=./ + bash scripts/infer_{model_name}_{prec}_performance.sh + cd ./ByteMLPerf/byte_infer_perf/general_perf + """ + if model_name == "roformer" or model_name == "widedeep": + script += f""" + python3 core/perf_engine.py --hardware_type ILUVATAR --task {model_name}-tf-fp32 + """ + elif model_name == "videobert": + script += f""" + python3 core/perf_engine.py --hardware_type ILUVATAR --task {model_name}-onnx-fp32 + """ + else: + # model_name == "roberta" or model_name == "deberta" or model_name == "albert" + script += f""" + python3 core/perf_engine.py --hardware_type ILUVATAR --task {model_name}-torch-fp32 + """ + + + if model_name == "bert_base_squad": + script = f""" + set -x + cd ../{model['relative_path']}/python + bash script/infer_{model_name}_{prec}_ixrt.sh + """ + elif model_name == "bert_large_squad": + script = f""" + set -x + cd ../{model['relative_path']}/python + bash script/build_engine.sh --bs 32 + bash script/inference_squad.sh --bs 32 + """ + if prec == "int8": + script = f""" + set -x + cd ../{model['relative_path']}/python + bash script/build_engine.sh --bs 32 --int8 + bash script/inference_squad.sh --bs 32 --int8 + """ + + r, t = run_script(script) + sout = r.stdout + + pattern = METRIC_PATTERN + matchs = re.findall(pattern, sout) + result["result"].setdefault(prec, {"status": "FAIL"}) + logging.debug(f"matchs:\n{matchs}") + for m in matchs: + result["result"][prec].update(get_metric_result(m)) + if len(matchs) == 2: + result["result"][prec]["status"] = "PASS" + + result["result"][prec]["Cost time (s)"] = t + return result + +def run_speech_testcase(model): + model_name = model["name"] + result = { + "name": model_name, + "result": {}, + } + d_url = model["download_url"] + checkpoint_n = d_url.split("/")[-1] + dataset_n = model["datasets"].split("/")[-1] + prepare_script = f""" + cd ../{model['relative_path']} + bash ci/prepare.sh + ls -l | grep onnx + """ + + # add pip list info when in debug mode + if utils.is_debug(): + pip_list_script = "pip list | grep -E 'numpy|transformer|igie|mmcv|onnx'\n" + prepare_script = pip_list_script + prepare_script + pip_list_script + + run_script(prepare_script) + + for prec in model["precisions"]: + logging.info(f"Start running {model_name} {prec} test case") + script = f""" + cd ../{model['relative_path']} + bash scripts/infer_{model_name}_{prec}_accuracy.sh + bash scripts/infer_{model_name}_{prec}_performance.sh + """ + + if model_name == "transformer_asr": + script = f""" + cd ../{model['relative_path']} + python3 inference.py hparams/train_ASR_transformer.yaml --data_folder=/home/data/speechbrain/aishell --engine_path transformer.engine + """ + + r, t = run_script(script) + sout = r.stdout + pattern = METRIC_PATTERN + matchs = re.findall(pattern, sout) + result["result"].setdefault(prec, {"status": "FAIL"}) + logging.debug(f"matchs:\n{matchs}") + for m in matchs: + result["result"][prec].update(get_metric_result(m)) + if len(matchs) == 2: + result["result"][prec]["status"] = "PASS" + + result["result"][prec]["Cost time (s)"] = t + 
logging.debug(f"matchs:\n{matchs}") + return result + +def get_metric_result(str): + if str: + return json.loads(str.replace("'", "\""))["metricResult"] + return None + +def run_script(script): + start_time = time.perf_counter() + result = subprocess.run( + script, shell=True, capture_output=True, text=True, executable="/bin/bash" + ) + end_time = time.perf_counter() + execution_time = end_time - start_time + logging.debug(f"执行命令:\n{script}") + logging.debug("执行时间: {:.4f} 秒".format(execution_time)) + logging.debug(f"标准输出: {result.stdout}") + logging.debug(f"标准错误: {result.stderr}") + logging.debug(f"返回码: {result.returncode}") + return result, execution_time + +if __name__ == "__main__": + main() diff --git a/toolbox/ByteMLPerf/.gitignore b/toolbox/ByteMLPerf/.gitignore index 2e06b074245514d00a97d339bae65d92a7fae2ab..ad7dbe99ea6339b6072c45365aeac7ab2be968fa 100644 --- a/toolbox/ByteMLPerf/.gitignore +++ b/toolbox/ByteMLPerf/.gitignore @@ -15,7 +15,7 @@ byte_infer_perf/general_perf/model_zoo/* byte_infer_perf/general_perf/download/*.* !byte_infer_perf/general_perf/download/README.md byte_infer_perf/general_perf/datasets/open_imagenet/preprocessed/ -byte_infer_perf/general_perf/datasets/* +# byte_infer_perf/general_perf/datasets/* !byte_infer_perf/general_perf/datasets/fake_dataset !*.py byte_infer_perf/general_perf/reports/* diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/common.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/common.py index c4576743f081ebb7270ab663cafe3535a661105b..4062220c95bc4285bbe27e7e7a47027f7bc8da2b 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/common.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/common.py @@ -26,8 +26,8 @@ import threading import importlib -tensorrt = None -Dims = None +import tensorrt +from tensorrt import Dims tvm = None @@ -39,25 +39,7 @@ def setup_seed(seed): torch.backends.cudnn.deterministic = True -def load_ixrt_plugin(logger=None, namespace="", dynamic_path="", model="", precision=""): - global tensorrt - global Dims - - if tensorrt is not None: - return - - if precision == 'FP16': - if model == 'resnet50' or model == 'bert' or model == 'albert' or model == 'deberta' or model == 'yolov5': - tensorrt = importlib.import_module("tensorrt_legacy") - Dims = getattr(tensorrt, "Dims") - else: - tensorrt = importlib.import_module("tensorrt") - Dims = getattr(tensorrt, "Dims") - - if precision == 'INT8': - tensorrt = importlib.import_module("tensorrt") - Dims = getattr(tensorrt, "Dims") - +def load_ixrt_plugin(logger=tensorrt.Logger(tensorrt.Logger.WARNING), namespace="", dynamic_path="", model="", precision=""): if not dynamic_path: dynamic_path = join(dirname(tensorrt.__file__), "lib", "libixrt_plugin.so") @@ -66,7 +48,7 @@ def load_ixrt_plugin(logger=None, namespace="", dynamic_path="", model="", preci f"The ixrt_plugin lib {dynamic_path} is not existed, please provided effective plugin path!") ctypes.CDLL(dynamic_path, mode=ctypes.RTLD_GLOBAL) - tensorrt.init_libnvinfer_plugins(tensorrt.Logger(tensorrt.Logger.INFO), namespace) + tensorrt.init_libnvinfer_plugins(logger, namespace) print(f"Loaded plugin from {dynamic_path}") diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/README.md b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/README.md new file mode 100755 index 0000000000000000000000000000000000000000..3d1318032a7b03971285a05b997d3275c0d3c3cf --- /dev/null +++ 
b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/README.md
@@ -0,0 +1,114 @@
+# IxRT optimizer
+
+## 1. optimizer 简介
+
+`optimizer` 是一个集成在 ixrt 中的图融合工具,用于将 onnx 图中的 op 融合成对应的 IxRT plugin,一般与 IxRT 配合使用。
+
+## 2. optimizer 功能说明
+
+| 功能 | 说明 |
+| ---------- | ------------------------------------------------------------ |
+| 动态图支持 | 支持融合动态图和静态图 |
+| 模型支持 | 目前测试通过 videobert, roberta, deberta, swinL, roformer, albert, yolov5s, visionTransformer, gpt2 模型,其他模型暂不推荐使用该工具 |
+
+## 3. optimizer 运行参数
+
+| 参数 | 说明 |
+| ---------------- | ------------------------------------------------------------ |
+| `--onnx` | 必选,指定要运行的 onnx 模型路径 |
+| `--num_heads` | 可选,指定模型 Attention 模块注意力头的个数 |
+| `--hidden_size` | 可选,指定模型隐藏层的大小 |
+| `--input_shapes` | 可选,固定动态模型的输入形状,以便按静态形状推理,示例:--input_shapes "input_name1:3x224x224, input_name2:3x224x224" |
+| `--dump_onnx` | 可选,在图融合过程中 dump 出中间的 onnx 图,生成以 _sim 结尾的 onnx 模型 |
+| `--model_type` | 可选,指定要融合的模型类型,默认是 "bert",可选 ["bert", "swint", "roformer", "yolo", "gpt2", "vit"] |
+| `--log_level` | 可选,指定 IxRT 运行时显示日志的等级,可指定为 debug、info、error,默认为 info |
+
+
+## 4. 运行示例
+
+### 4.1 示例1:融合albert|videobert|roberta|deberta
+
+```bash
+cd oss/tools/optimizer
+python3 optimizer.py --onnx ${MODEL_PATH}
+```
+
+### 4.2 示例2:融合swinL
+
+```bash
+cd oss/tools/optimizer
+python3 optimizer.py --onnx ${MODEL_PATH} --input_shapes pixel_values.1:${BS}x3x384x384 --model_type swint
+```
+
+### 4.3 示例3:融合roformer
+
+```bash
+cd oss/tools/optimizer
+python3 optimizer.py --onnx ${MODEL_PATH} --model_type roformer
+```
+
+### 4.4 示例4:融合yolov5s
+
+```bash
+cd oss/tools/optimizer
+python3 optimizer.py --onnx ${MODEL_PATH} --model_type yolo
+```
+
+### 4.5 精度验证
+
+#### 4.5.1 示例1:albert模型
+
+模型变量示例:
+
+```
+MODEL_PATH="data/albert/albert-base-squad.onnx"
+MODEL_END_PATH="data/albert/albert-base-squad_end.onnx"
+MODEL_ENGINE_PATH="data/albert/albert-base-squad_end.engine"
+```
+
+运行命令:
+
+```bash
+cd oss/tools/optimizer
+python3 optimizer.py --onnx ${MODEL_PATH} --dump_onnx
+ixrtexec --onnx ${MODEL_END_PATH} --min_shape input_ids.1:${BS}x384,attention_mask.1:${BS}x384,token_type_ids.1:${BS}x384 \
+         --opt_shape input_ids.1:${BS}x384,attention_mask.1:${BS}x384,token_type_ids.1:${BS}x384 \
+         --max_shape input_ids.1:${BS}x384,attention_mask.1:${BS}x384,token_type_ids.1:${BS}x384 \
+         --save_engine ${MODEL_ENGINE_PATH} --log_level verbose --plugins ixrt_plugin
+ixrtexec --load_engine ${MODEL_ENGINE_PATH} --ort_onnx ${MODEL_PATH} --plugins ixrt_plugin --verify_acc
+```
+
+#### 4.5.2 示例2:swinL模型
+
+模型变量示例:
+
+```
+BS=1
+MODEL_PATH="data/swint/swin-transformer-large.onnx"
+MODEL_END_PATH="data/swint/swin-transformer-large_end.onnx"
+MODEL_ENGINE_PATH="data/swint/swin-transformer-large_end.engine"
+MODEL_SIM_STATIC_SIM_PATH="data/swint/swin-transformer-large_sim_static_sim.onnx"
+```
+
+运行命令:
+
+```bash
+cd oss/tools/optimizer
+# 固定输入形状为 ${BS}x3x384x384
+python3 optimizer.py --onnx ${MODEL_PATH} --input_shapes pixel_values.1:${BS}x3x384x384 --model_type swint --dump_onnx
+
+# Build engine
+ixrtexec --onnx ${MODEL_END_PATH} --save_engine ${MODEL_ENGINE_PATH} --log_level verbose --plugins ixrt_plugin
+
+# 测试性能
+ixrtexec --load_engine ${MODEL_ENGINE_PATH} --plugins ixrt_plugin
+
+# 测试精度
+ixrtexec --load_engine ${MODEL_ENGINE_PATH} --ort_onnx ${MODEL_SIM_STATIC_SIM_PATH} --plugins ixrt_plugin --verify_acc
+```
+
+请参考[高级话题](5_advanced_topics.md)中的精度对比工具一节,了解详细使用方法和原理。
+
+也可以参考[C++ API 使用简介](3_cpp_api.md)或 [Python API 使用简介](4_python_api.md)了解对应接口的用法。
+
+具体使用方法可以参考 oss/samples。
diff --git 
a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/__init__.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..de522e5b082b122a28b0a0423a40909598aa82d5 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/__init__.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_PVT.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_PVT.py new file mode 100644 index 0000000000000000000000000000000000000000..3a9c0ca081a1b44c00b0909c2b69c0e5a00c1e6a --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_PVT.py @@ -0,0 +1,593 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- + +from logging import getLogger +from typing import List, Optional + +import onnx +from onnx import GraphProto, ModelProto, TensorProto, ValueInfoProto, helper +from passes.fuse_series_bias_add import FusionSerialBiasAdd +from passes.fusion_albert_attention import FusionAlbertAttention +from passes.fusion_attention import AttentionMask, FusionAttention +from passes.fusion_biasgelu import FusionBiasGelu +from passes.fusion_customfc import ( + FusionCustomFC, + FusionCustomFCActivation, + FusionCustomFCGPT2, +) +from passes.fusion_disentangled_attention import FusionDisentangledAttention +from passes.fusion_embedlayer import FusionEmbedLayerNormalization +from passes.fusion_fastgelu import FusionFastGelu +from passes.fusion_format_roformer import ( + FusionFormatInvalidMask, + FusionRemoveUselessElementwise, +) +from passes.fusion_gelu import FusionGelu +from passes.fusion_gelu_approximation import FusionGeluApproximation +from passes.fusion_gpt_attention_no_past import FusionGptAttentionNoPast +from passes.fusion_layernorm import FusionLayerNormalization, FusionLayerNormalizationTF +from passes.fusion_options import FusionOptions +from passes.fusion_qordered_attention import FusionQOrderedAttention +from passes.fusion_qordered_gelu import FusionQOrderedGelu +from passes.fusion_qordered_layernorm import FusionQOrderedLayerNormalization +from passes.fusion_qordered_matmul import FusionQOrderedMatMul +from passes.fusion_reshape import FusionReshape +from passes.fusion_shape import FusionShape +from passes.fusion_skiplayernorm import ( + FusionBiasSkipLayerNormalization, + FusionSkipLayerNormalization, +) + +from passes.fusion_utils import FusionUtils + +from passes.fusion_conv_reformat import 
FusionConvReformat + +from passes.fusion_xsoftmax import FusionXSoftmax +from passes.fusion_PVT_attention import FusionPVTAttention +from passes.onnx_model import OnnxModel + +logger = getLogger(__name__) + + +class PVTOptimizationOptions(FusionOptions): + """This class is deprecated""" + + def __init__(self, model_type): + logger.warning( + f"PVTOptimizationOptions is depreciated. Please use FusionOptions instead." + ) + super().__init__(model_type) + + +class PVTOnnxModel(OnnxModel): + def __init__(self, model: ModelProto, num_heads: int = 0, hidden_size: int = 0): + """Initialize BERT ONNX Model. + + Args: + model (ModelProto): the ONNX model + num_heads (int, optional): number of attention heads. Defaults to 0 (detect the parameter automatically). + hidden_size (int, optional): hidden dimension. Defaults to 0 (detect the parameter automatically). + """ + assert (num_heads == 0 and hidden_size == 0) or ( + num_heads > 0 and hidden_size % num_heads == 0 + ) + + super().__init__(model) + self.num_heads = num_heads + self.hidden_size = hidden_size + + self.attention_mask = AttentionMask(self) + self.attention_fusion = FusionAttention( + self, self.hidden_size, self.num_heads, self.attention_mask + ) + self.qordered_attention_fusion = FusionQOrderedAttention( + self, self.hidden_size, self.num_heads, self.attention_mask + ) + self.utils = FusionUtils(self) + + def fuse_attention(self): + self.attention_fusion.apply() + FusionAlbertAttention( + self, self.hidden_size, self.num_heads, self.attention_mask + ).apply() + # FusionVideoBertAttention(self).apply() + # FusionVITAttention(self).apply() + # FusionSwinLAttention(self).apply() + # FusionGptAttentionNoPast(self).apply() + FusionPVTAttention(self).apply() + # Only relevant in models with Q-DQ nodes + self.qordered_attention_fusion.apply() + + def fuse_format_roformer(self): + FusionRemoveUselessElementwise(self).apply() + fusion = FusionFormatInvalidMask(self) + fusion.apply() + + def fuse_custom_fc(self): + fusion = FusionCustomFC(self) + fusion.apply() + + def fuse_custom_fc_activation(self): + fusion = FusionCustomFCActivation(self) + fusion.apply() + + def fuse_custom_fc_gpt2_classify(self): + fusion = FusionCustomFCGPT2(self) + fusion.apply() + + def fuse_swinT_serial_bias_add(self): + fusion = FusionSerialBiasAdd(self) + fusion.apply() + + def fuse_gelu(self): + fusion = FusionGelu(self) + fusion.apply() + fusion = FusionFastGelu(self) + fusion.apply() + # Only relevant in models with Q-DQ nodes + fusion = FusionQOrderedGelu(self) + fusion.apply() + + def fuse_bias_gelu(self, is_fastgelu): + fusion = FusionBiasGelu(self, is_fastgelu) + fusion.apply() + + def fuse_custom_xsoftmax(self): + fusion = FusionXSoftmax(self) + fusion.apply() + + def fuse_disentangled_attention(self): + fusion = FusionDisentangledAttention(self) + fusion.apply() + + def gelu_approximation(self): + fusion = FusionGeluApproximation(self) + fusion.apply() + + def fuse_add_bias_skip_layer_norm(self): + fusion = FusionBiasSkipLayerNormalization(self) + fusion.apply() + + def fuse_reshape(self): + fusion = FusionReshape(self) + fusion.apply() + + def fuse_shape(self): + fusion = FusionShape(self) + fusion.apply() + + def fuse_embed_layer(self): + fusion = FusionEmbedLayerNormalization(self) + fusion.apply() + + def fuse_layer_norm(self): + fusion = FusionLayerNormalization(self, self.hidden_size) + fusion.apply() + + fusion = FusionLayerNormalizationTF(self) + fusion.apply() + + # Only relevant in models with Q-DQ nodes + fusion = 
FusionQOrderedLayerNormalization(self) + fusion.apply() + + def fuse_skip_layer_norm(self): + fusion = FusionSkipLayerNormalization(self) + fusion.apply() + + # Only relevant in models with Q-DQ nodes + def fuse_qordered_mamtul(self): + fusion = FusionQOrderedMatMul(self) + fusion.apply() + + def conv_reformat(self): + fusion = FusionConvReformat(self) + fusion.apply() + + + + def get_graph_inputs_from_node_type( + self, op_type: str, input_indices: List[int], casted: bool + ): + """ + Get graph inputs that feed into node type (like EmbedLayerNormalization or Attention). + Returns a list of the graph input names based on the filter whether it is casted or not. + """ + graph_inputs = [] + + output_name_to_node = self.output_name_to_node() + nodes = self.get_nodes_by_op_type(op_type) + for node in nodes: + bert_inputs = [node.input[i] for i in input_indices if i < len(node.input)] + for bert_input in bert_inputs: + if self.find_graph_input(bert_input): + if not casted: + graph_inputs.append(bert_input) + elif bert_input in output_name_to_node: + parent = output_name_to_node[bert_input] + if ( + parent.op_type == "Cast" + and self.find_graph_input(parent.input[0]) is not None + ): + if casted: + graph_inputs.append(parent.input[0]) + return graph_inputs + + def get_graph_inputs_from_fused_nodes(self, casted: bool): + inputs = self.get_graph_inputs_from_node_type( + "EmbedLayerNormalization", [0, 1, 7], casted + ) + inputs += self.get_graph_inputs_from_node_type("Attention", [3], casted) + return inputs + + def change_graph_input_type( + self, + graph: GraphProto, + graph_input: ValueInfoProto, + new_type: int = TensorProto.INT32, + ): + """Change graph input type, and add Cast node if needed. + + Args: + graph (GraphProto): graph + graph_input (TensorProto): input of the graph + new_type (int, optional): new data type. Defaults to TensorProto.INT32. + + Returns: + NodeProto: a new Cast node that added. None if Cast node is not added. + List[NodeProto]: Cast nodes that have been removed. + """ + assert isinstance(graph, GraphProto) + assert isinstance(graph_input, ValueInfoProto) + assert self.find_graph_input(graph_input.name) + + if graph_input.type.tensor_type.elem_type == int(new_type): + return None, [] + + new_cast_node = None + nodes_to_remove = [] + + input_name_to_nodes = self.input_name_to_nodes() + if graph_input.name in input_name_to_nodes: + nodes = input_name_to_nodes[graph_input.name] + + # For children that is not Cast node, insert a Cast node to convert int32 to original data type. + nodes_not_cast = [node for node in nodes if node.op_type != "Cast"] + if nodes_not_cast: + node_name = self.create_node_name("Cast") + output_name = node_name + "_" + graph_input.name + new_value_info = graph.value_info.add() + new_value_info.CopyFrom(graph_input) + new_value_info.name = output_name + new_cast_node = helper.make_node( + "Cast", + [graph_input.name], + [output_name], + to=int(graph_input.type.tensor_type.elem_type), + name=node_name, + ) + graph.node.extend([new_cast_node]) + + for node in nodes_not_cast: + OnnxModel.replace_node_input(node, graph_input.name, output_name) + + # For children that is Cast node, no need to insert Cast. + # When the children is Cast to int32, we can remove that Cast node since input type is int32 now. 
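+            # A Cast child is only removed when its output is not also a graph output
+            # (see the find_graph_output check below).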
+ nodes_cast = [node for node in nodes if node.op_type == "Cast"] + for node in nodes_cast: + if OnnxModel.get_node_attribute(node, "to") == int(new_type): + self.replace_input_of_all_nodes(node.output[0], graph_input.name) + if not self.find_graph_output(node.output[0]): + nodes_to_remove.append(node) + if nodes_to_remove: + self.remove_nodes(nodes_to_remove) + + graph_input.type.tensor_type.elem_type = int(new_type) + return new_cast_node, nodes_to_remove + + def change_graph_inputs_to_int32(self): + """Change data type of all graph inputs to int32 type, and add Cast node if needed.""" + graph = self.graph() + add_cast_count = 0 + remove_cast_count = 0 + for graph_input in graph.input: + new_node, removed_nodes = self.change_graph_input_type( + graph, graph_input, TensorProto.INT32 + ) + if new_node: + add_cast_count += 1 + remove_cast_count += len(removed_nodes) + logger.info( + f"Graph inputs are changed to int32. Added {add_cast_count} Cast nodes, and removed {remove_cast_count} Cast nodes." + ) + + def use_dynamic_axes( + self, dynamic_batch_dim="batch_size", dynamic_seq_len="max_seq_len" + ): + """ + Update input and output shape to use dynamic axes. + """ + bert_graph_inputs = self.get_graph_inputs_from_fused_nodes( + casted=True + ) + self.get_graph_inputs_from_fused_nodes(casted=False) + + dynamic_batch_inputs = {} + for input in self.model.graph.input: + if input.name in bert_graph_inputs: + dim_proto = input.type.tensor_type.shape.dim[0] + dim_proto.dim_param = dynamic_batch_dim + if dynamic_seq_len is not None: + dim_proto = input.type.tensor_type.shape.dim[1] + dim_proto.dim_param = dynamic_seq_len + + for output in self.model.graph.output: + dim_proto = output.type.tensor_type.shape.dim[0] + dim_proto.dim_param = dynamic_batch_dim + + def preprocess(self): + self.adjust_reshape_and_expand() + return + + def adjust_reshape_and_expand(self): + nodes_to_remove = [] + for node in self.nodes(): + if node.op_type == "Reshape": + # Clean up unneccessary reshape nodes. + # Find reshape nodes with no actually data in "shape" attribute and remove. + reshape_shape = self.get_constant_value(node.input[1]) + if reshape_shape is not None and reshape_shape.size == 0: + nodes_to_remove.extend([node]) + self.replace_input_of_all_nodes(node.output[0], node.input[0]) + continue + + # Find path "Slice" -> "Reshape" -> "Expand" -> "Expand" -> current "Reshape", simplify the graph by + # changing current reshape's input to output of slice. 
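+                # match_parent_path returns the parents ordered from the immediate
+                # parent (Expand) up to the farthest ancestor (Slice), so
+                # reshape_path[-1] below is the Slice node.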
+ reshape_path = self.match_parent_path( + node, + ["Expand", "Expand", "Reshape", "Slice"], + [0, 0, 0, 0], + self.output_name_to_node(), + ) + if reshape_path is not None: + expand_node = reshape_path[-3] + expand_shape_value = self.get_constant_value(expand_node.input[1]) + + reshape_before_expand = reshape_path[-2] + shape_value = self.get_constant_value( + reshape_before_expand.input[1] + ) + + slice_node = reshape_path[-1] + if ( + expand_shape_value is not None + and shape_value is not None + and len(expand_shape_value) == 2 + and len(shape_value) == 1 + and expand_shape_value[1] == shape_value[0] + ): + node.input[0] = slice_node.output[0] + + if nodes_to_remove: + self.remove_nodes(nodes_to_remove) + logger.info(f"Removed Reshape and Expand count: {len(nodes_to_remove)}") + + def clean_graph(self): + output_name_to_node = self.output_name_to_node() + nodes_to_remove = [] + for node in self.nodes(): + # Before: + # input_ids --> Shape --> Gather(indices=0) --> Unsqueeze ------+ + # | | + # | v + # +----> Shape --> Gather(indices=1) --> Unsqueeze---> Concat --> ConstantOfShape -->Cast --> EmbedLayerNormaliation/ReduceSum + # After: + # input_ids --> Shape --> ConstantOfShape -->Cast --> EmbedLayerNormaliation/ReduceSum + # TODO: merge ConstantOfShape -->Cast to ConstantOfShape (need update the data type of value) + op_input_id = {"EmbedLayerNormalization": 1, "ReduceSum": 0, "Attention": 3} + if node.op_type in op_input_id: + i = op_input_id[node.op_type] + parent_nodes = self.match_parent_path( + node, + [ + "Cast", + "ConstantOfShape", + "Concat", + "Unsqueeze", + "Gather", + "Shape", + ], + [i, 0, 0, 0, 0, 0], + output_name_to_node, + ) + if parent_nodes is not None: + ( + cast, + constantOfShape, + concat, + unsqueeze, + gather, + shape, + ) = parent_nodes + if shape.input[0] == self.graph().input[0].name: + constantOfShape.input[0] = shape.output[0] + output_name_to_node = self.output_name_to_node() + + if node.op_type == "Attention": + # Before: + # input_ids --> Shape -->ConstantOfShape -->Cast --> ReduceSum --> Attention + # After: + # remove this path, and remove the optional mask_index input of Attention node. + parent_nodes = self.match_parent_path( + node, + ["ReduceSum", "Cast", "ConstantOfShape", "Shape"], + [3, 0, 0, 0], + output_name_to_node, + ) + if parent_nodes is not None: + if parent_nodes[-1].input[0] == self.graph().input[0].name: + attention_node = helper.make_node( + "Attention", + inputs=node.input[0 : len(node.input) - 1], + outputs=node.output, + name=node.name + "_remove_mask", + ) + attention_node.domain = "com.microsoft" + attention_node.attribute.extend( + [helper.make_attribute("num_heads", self.num_heads)] + ) + self.add_node( + attention_node, self.get_graph_by_node(attention_node).name + ) + nodes_to_remove.append(node) + self.remove_nodes(nodes_to_remove) + + def postprocess(self): + self.clean_graph() + self.prune_graph() + + def optimize( + self, options: Optional[FusionOptions] = None, add_dynamic_axes: bool = False + ): + if (options is not None) and not options.enable_shape_inference: + self.disable_shape_inference() + + self.utils.remove_identity_nodes() + + # Remove cast nodes that having same data type of input and output based on symbolic shape inference. 
+ self.utils.remove_useless_cast_nodes() + + if (options is None) or options.enable_layer_norm: + self.fuse_layer_norm() + + if (options is None) or options.enable_gelu: + self.fuse_gelu() + + self.preprocess() + + self.fuse_reshape() + + if (options is None) or options.enable_skip_layer_norm: + self.fuse_skip_layer_norm() + + if options.enable_swint_opt: + self.fuse_custom_fc() + self.fuse_swinT_serial_bias_add() + + if options.enable_format_roformer: + self.fuse_format_roformer() + + if options.enable_gpt2_classify or options.enable_vit: + self.fuse_custom_fc_gpt2_classify() + + if options.enable_vit: + self.fuse_custom_fc() + + # if (options is None) or options.enable_attention: + # if options is not None: + # self.attention_mask.set_mask_format(options.attention_mask_format) + self.fuse_attention() + + self.conv_reformat() + + if (options is None) or options.enable_skip_layer_norm: + self.fuse_skip_layer_norm() + + self.fuse_custom_fc() + + self.fuse_custom_xsoftmax() + + self.fuse_disentangled_attention() + + # Perform the MatMul fusion after the Attention fusion as we do not + # want to fuse the MatMuls inside the Attention subgraphs + if (options is None) or options.enable_qordered_matmul: + self.fuse_qordered_mamtul() + + self.fuse_shape() + + if (options is None) or options.enable_embed_layer_norm: + self.fuse_embed_layer() + + # Remove reshape nodes that having same shape of input and output based on symbolic shape inference. + self.utils.remove_useless_reshape_nodes() + + self.postprocess() + + # Bias fusion is done after postprocess to avoid extra Reshape between bias and Gelu/FastGelu/SkipLayerNormalization + if (options is None) or options.enable_bias_gelu: + # Fuse Gelu and Add Bias before it. + self.fuse_bias_gelu(is_fastgelu=True) + self.fuse_bias_gelu(is_fastgelu=False) + + if (options is None) or options.enable_bias_skip_layer_norm: + # Fuse SkipLayerNormalization and Add Bias before it. + self.fuse_add_bias_skip_layer_norm() + + if options is not None and options.enable_gelu_approximation: + self.gelu_approximation() + + self.fuse_custom_fc_activation() + + self.remove_unused_constant() + + # Use symbolic batch dimension in input and output. + if add_dynamic_axes: + self.use_dynamic_axes() + + logger.info(f"opset version: {self.get_opset_version()}") + + def get_fused_operator_statistics(self): + """ + Returns node count of fused operators. + """ + op_count = {} + ops = [ + "EmbedLayerNormalization", + "Attention", + "QOrderedAttention", + "Gelu", + "QOrderedGelu", + "FastGelu", + "BiasGelu", + "LayerNormalization", + "QOrderedLayerNormalization", + "SkipLayerNormalization", + "QOrderedMatMul", + ] + for op in ops: + nodes = self.get_nodes_by_op_type(op) + op_count[op] = len(nodes) + logger.info(f"Optimized operators:{op_count}") + return op_count + + def is_fully_optimized(self): + """ + Returns True when the model is fully optimized. 
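+
+        The check is heuristic: embedding and attention fusions must have fired,
+        every fused attention must have a matching (Bias/Fast)Gelu, and at least
+        two (Skip)LayerNormalization nodes must remain per attention block.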
+ """ + op_count = self.get_fused_operator_statistics() + embed = op_count["EmbedLayerNormalization"] + attention = op_count["Attention"] + op_count["QOrderedAttention"] + gelu = op_count["Gelu"] + op_count["BiasGelu"] + op_count["FastGelu"] + layer_norm = op_count["LayerNormalization"] + op_count["SkipLayerNormalization"] + is_perfect = ( + (embed > 0) + and (attention > 0) + and (attention == gelu) + and (layer_norm >= 2 * attention) + ) + + if layer_norm == 0: + logger.debug("Layer Normalization not fused") + + if gelu == 0: + logger.debug("Gelu/FastGelu not fused") + + if embed == 0: + logger.debug("Embed Layer not fused") + + if attention == 0: + logger.warning("Attention not fused") + + return is_perfect \ No newline at end of file diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_bert.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_bert.py index 7c40a978ea07b4a2bd107cd0cbba1c63ecea7256..7324603e61bb7a13a57e586827c8fa67a9af4ae2 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_bert.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_bert.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
@@ -16,6 +32,7 @@ from passes.fusion_customfc import ( FusionCustomFC, FusionCustomFCActivation, FusionCustomFCGPT2, + FusionTorchvisionVitCustomFC, ) from passes.fusion_disentangled_attention import FusionDisentangledAttention from passes.fusion_embedlayer import FusionEmbedLayerNormalization @@ -42,8 +59,11 @@ from passes.fusion_skiplayernorm import ( from passes.fusion_swinl_attention import FusionSwinLAttention from passes.fusion_utils import FusionUtils from passes.fusion_videobert_attention import FusionVideoBertAttention -from passes.fusion_vit_attention import FusionVITAttention +from passes.fusion_vit_attention import FusionVITAttention, FusionTorchvisionVITAttention from passes.fusion_xsoftmax import FusionXSoftmax +from passes.fuse_inverse_sigmoid import FusionLayerInverseSigmoid +from passes.fuse_l2_normalization import FusionLayerL2Normalization +from passes.fuse_omdet_attention import FusionLayerOmdetAttention from passes.onnx_model import OnnxModel logger = getLogger(__name__) @@ -92,6 +112,7 @@ class BertOnnxModel(OnnxModel): ).apply() FusionVideoBertAttention(self).apply() FusionVITAttention(self).apply() + FusionTorchvisionVITAttention(self).apply() FusionSwinLAttention(self).apply() FusionGptAttentionNoPast(self).apply() # Only relevant in models with Q-DQ nodes @@ -106,6 +127,10 @@ class BertOnnxModel(OnnxModel): fusion = FusionCustomFC(self) fusion.apply() + def fuse_custom_fc_torchvision_vit(self): + fusion = FusionTorchvisionVitCustomFC(self) + fusion.apply() + def fuse_custom_fc_activation(self): fusion = FusionCustomFCActivation(self) fusion.apply() @@ -179,6 +204,18 @@ class BertOnnxModel(OnnxModel): fusion = FusionQOrderedMatMul(self) fusion.apply() + def fuse_omdet_inverse_sigmoid(self): + fusion = FusionLayerInverseSigmoid(self) + fusion.apply() + + def fuse_omdet_attention(self): + fusion = FusionLayerOmdetAttention(self) + fusion.apply() + + def fuse_l2_normalization(self): + fusion = FusionLayerL2Normalization(self) + fusion.apply() + def get_graph_inputs_from_node_type( self, op_type: str, input_indices: List[int], casted: bool ): @@ -484,6 +521,11 @@ class BertOnnxModel(OnnxModel): self.fuse_skip_layer_norm() self.fuse_custom_fc() + + if options.enable_omdet: + self.fuse_omdet_attention() + self.fuse_omdet_inverse_sigmoid() + self.fuse_l2_normalization() self.fuse_custom_xsoftmax() @@ -518,6 +560,9 @@ class BertOnnxModel(OnnxModel): self.gelu_approximation() self.fuse_custom_fc_activation() + + if options.enable_vit: + self.fuse_custom_fc_torchvision_vit() self.remove_unused_constant() diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_conformer.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_conformer.py index a250a9ea05c5d7b625523e62b976f94fa7ab6cff..cc59c37bd48f677a7d06f141f45eaa55aef54656 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_conformer.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_conformer.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. @@ -573,4 +589,3 @@ class conformerOnnxModel(OnnxModel): logger.warning("Attention not fused") return is_perfect - diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_cosyvoice.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_cosyvoice.py new file mode 100755 index 0000000000000000000000000000000000000000..98cfc6699ab5276f2fd37915a62487a173fb4d12 --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_cosyvoice.py @@ -0,0 +1,640 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+# -------------------------------------------------------------------------- + +from logging import getLogger +from typing import List, Optional + +import onnx +from onnx import GraphProto, ModelProto, TensorProto, ValueInfoProto, helper +from passes.fuse_series_bias_add import FusionSerialBiasAdd +from passes.fusion_albert_attention import FusionAlbertAttention +from passes.fusion_attention import AttentionMask, FusionAttention +from passes.fusion_biasgelu import FusionBiasGelu +from passes.fusion_customfc import ( + FusionCustomFC, + FusionCustomFCActivation, + FusionCustomFCGPT2, + FusionTorchvisionVitCustomFC, +) +from passes.fusion_disentangled_attention import FusionDisentangledAttention +from passes.fusion_embedlayer import FusionEmbedLayerNormalization +from passes.fusion_fastgelu import FusionFastGelu +from passes.fusion_format_roformer import ( + FusionFormatInvalidMask, + FusionRemoveUselessElementwise, +) +from passes.fusion_gelu import FusionGelu +from passes.fusion_gelu_approximation import FusionGeluApproximation +from passes.fusion_gpt_attention_no_past import FusionGptAttentionNoPast +from passes.fusion_layernorm import FusionLayerNormalization, FusionLayerNormalizationTF +from passes.fusion_options import FusionOptions +from passes.fusion_qordered_attention import FusionQOrderedAttention +from passes.fusion_qordered_gelu import FusionQOrderedGelu +from passes.fusion_qordered_layernorm import FusionQOrderedLayerNormalization +from passes.fusion_qordered_matmul import FusionQOrderedMatMul +from passes.fusion_reshape import FusionReshape +from passes.fusion_shape import FusionShape +from passes.fusion_skiplayernorm import ( + FusionBiasSkipLayerNormalization, + FusionSkipLayerNormalization, +) +from passes.fusion_swinl_attention import FusionSwinLAttention +from passes.fusion_utils import FusionUtils +from passes.fusion_videobert_attention import FusionVideoBertAttention +from passes.fusion_vit_attention import FusionVITAttention, FusionTorchvisionVITAttention +from passes.fusion_xsoftmax import FusionXSoftmax +from passes.fuse_inverse_sigmoid import FusionLayerInverseSigmoid +from passes.fuse_l2_normalization import FusionLayerL2Normalization +from passes.fuse_omdet_attention import FusionLayerOmdetAttention +from passes.onnx_model import OnnxModel + +from passes.fusion_cosyvoice_splitQKV_update_KVcache import FusionCosyVoiceSplitQKVUpdateKVCache +from passes.fusion_cosyvoice_attention import ( + FusionCosyvoiceAttention +) +from passes.fusion_cosyvoice_splitQKV import FusionSplitQKV + + + +logger = getLogger(__name__) + + + +class cosyvoiceOnnxModel(OnnxModel): + def __init__(self, model: ModelProto, num_heads: int = 16, hidden_size: int = 1024): + """Initialize BERT ONNX Model. + + Args: + model (ModelProto): the ONNX model + num_heads (int, optional): number of attention heads. Defaults to 0 (detect the parameter automatically). + hidden_size (int, optional): hidden dimension. Defaults to 0 (detect the parameter automatically). 
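+
+        Note:
+            For this CosyVoice model the signature defaults are num_heads=16 and
+            hidden_size=1024 rather than automatic detection.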
+ """ + assert (num_heads == 0 and hidden_size == 0) or ( + num_heads > 0 and hidden_size % num_heads == 0 + ) + + super().__init__(model) + self.num_heads = num_heads + self.hidden_size = hidden_size + + self.attention_mask = AttentionMask(self) + self.attention_fusion = FusionAttention( + self, self.hidden_size, self.num_heads, self.attention_mask + ) + self.qordered_attention_fusion = FusionQOrderedAttention( + self, self.hidden_size, self.num_heads, self.attention_mask + ) + self.utils = FusionUtils(self) + + def fuse_attention(self): + self.attention_fusion.apply() + FusionAlbertAttention( + self, self.hidden_size, self.num_heads, self.attention_mask + ).apply() + FusionVideoBertAttention(self).apply() + FusionVITAttention(self).apply() + FusionTorchvisionVITAttention(self).apply() + FusionSwinLAttention(self).apply() + FusionGptAttentionNoPast(self).apply() + # Only relevant in models with Q-DQ nodes + self.qordered_attention_fusion.apply() + + def fuse_format_roformer(self): + FusionRemoveUselessElementwise(self).apply() + fusion = FusionFormatInvalidMask(self) + fusion.apply() + + def fuse_custom_fc(self): + fusion = FusionCustomFC(self) + fusion.apply() + + def fuse_custom_fc_torchvision_vit(self): + fusion = FusionTorchvisionVitCustomFC(self) + fusion.apply() + + def fuse_custom_fc_activation(self): + fusion = FusionCustomFCActivation(self) + fusion.apply() + + def fuse_custom_fc_gpt2_classify(self): + fusion = FusionCustomFCGPT2(self) + fusion.apply() + + def fuse_swinT_serial_bias_add(self): + fusion = FusionSerialBiasAdd(self) + fusion.apply() + + def fuse_gelu(self): + fusion = FusionGelu(self) + fusion.apply() + fusion = FusionFastGelu(self) + fusion.apply() + # Only relevant in models with Q-DQ nodes + fusion = FusionQOrderedGelu(self) + fusion.apply() + + def fuse_bias_gelu(self, is_fastgelu): + fusion = FusionBiasGelu(self, is_fastgelu) + fusion.apply() + + def fuse_custom_xsoftmax(self): + fusion = FusionXSoftmax(self) + fusion.apply() + + def fuse_disentangled_attention(self): + fusion = FusionDisentangledAttention(self) + fusion.apply() + + def gelu_approximation(self): + fusion = FusionGeluApproximation(self) + fusion.apply() + + def fuse_add_bias_skip_layer_norm(self): + fusion = FusionBiasSkipLayerNormalization(self) + fusion.apply() + + def fuse_reshape(self): + fusion = FusionReshape(self) + fusion.apply() + + def fuse_shape(self): + fusion = FusionShape(self) + fusion.apply() + + def fuse_embed_layer(self): + fusion = FusionEmbedLayerNormalization(self) + fusion.apply() + + def fuse_layer_norm(self): + fusion = FusionLayerNormalization(self, self.hidden_size) + fusion.apply() + + fusion = FusionLayerNormalizationTF(self) + fusion.apply() + + # Only relevant in models with Q-DQ nodes + fusion = FusionQOrderedLayerNormalization(self) + fusion.apply() + + def fuse_skip_layer_norm(self): + fusion = FusionSkipLayerNormalization(self) + fusion.apply() + + # Only relevant in models with Q-DQ nodes + def fuse_qordered_mamtul(self): + fusion = FusionQOrderedMatMul(self) + fusion.apply() + + def fuse_omdet_inverse_sigmoid(self): + fusion = FusionLayerInverseSigmoid(self) + fusion.apply() + + def fuse_omdet_attention(self): + fusion = FusionLayerOmdetAttention(self) + fusion.apply() + + def fuse_l2_normalization(self): + fusion = FusionLayerL2Normalization(self) + fusion.apply() + + def fuse_splitQKV_update_kv_cache(self): + fusion = FusionCosyVoiceSplitQKVUpdateKVCache(self, self.hidden_size, self.num_heads) + fusion.apply() + + def fuse_cosyvoice_attention(self): + fusion 
= FusionCosyvoiceAttention(self) + fusion.apply() + + def fuse_cosyvoice_split_qkv(self): + fusion = FusionSplitQKV(self, self.hidden_size, self.num_heads) + fusion.apply() + + + def get_graph_inputs_from_node_type( + self, op_type: str, input_indices: List[int], casted: bool + ): + """ + Get graph inputs that feed into node type (like EmbedLayerNormalization or Attention). + Returns a list of the graph input names based on the filter whether it is casted or not. + """ + graph_inputs = [] + + output_name_to_node = self.output_name_to_node() + nodes = self.get_nodes_by_op_type(op_type) + for node in nodes: + bert_inputs = [node.input[i] for i in input_indices if i < len(node.input)] + for bert_input in bert_inputs: + if self.find_graph_input(bert_input): + if not casted: + graph_inputs.append(bert_input) + elif bert_input in output_name_to_node: + parent = output_name_to_node[bert_input] + if ( + parent.op_type == "Cast" + and self.find_graph_input(parent.input[0]) is not None + ): + if casted: + graph_inputs.append(parent.input[0]) + return graph_inputs + + def get_graph_inputs_from_fused_nodes(self, casted: bool): + inputs = self.get_graph_inputs_from_node_type( + "EmbedLayerNormalization", [0, 1, 7], casted + ) + inputs += self.get_graph_inputs_from_node_type("Attention", [3], casted) + return inputs + + def change_graph_input_type( + self, + graph: GraphProto, + graph_input: ValueInfoProto, + new_type: int = TensorProto.INT32, + ): + """Change graph input type, and add Cast node if needed. + + Args: + graph (GraphProto): graph + graph_input (TensorProto): input of the graph + new_type (int, optional): new data type. Defaults to TensorProto.INT32. + + Returns: + NodeProto: a new Cast node that added. None if Cast node is not added. + List[NodeProto]: Cast nodes that have been removed. + """ + assert isinstance(graph, GraphProto) + assert isinstance(graph_input, ValueInfoProto) + assert self.find_graph_input(graph_input.name) + + if graph_input.type.tensor_type.elem_type == int(new_type): + return None, [] + + new_cast_node = None + nodes_to_remove = [] + + input_name_to_nodes = self.input_name_to_nodes() + if graph_input.name in input_name_to_nodes: + nodes = input_name_to_nodes[graph_input.name] + + # For children that is not Cast node, insert a Cast node to convert int32 to original data type. + nodes_not_cast = [node for node in nodes if node.op_type != "Cast"] + if nodes_not_cast: + node_name = self.create_node_name("Cast") + output_name = node_name + "_" + graph_input.name + new_value_info = graph.value_info.add() + new_value_info.CopyFrom(graph_input) + new_value_info.name = output_name + new_cast_node = helper.make_node( + "Cast", + [graph_input.name], + [output_name], + to=int(graph_input.type.tensor_type.elem_type), + name=node_name, + ) + graph.node.extend([new_cast_node]) + + for node in nodes_not_cast: + OnnxModel.replace_node_input(node, graph_input.name, output_name) + + # For children that is Cast node, no need to insert Cast. + # When the children is Cast to int32, we can remove that Cast node since input type is int32 now. 
+ nodes_cast = [node for node in nodes if node.op_type == "Cast"] + for node in nodes_cast: + if OnnxModel.get_node_attribute(node, "to") == int(new_type): + self.replace_input_of_all_nodes(node.output[0], graph_input.name) + if not self.find_graph_output(node.output[0]): + nodes_to_remove.append(node) + if nodes_to_remove: + self.remove_nodes(nodes_to_remove) + + graph_input.type.tensor_type.elem_type = int(new_type) + return new_cast_node, nodes_to_remove + + def change_graph_inputs_to_int32(self): + """Change data type of all graph inputs to int32 type, and add Cast node if needed.""" + graph = self.graph() + add_cast_count = 0 + remove_cast_count = 0 + for graph_input in graph.input: + new_node, removed_nodes = self.change_graph_input_type( + graph, graph_input, TensorProto.INT32 + ) + if new_node: + add_cast_count += 1 + remove_cast_count += len(removed_nodes) + logger.info( + f"Graph inputs are changed to int32. Added {add_cast_count} Cast nodes, and removed {remove_cast_count} Cast nodes." + ) + + def use_dynamic_axes( + self, dynamic_batch_dim="batch_size", dynamic_seq_len="max_seq_len" + ): + """ + Update input and output shape to use dynamic axes. + """ + bert_graph_inputs = self.get_graph_inputs_from_fused_nodes( + casted=True + ) + self.get_graph_inputs_from_fused_nodes(casted=False) + + dynamic_batch_inputs = {} + for input in self.model.graph.input: + if input.name in bert_graph_inputs: + dim_proto = input.type.tensor_type.shape.dim[0] + dim_proto.dim_param = dynamic_batch_dim + if dynamic_seq_len is not None: + dim_proto = input.type.tensor_type.shape.dim[1] + dim_proto.dim_param = dynamic_seq_len + + for output in self.model.graph.output: + dim_proto = output.type.tensor_type.shape.dim[0] + dim_proto.dim_param = dynamic_batch_dim + + def preprocess(self): + self.adjust_reshape_and_expand() + return + + def adjust_reshape_and_expand(self): + nodes_to_remove = [] + for node in self.nodes(): + if node.op_type == "Reshape": + # Clean up unneccessary reshape nodes. + # Find reshape nodes with no actually data in "shape" attribute and remove. + reshape_shape = self.get_constant_value(node.input[1]) + if reshape_shape is not None and reshape_shape.size == 0: + nodes_to_remove.extend([node]) + self.replace_input_of_all_nodes(node.output[0], node.input[0]) + continue + + # Find path "Slice" -> "Reshape" -> "Expand" -> "Expand" -> current "Reshape", simplify the graph by + # changing current reshape's input to output of slice. 
+ reshape_path = self.match_parent_path( + node, + ["Expand", "Expand", "Reshape", "Slice"], + [0, 0, 0, 0], + self.output_name_to_node(), + ) + if reshape_path is not None: + expand_node = reshape_path[-3] + expand_shape_value = self.get_constant_value(expand_node.input[1]) + + reshape_before_expand = reshape_path[-2] + shape_value = self.get_constant_value( + reshape_before_expand.input[1] + ) + + slice_node = reshape_path[-1] + if ( + expand_shape_value is not None + and shape_value is not None + and len(expand_shape_value) == 2 + and len(shape_value) == 1 + and expand_shape_value[1] == shape_value[0] + ): + node.input[0] = slice_node.output[0] + + if nodes_to_remove: + self.remove_nodes(nodes_to_remove) + logger.info(f"Removed Reshape and Expand count: {len(nodes_to_remove)}") + + def clean_graph(self): + output_name_to_node = self.output_name_to_node() + nodes_to_remove = [] + for node in self.nodes(): + # Before: + # input_ids --> Shape --> Gather(indices=0) --> Unsqueeze ------+ + # | | + # | v + # +----> Shape --> Gather(indices=1) --> Unsqueeze---> Concat --> ConstantOfShape -->Cast --> EmbedLayerNormaliation/ReduceSum + # After: + # input_ids --> Shape --> ConstantOfShape -->Cast --> EmbedLayerNormaliation/ReduceSum + # TODO: merge ConstantOfShape -->Cast to ConstantOfShape (need update the data type of value) + op_input_id = {"EmbedLayerNormalization": 1, "ReduceSum": 0, "Attention": 3} + if node.op_type in op_input_id: + i = op_input_id[node.op_type] + parent_nodes = self.match_parent_path( + node, + [ + "Cast", + "ConstantOfShape", + "Concat", + "Unsqueeze", + "Gather", + "Shape", + ], + [i, 0, 0, 0, 0, 0], + output_name_to_node, + ) + if parent_nodes is not None: + ( + cast, + constantOfShape, + concat, + unsqueeze, + gather, + shape, + ) = parent_nodes + if shape.input[0] == self.graph().input[0].name: + constantOfShape.input[0] = shape.output[0] + output_name_to_node = self.output_name_to_node() + + if node.op_type == "Attention": + # Before: + # input_ids --> Shape -->ConstantOfShape -->Cast --> ReduceSum --> Attention + # After: + # remove this path, and remove the optional mask_index input of Attention node. + parent_nodes = self.match_parent_path( + node, + ["ReduceSum", "Cast", "ConstantOfShape", "Shape"], + [3, 0, 0, 0], + output_name_to_node, + ) + if parent_nodes is not None: + if parent_nodes[-1].input[0] == self.graph().input[0].name: + attention_node = helper.make_node( + "Attention", + inputs=node.input[0 : len(node.input) - 1], + outputs=node.output, + name=node.name + "_remove_mask", + ) + attention_node.domain = "com.microsoft" + attention_node.attribute.extend( + [helper.make_attribute("num_heads", self.num_heads)] + ) + self.add_node( + attention_node, self.get_graph_by_node(attention_node).name + ) + nodes_to_remove.append(node) + self.remove_nodes(nodes_to_remove) + + def postprocess(self): + self.clean_graph() + self.prune_graph() + + def optimize( + self, options: Optional[FusionOptions] = None, add_dynamic_axes: bool = False + ): + if (options is not None) and not options.enable_shape_inference: + self.disable_shape_inference() + + self.utils.remove_identity_nodes() + + # Remove cast nodes that having same data type of input and output based on symbolic shape inference. 
+ self.utils.remove_useless_cast_nodes() + + if (options is None) or options.enable_layer_norm: + self.fuse_layer_norm() + + if (options is None) or options.enable_gelu: + self.fuse_gelu() + + self.preprocess() + + self.fuse_reshape() + + if (options is None) or options.enable_skip_layer_norm: + self.fuse_skip_layer_norm() + + if options.enable_swint_opt: + self.fuse_custom_fc() + self.fuse_swinT_serial_bias_add() + + if options.enable_format_roformer: + self.fuse_format_roformer() + + if options.enable_gpt2_classify or options.enable_vit: + self.fuse_custom_fc_gpt2_classify() + + if options.enable_vit: + self.fuse_custom_fc() + + if (options is None) or options.enable_attention: + if options is not None: + self.attention_mask.set_mask_format(options.attention_mask_format) + self.fuse_attention() + + if (options is None) or options.enable_skip_layer_norm: + self.fuse_skip_layer_norm() + + self.fuse_custom_fc() + + if options.enable_omdet: + self.fuse_omdet_attention() + self.fuse_omdet_inverse_sigmoid() + self.fuse_l2_normalization() + + self.fuse_splitQKV_update_kv_cache() + self.fuse_cosyvoice_attention() + self.fuse_cosyvoice_split_qkv() + + + # Perform the MatMul fusion after the Attention fusion as we do not + # want to fuse the MatMuls inside the Attention subgraphs + if (options is None) or options.enable_qordered_matmul: + self.fuse_qordered_mamtul() + + self.fuse_shape() + + if (options is None) or options.enable_embed_layer_norm: + self.fuse_embed_layer() + + # Remove reshape nodes that having same shape of input and output based on symbolic shape inference. + self.utils.remove_useless_reshape_nodes() + + self.postprocess() + + # Bias fusion is done after postprocess to avoid extra Reshape between bias and Gelu/FastGelu/SkipLayerNormalization + if (options is None) or options.enable_bias_gelu: + # Fuse Gelu and Add Bias before it. + self.fuse_bias_gelu(is_fastgelu=True) + self.fuse_bias_gelu(is_fastgelu=False) + + if (options is None) or options.enable_bias_skip_layer_norm: + # Fuse SkipLayerNormalization and Add Bias before it. + self.fuse_add_bias_skip_layer_norm() + + if options is not None and options.enable_gelu_approximation: + self.gelu_approximation() + + self.fuse_custom_fc_activation() + + if options.enable_vit: + self.fuse_custom_fc_torchvision_vit() + + self.remove_unused_constant() + + # Use symbolic batch dimension in input and output. + if add_dynamic_axes: + self.use_dynamic_axes() + + logger.info(f"opset version: {self.get_opset_version()}") + + def get_fused_operator_statistics(self): + """ + Returns node count of fused operators. + """ + op_count = {} + ops = [ + "EmbedLayerNormalization", + "Attention", + "QOrderedAttention", + "Gelu", + "QOrderedGelu", + "FastGelu", + "BiasGelu", + "LayerNormalization", + "QOrderedLayerNormalization", + "SkipLayerNormalization", + "QOrderedMatMul", + ] + for op in ops: + nodes = self.get_nodes_by_op_type(op) + op_count[op] = len(nodes) + logger.info(f"Optimized operators:{op_count}") + return op_count + + def is_fully_optimized(self): + """ + Returns True when the model is fully optimized. 
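+
+        Heuristic check, mirroring the other BERT-style models here: embed > 0,
+        attention > 0, attention == gelu and layer_norm >= 2 * attention.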
+ """ + op_count = self.get_fused_operator_statistics() + embed = op_count["EmbedLayerNormalization"] + attention = op_count["Attention"] + op_count["QOrderedAttention"] + gelu = op_count["Gelu"] + op_count["BiasGelu"] + op_count["FastGelu"] + layer_norm = op_count["LayerNormalization"] + op_count["SkipLayerNormalization"] + is_perfect = ( + (embed > 0) + and (attention > 0) + and (attention == gelu) + and (layer_norm >= 2 * attention) + ) + + if layer_norm == 0: + logger.debug("Layer Normalization not fused") + + if gelu == 0: + logger.debug("Gelu/FastGelu not fused") + + if embed == 0: + logger.debug("Embed Layer not fused") + + if attention == 0: + logger.warning("Attention not fused") + + return is_perfect diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_roformer.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_roformer.py index 85889319916199298dad2e9d2b47cde052c7c746..7bffb2e7cbec870423cd006d33a617dd1e70d1fb 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_roformer.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_roformer.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. @@ -537,4 +553,3 @@ class RoformerOnnxModel(OnnxModel): logger.warning("Attention not fused") return is_perfect - diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_t5.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_t5.py index 4b1d6b5fec3bfa10533527a72e475cca1bc63b86..dac070d24a66812c4b14cfeff5b7c78ff44c6711 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_t5.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_t5.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
@@ -28,7 +44,11 @@ from passes.fusion_skiplayernorm import ( FusionBiasSkipLayerNormalization, FusionSkipLayerNormalization, ) -from passes.fusion_t5_attention import FusionT5Attention +from passes.fusion_splitQKV_update_KVcache import FusionSplitQKVUpdateKVCache +from passes.fusion_t5_attention import ( + FusionT5DecoderAttention, + FusionT5EncoderAttention, +) from passes.fusion_utils import FusionUtils from passes.onnx_model import OnnxModel @@ -46,7 +66,7 @@ class BertOptimizationOptions(FusionOptions): class T5OnnxModel(OnnxModel): - def __init__(self, model: ModelProto, num_heads: int = 0, hidden_size: int = 0): + def __init__(self, model: ModelProto, num_heads=12, hidden_size=768): """Initialize T5 ONNX Model. Args: @@ -61,7 +81,6 @@ class T5OnnxModel(OnnxModel): super().__init__(model) self.num_heads = num_heads self.hidden_size = hidden_size - self.attention_mask = AttentionMask(self) self.attention_fusion = FusionAttention( self, self.hidden_size, self.num_heads, self.attention_mask @@ -116,13 +135,17 @@ class T5OnnxModel(OnnxModel): fusion = FusionRMSNorm(self) fusion.apply() - def fuse_t5_attention(self): - fusion = FusionT5Attention(self) + def fuse_t5_encoder_attention(self): + fusion = FusionT5EncoderAttention(self) + fusion.apply() + + def fuse_t5_decoder_attention(self): + fusion = FusionT5DecoderAttention(self) fusion.apply() # pass def fuse_layer_norm(self): - fusion = FusionLayerNormalization(self) + fusion = FusionLayerNormalization(self, hidden_size=768) fusion.apply() fusion = FusionLayerNormalizationTF(self) @@ -136,6 +159,10 @@ class T5OnnxModel(OnnxModel): fusion = FusionSkipLayerNormalization(self) fusion.apply() + def fuse_splitQKV_update_kv_cache(self): + fusion = FusionSplitQKVUpdateKVCache(self, self.hidden_size, self.num_heads) + fusion.apply() + # Only relevant in models with Q-DQ nodes def fuse_qordered_mamtul(self): fusion = FusionQOrderedMatMul(self) @@ -433,7 +460,11 @@ class T5OnnxModel(OnnxModel): self.fuse_rms_norm() - self.fuse_t5_attention() + self.fuse_t5_encoder_attention() + + self.fuse_t5_decoder_attention() + + self.fuse_splitQKV_update_kv_cache() if (options is None) or options.enable_embed_layer_norm: self.fuse_embed_layer() diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_yolo.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_yolo.py index 57982d0cc739fd766b5cc87a51479c62dabb22be..42b504c42edfc006b5efac0d385001780d296fb2 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_yolo.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/onnx_model_yolo.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. 
# Licensed under the MIT License. diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/optimizer.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/optimizer.py index 701bd7a41f9a7b87249b1af5e6e8aaac6db4d53d..0f301e3a58e14713c7ebb26342a6fb39ecdca80e 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/optimizer.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/optimizer.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + import argparse import logging import time @@ -10,6 +26,10 @@ from onnx_model_roformer import RoformerOnnxModel from onnx_model_conformer import conformerOnnxModel from onnx_model_t5 import T5OnnxModel from onnx_model_yolo import YoloOnnxModel +from onnx_model_PVT import PVTOnnxModel +from onnx_model_cosyvoice import cosyvoiceOnnxModel + + from onnxsim import simplify from passes.fusion_options import FusionOptions from passes.symbolic_shape_infer import SymbolicShapeInference @@ -24,6 +44,10 @@ MODEL_TYPES = { "yolo": (YoloOnnxModel, None, "pytorch", 1), "vit": (BertOnnxModel, None, "pytorch", 1), "conformer": (conformerOnnxModel, None, "pytorch", 1), + "PVT": (PVTOnnxModel, None, "pytorch", 1), + "omdet": (BertOnnxModel, None, "pytorch", 1), + "cosyvoice": (cosyvoiceOnnxModel, None, "pytorch", 1) + } @@ -81,48 +105,50 @@ def optimize_by_fusion( def optimize_to_ixrt(args): onnx_name = args.onnx[:-5] model = onnx.load(args.onnx) - - logger.info("simplify..") - simplified_model, check = simplify(model) - logger.info("simplify model end...") - if args.dump_onnx: - onnx.save(simplified_model, onnx_name + "_sim.onnx") - - # transfer to static shape and optimize it - static_sim_model = simplified_model - if args.input_shapes: - for input_tensor in simplified_model.graph.input: - if input_tensor.name in args.input_shapes.keys(): - new_shape = args.input_shapes[input_tensor.name] - dim_list = [] - for dim in new_shape: - if isinstance(dim, int): - dim_proto = onnx.TensorShapeProto.Dimension() - dim_proto.dim_value = dim - dim_list.append(dim_proto) - elif isinstance(dim, str): - dim_proto = onnx.TensorShapeProto.Dimension() - dim_proto.dim_param = dim - dim_list.append(dim_proto) - - del input_tensor.type.tensor_type.shape.dim[:] - input_tensor.type.tensor_type.shape.dim.extend(dim_list) - - try: - auto_merge = False - if args.model_type in ["roformer"]: - auto_merge = True - static_model = SymbolicShapeInference.infer_shapes( - simplified_model, 2**31 - 1, auto_merge, False, 3 - ) - static_sim_model, check = simplify(static_model) + if not args.not_sim: + logger.info("simplify..") + simplified_model, check = simplify(model) + logger.info("simplify model end...") if args.dump_onnx: - onnx.save(static_sim_model, onnx_name + "_sim_static_sim.onnx") - except Exception as e: - static_model = static_sim_model = simplified_model + 
onnx.save(simplified_model, onnx_name + "_sim.onnx") + + # transfer to static shape and optimize it + static_sim_model = simplified_model + if args.input_shapes: + for input_tensor in simplified_model.graph.input: + if input_tensor.name in args.input_shapes.keys(): + new_shape = args.input_shapes[input_tensor.name] + dim_list = [] + for dim in new_shape: + if isinstance(dim, int): + dim_proto = onnx.TensorShapeProto.Dimension() + dim_proto.dim_value = dim + dim_list.append(dim_proto) + elif isinstance(dim, str): + dim_proto = onnx.TensorShapeProto.Dimension() + dim_proto.dim_param = dim + dim_list.append(dim_proto) + + del input_tensor.type.tensor_type.shape.dim[:] + input_tensor.type.tensor_type.shape.dim.extend(dim_list) + + try: + auto_merge = False + if args.model_type in ["roformer"]: + auto_merge = True + static_model = SymbolicShapeInference.infer_shapes( + simplified_model, 2**31 - 1, auto_merge, False, 3 + ) + static_sim_model, check = simplify(static_model) + if args.dump_onnx: + onnx.save(static_sim_model, onnx_name + "_sim_static_sim.onnx") + except Exception as e: + static_model = static_sim_model = simplified_model - if args.dump_onnx: - onnx.save(static_model, onnx_name + "_sim_static.onnx") + if args.dump_onnx: + onnx.save(static_model, onnx_name + "_sim_static.onnx") + if args.not_sim: + static_sim_model = model logger.info("start fusion..") opt_model, _ = optimize_by_fusion( @@ -171,7 +197,7 @@ def args_parser(): "--model_type", type=str, default="bert", - choices=["bert", "swint", "roformer", "t5", "yolo", "gpt2", "vit", "conformer"], + choices=["bert", "swint", "roformer", "t5", "yolo", "gpt2", "vit", "conformer","PVT","omdet","cosyvoice"], help="Which kind of model to optimize", ) parser.add_argument( @@ -181,6 +207,13 @@ def args_parser(): choices=["debug", "info", "error"], help="Which kind of model to optimize", ) + + parser.add_argument( + "--not_sim", + action="store_true", + default=False, + help="simplify model or not", + ) return parser.parse_args() diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/__init__.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..de522e5b082b122a28b0a0423a40909598aa82d5 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/__init__.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+# + diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/float16.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/float16.py index 437e72fce0a316ab9d5041e86c6e9e864272a0b2..96da8751b0200bb8610e3dd5070f26ebc51e97ac 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/float16.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/float16.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. @@ -40,14 +56,24 @@ def convert_np_to_float16(np_array, min_positive_val=5.96e-08, max_finite_val=65 def between(a, b, c): return np.logical_and(a < b, b < c) - np_array = np.where(between(0, np_array, min_positive_val), min_positive_val, np_array) - np_array = np.where(between(-min_positive_val, np_array, 0), -min_positive_val, np_array) - np_array = np.where(between(max_finite_val, np_array, float("inf")), max_finite_val, np_array) - np_array = np.where(between(float("-inf"), np_array, -max_finite_val), -max_finite_val, np_array) + np_array = np.where( + between(0, np_array, min_positive_val), min_positive_val, np_array + ) + np_array = np.where( + between(-min_positive_val, np_array, 0), -min_positive_val, np_array + ) + np_array = np.where( + between(max_finite_val, np_array, float("inf")), max_finite_val, np_array + ) + np_array = np.where( + between(float("-inf"), np_array, -max_finite_val), -max_finite_val, np_array + ) return np.float16(np_array) -def convert_tensor_float_to_float16(tensor, min_positive_val=5.96e-08, max_finite_val=65504.0): +def convert_tensor_float_to_float16( + tensor, min_positive_val=5.96e-08, max_finite_val=65504.0 +): """Convert tensor float to float16. 
Args: @@ -63,13 +89,17 @@ def convert_tensor_float_to_float16(tensor, min_positive_val=5.96e-08, max_finit """ if not isinstance(tensor, onnx_proto.TensorProto): - raise ValueError("Expected input type is an ONNX TensorProto but got %s" % type(tensor)) + raise ValueError( + "Expected input type is an ONNX TensorProto but got %s" % type(tensor) + ) if tensor.data_type == onnx_proto.TensorProto.FLOAT: tensor.data_type = onnx_proto.TensorProto.FLOAT16 # convert float_data (float type) to float16 and write to int32_data if tensor.float_data: - float16_data = convert_np_to_float16(np.array(tensor.float_data), min_positive_val, max_finite_val) + float16_data = convert_np_to_float16( + np.array(tensor.float_data), min_positive_val, max_finite_val + ) int_list = _npfloat16_to_int(float16_data) tensor.int32_data[:] = int_list tensor.float_data[:] = [] @@ -78,7 +108,9 @@ def convert_tensor_float_to_float16(tensor, min_positive_val=5.96e-08, max_finit # convert n.raw_data to float float32_list = np.frombuffer(tensor.raw_data, dtype="float32") # convert float to float16 - float16_list = convert_np_to_float16(float32_list, min_positive_val, max_finite_val) + float16_list = convert_np_to_float16( + float32_list, min_positive_val, max_finite_val + ) # convert float16 to bytes and write back to raw_data tensor.raw_data = float16_list.tobytes() return tensor @@ -168,10 +200,14 @@ def convert_float_to_float16( assert ( min_positive_val >= 5.96e-08 ), "invalid min_positive_val. smallest positive float16 value: subnormal 5.96e-08, and normalized 6.104e-05" - assert max_finite_val <= float(np.finfo(np.float16).max), "invalid max_finite_val. largest float16 value: 65504" + assert max_finite_val <= float( + np.finfo(np.float16).max + ), "invalid max_finite_val. largest float16 value: 65504" func_infer_shape = None - if not disable_shape_infer and version.parse(onnx.__version__) >= version.parse("1.2.0"): + if not disable_shape_infer and version.parse(onnx.__version__) >= version.parse( + "1.2.0" + ): try: from onnx.shape_inference import infer_shapes @@ -180,7 +216,9 @@ def convert_float_to_float16( pass if not isinstance(model, onnx_proto.ModelProto): - raise ValueError("Expected model type is an ONNX ModelProto but got %s" % type(model)) + raise ValueError( + "Expected model type is an ONNX ModelProto but got %s" % type(model) + ) # create blocklists if op_block_list is None: @@ -206,8 +244,16 @@ def convert_float_to_float16( graph_io_to_skip = set() io_casts = set() - fp32_inputs = [n.name for n in model.graph.input if n.type.tensor_type.elem_type == onnx_proto.TensorProto.FLOAT] - fp32_outputs = [n.name for n in model.graph.output if n.type.tensor_type.elem_type == onnx_proto.TensorProto.FLOAT] + fp32_inputs = [ + n.name + for n in model.graph.input + if n.type.tensor_type.elem_type == onnx_proto.TensorProto.FLOAT + ] + fp32_outputs = [ + n.name + for n in model.graph.output + if n.type.tensor_type.elem_type == onnx_proto.TensorProto.FLOAT + ] if isinstance(keep_io_types, list): fp32_inputs = [n for n in fp32_inputs if n in keep_io_types] fp32_outputs = [n for n in fp32_outputs if n in keep_io_types] @@ -227,7 +273,9 @@ def convert_float_to_float16( new_value_info.name = output_name new_value_info.type.tensor_type.elem_type = onnx_proto.TensorProto.FLOAT16 # add Cast node (from tensor(float) to tensor(float16) after graph input - new_node = [helper.make_node("Cast", [n.name], [output_name], to=10, name=node_name)] + new_node = [ + helper.make_node("Cast", [n.name], [output_name], to=10, name=node_name) + ] 
model.graph.node.extend(new_node) value_info_list.append(new_value_info) io_casts.add(node_name) @@ -244,7 +292,9 @@ def convert_float_to_float16( new_value_info.CopyFrom(n) new_value_info.name = input_name new_value_info.type.tensor_type.elem_type = onnx_proto.TensorProto.FLOAT16 - new_node = [helper.make_node("Cast", [input_name], [n.name], to=1, name=node_name)] + new_node = [ + helper.make_node("Cast", [input_name], [n.name], to=1, name=node_name) + ] model.graph.node.extend(new_node) value_info_list.append(new_value_info) io_casts.add(node_name) @@ -275,7 +325,9 @@ def convert_float_to_float16( if n.output[i] in name_mapping: n.output[i] = name_mapping[n.output[i]] - is_node_blocked = n.op_type in op_block_list or n.name in node_block_list + is_node_blocked = ( + n.op_type in op_block_list or n.name in node_block_list + ) for input in n.input: if input in fp32_initializers: fp32_initializers[input].add_node(n, is_node_blocked) @@ -296,9 +348,15 @@ def convert_float_to_float16( next_level.append(q.g) for n in q.graphs: next_level.append(n) - q.t.CopyFrom(convert_tensor_float_to_float16(q.t, min_positive_val, max_finite_val)) + q.t.CopyFrom( + convert_tensor_float_to_float16( + q.t, min_positive_val, max_finite_val + ) + ) for n in q.tensors: - n = convert_tensor_float_to_float16(n, min_positive_val, max_finite_val) + n = convert_tensor_float_to_float16( + n, min_positive_val, max_finite_val + ) # if q is graph, process input, output and value_info (ValueInfoProto) if isinstance(q, onnx_proto.GraphProto): # Note that float initializers tracked by fp32_initializers will be processed later. @@ -307,12 +365,19 @@ def convert_float_to_float16( for n in itertools.chain(q.input, q.output, q.value_info): if n.type.tensor_type.elem_type == onnx_proto.TensorProto.FLOAT: if n.name not in graph_io_to_skip: - n.type.tensor_type.elem_type = onnx_proto.TensorProto.FLOAT16 + n.type.tensor_type.elem_type = ( + onnx_proto.TensorProto.FLOAT16 + ) value_info_list.append(n) if n.type.HasField("sequence_type"): - if n.type.sequence_type.elem_type.tensor_type.elem_type == onnx_proto.TensorProto.FLOAT: + if ( + n.type.sequence_type.elem_type.tensor_type.elem_type + == onnx_proto.TensorProto.FLOAT + ): if n.name not in graph_io_to_skip: - n.type.sequence_type.elem_type.tensor_type.elem_type = onnx_proto.TensorProto.FLOAT16 + n.type.sequence_type.elem_type.tensor_type.elem_type = ( + onnx_proto.TensorProto.FLOAT16 + ) value_info_list.append(n) queue = next_level @@ -320,7 +385,9 @@ def convert_float_to_float16( for key, value in fp32_initializers.items(): # By default, to avoid precision loss, do not convert an initializer to fp16 when it is used only by fp32 nodes. 
if force_fp16_initializers or value.fp16_nodes: - value.initializer = convert_tensor_float_to_float16(value.initializer, min_positive_val, max_finite_val) + value.initializer = convert_tensor_float_to_float16( + value.initializer, min_positive_val, max_finite_val + ) value_info_list.append(make_value_info_from_tensor(value.initializer)) if value.fp32_nodes and not force_fp16_initializers: logger.info( @@ -343,10 +410,16 @@ def convert_float_to_float16( new_value_info.CopyFrom(value_info) output_name = node.name + "_input_cast_" + str(i) new_value_info.name = output_name - new_value_info.type.tensor_type.elem_type = onnx_proto.TensorProto.FLOAT + new_value_info.type.tensor_type.elem_type = ( + onnx_proto.TensorProto.FLOAT + ) # add Cast node (from tensor(float16) to tensor(float) before current node node_name = node.name + "_input_cast" + str(i) - new_node = [helper.make_node("Cast", [input], [output_name], to=1, name=node_name)] + new_node = [ + helper.make_node( + "Cast", [input], [output_name], to=1, name=node_name + ) + ] model.graph.node.extend(new_node) # change current node's input name node.input[i] = output_name @@ -362,10 +435,16 @@ def convert_float_to_float16( new_value_info.CopyFrom(value_info) input_name = node.name + "_output_cast_" + str(i) new_value_info.name = input_name - new_value_info.type.tensor_type.elem_type = onnx_proto.TensorProto.FLOAT + new_value_info.type.tensor_type.elem_type = ( + onnx_proto.TensorProto.FLOAT + ) # add Cast node (from tensor(float) to tensor(float16) after current node node_name = node.name + "_output_cast" + str(i) - new_node = [helper.make_node("Cast", [input_name], [output], to=10, name=node_name)] + new_node = [ + helper.make_node( + "Cast", [input_name], [output], to=10, name=node_name + ) + ] model.graph.node.extend(new_node) # change current node's input name node.output[i] = input_name @@ -373,10 +452,14 @@ def convert_float_to_float16( return model -def float_to_float16_max_diff(tensor, min_positive_val=5.96e-08, max_finite_val=65504.0): +def float_to_float16_max_diff( + tensor, min_positive_val=5.96e-08, max_finite_val=65504.0 +): """Measure the maximum absolute difference after converting a float tensor to float16.""" if not isinstance(tensor, onnx_proto.TensorProto): - raise ValueError("Expected input type is an ONNX TensorProto but got %s" % type(tensor)) + raise ValueError( + "Expected input type is an ONNX TensorProto but got %s" % type(tensor) + ) if tensor.data_type != onnx_proto.TensorProto.FLOAT: raise ValueError("Expected tensor data type is float.") diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fuse_inverse_sigmoid.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fuse_inverse_sigmoid.py new file mode 100644 index 0000000000000000000000000000000000000000..9862d9ee4bee8da619750b2544ddc48d35be0fa9 --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fuse_inverse_sigmoid.py @@ -0,0 +1,85 @@ + +from logging import getLogger +from typing import Dict + +import numpy as np +from onnx import TensorProto, helper + +from .fusion_base import Fusion +from .onnx_model import OnnxModel + +logger = getLogger(__name__) + +class FusionLayerInverseSigmoid(Fusion): + def __init__(self, model: OnnxModel): + super().__init__( + model, "InverseSigmoid", "Clip" + ) + + def fuse(self, node, input_name_to_nodes: Dict, output_name_to_node: Dict): + """ + +------------Clip-----------+ + | | + | v + [Root] --> 
Clip--> Sub --> Clip --> Div --> Log + """ + children = self.model.get_children(node, input_name_to_nodes) + if len(children) != 2: + return + + root_input = node.input[0] + + if not ((children[0].op_type == "Sub" and children[1].op_type == "Clip") or (children[0].op_type == "Clip" and children[1].op_type == "Sub")): + return + + log_node = None + for child in children: + log_node = self.model.find_first_child_by_type( + child, "Log", input_name_to_nodes, recursive=True + ) + if log_node is not None: + break + if log_node is None: + return + parent_nodes = self.model.match_parent_path( + log_node, + ["Div", "Clip", "Sub", "Clip"], + [0, 1, 0, 1], + output_name_to_node, + ) + if parent_nodes is None: + return + + sub_node = parent_nodes[2] + if sub_node not in children: + return + + div_node = parent_nodes[0] + div_parents_nodes = self.model.get_parents(div_node) + if len(div_parents_nodes) != 2: + return + if div_parents_nodes[0].op_type != "Clip": + return + if div_parents_nodes[0] not in children: + return + + subgraph_nodes = [node] + subgraph_nodes.extend([log_node]) + subgraph_nodes.extend(parent_nodes) + subgraph_nodes.extend([div_parents_nodes[0]]) + _, eps_val = self.model.get_constant_input(div_parents_nodes[0]) + + self.nodes_to_remove.extend(subgraph_nodes) + inverse_sigmoid_node = helper.make_node( + "InverseSigmoid", + inputs=[node.input[0]], + outputs=[log_node.output[0]], + name=self.model.create_node_name( + "InverseSigmoid", name_prefix="InverseSigmoid" + ), + ) + inverse_sigmoid_node.attribute.extend( + [helper.make_attribute("epsilon", float(eps_val))] + ) + self.nodes_to_add.append(inverse_sigmoid_node) + self.node_name_to_graph_name[inverse_sigmoid_node.name] = self.this_graph_name \ No newline at end of file diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fuse_l2_normalization.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fuse_l2_normalization.py new file mode 100644 index 0000000000000000000000000000000000000000..bfd1ed28eb8b0f3d7c65b1e31da8c1dc45415ce7 --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fuse_l2_normalization.py @@ -0,0 +1,69 @@ +from logging import getLogger +from typing import Dict + +import numpy as np +from onnx import TensorProto, helper + +from .fusion_base import Fusion +from .onnx_model import OnnxModel + +logger = getLogger(__name__) + +class FusionLayerL2Normalization(Fusion): + def __init__(self, model: OnnxModel): + super().__init__( + model, "L2Normalization", "Abs" + ) + + def fuse(self, node, input_name_to_nodes: Dict, output_name_to_node: Dict): + """ + +-------------------------------------------------------+ + | | + | v + [Root] --> Abs--> Pow --> ReduceSum --> Pow --> Clip --> Div + """ + pow1_nodes = self.model.get_children(node, input_name_to_nodes) + if len(pow1_nodes) != 1 or pow1_nodes[0].op_type != "Pow": + return + + reduce_nodes = self.model.get_children(pow1_nodes[0], input_name_to_nodes) + if len(reduce_nodes) != 1 or reduce_nodes[0].op_type != "ReduceSum": + return + + pow2_nodes = self.model.get_children(reduce_nodes[0], input_name_to_nodes) + if len(pow2_nodes) != 1 or pow2_nodes[0].op_type != "Pow": + return + + clip_nodes = self.model.get_children(pow2_nodes[0], input_name_to_nodes) + if len(clip_nodes) != 1 or clip_nodes[0].op_type != "Clip": + return + + div_nodes = self.model.get_children(clip_nodes[0], input_name_to_nodes) + if len(div_nodes) != 1 or div_nodes[0].op_type != 
"Div": + return + + root_input = node.input[0] + if div_nodes[0].input[0] != root_input: + return + + subgraph_nodes = [node, pow1_nodes[0], reduce_nodes[0], pow2_nodes[0], clip_nodes[0], div_nodes[0]] + _, eps_val = self.model.get_constant_input(clip_nodes[0]) + _, norm_axes = self.model.get_constant_input(reduce_nodes[0]) + norm_axes = norm_axes.astype(np.int32) + + self.nodes_to_remove.extend(subgraph_nodes) + l2_normalization_node = helper.make_node( + "L2Normalization", + inputs=[node.input[0]], + outputs=[div_nodes[0].output[0]], + name=self.model.create_node_name( + "L2Normalization", name_prefix="L2Normalization" + ), + ) + l2_normalization_node.attribute.extend( + [helper.make_attribute("epsilon", float(eps_val)), + helper.make_attribute("axes", norm_axes), + helper.make_attribute("axes_length", int(norm_axes.size))] + ) + self.nodes_to_add.append(l2_normalization_node) + self.node_name_to_graph_name[l2_normalization_node.name] = self.this_graph_name \ No newline at end of file diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fuse_omdet_attention.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fuse_omdet_attention.py new file mode 100644 index 0000000000000000000000000000000000000000..3451731f835ef05d8e61e0b5da2ef724be808f17 --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fuse_omdet_attention.py @@ -0,0 +1,149 @@ + +from logging import getLogger +from typing import Dict + +import math +import numpy as np +from onnx import TensorProto, helper + +from .fusion_base import Fusion +from .onnx_model import OnnxModel + +logger = getLogger(__name__) + +class FusionLayerOmdetAttention(Fusion): + def __init__(self, model: OnnxModel): + super().__init__( + model, "CustomQKVToContextPluginDynamic_IxRT", "CustomFCPluginDynamic_IxRT" + ) + + def fuse(self, node, input_name_to_nodes: Dict, output_name_to_node: Dict): + """ + [Root] --> CustomFCPluginDynamic_IxRT--> CustomQKVToContextPluginDynamic_IxRT --> CustomFCPluginDynamic_IxRT + """ + children = self.model.get_children(node, input_name_to_nodes) + parent = self.model.get_parents(node, output_name_to_node) + + if len(children) != 1: + return + if len(parent) != 1: + return + + fc_first_node = None + for par in parent: + fc_first_node = self.model.find_first_parent_by_type( + par, "CustomFCPluginDynamic_IxRT", output_name_to_node, recursive=True + ) + if fc_first_node is not None: + break + if fc_first_node is None: + return + + start_node = node + + # v path + v_nodes = self.model.match_parent_path( + start_node, + ["Reshape", "Transpose", "MatMul", "Gather", "Transpose", "Reshape"], + [0, 0, 0, 1, 0, 0], + output_name_to_node, + ) + + # path1, q and k path + q_nodes = self.model.match_parent_path( + start_node, + ["Reshape", "Transpose", "MatMul", "Softmax", "Add", "MatMul", "Transpose", "Gather", "Transpose", "Reshape"], + [0, 0, 0, 0, 0, 0, 1, 0, 0, 0], + output_name_to_node, + ) + + k_nodes = self.model.match_parent_path( + start_node, + ["Reshape", "Transpose", "MatMul", "Softmax", "Add", "MatMul", "Mul", "Gather", "Transpose", "Reshape"], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + output_name_to_node, + ) + + # path2, q and k path + q_nodes_1 = self.model.match_parent_path( + start_node, + ["Reshape", "Transpose", "MatMul", "Softmax", "Reshape", "Add", "Reshape", "Add", "MatMul", "Transpose", "Gather", "Transpose", "Reshape"], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0], + output_name_to_node, + ) + + 
k_nodes_1 = self.model.match_parent_path( + start_node, + ["Reshape", "Transpose", "MatMul", "Softmax", "Reshape", "Add", "Reshape", "Add", "MatMul", "Mul", "Gather", "Transpose", "Reshape"], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + output_name_to_node, + ) + + if v_nodes is None: + return + + if v_nodes and q_nodes and k_nodes: + subgraph_nodes = [] + subgraph_nodes.extend(q_nodes) + subgraph_nodes.extend(k_nodes) + subgraph_nodes.extend(v_nodes) + + subgraph_nodes_unique = [] + for item in subgraph_nodes: + if item not in subgraph_nodes_unique: + subgraph_nodes_unique.append(item) + + add_node = q_nodes[4] + hidden_size = start_node.attribute[0].i + _, mul_val = self.model.get_constant_input(k_nodes[6]) + num_heads = hidden_size // math.floor((1/mul_val)*(1/ mul_val)) + attention_input_1_name = add_node.input[1] + + if v_nodes and q_nodes_1 and k_nodes_1: + subgraph_nodes = [] + subgraph_nodes.extend(q_nodes_1) + subgraph_nodes.extend(k_nodes_1) + subgraph_nodes.extend(v_nodes) + + subgraph_nodes_unique = [] + for item in subgraph_nodes: + if item not in subgraph_nodes_unique: + subgraph_nodes_unique.append(item) + + hidden_size = start_node.attribute[0].i + _, mul_val = self.model.get_constant_input(k_nodes_1[9]) + num_heads = hidden_size // math.floor((1/mul_val)*(1/ mul_val)) + + add_1 = self.model.get_initializer(q_nodes_1[5].input[1], True) + add_2 = self.model.get_initializer(q_nodes_1[7].input[1], True) + add_all = np.squeeze(add_1 + add_2) + + attention_input_1_name = "attention_" + q_nodes_1[5].input[1] + attention_input_1 = helper.make_tensor( + attention_input_1_name, TensorProto.FLOAT, add_all.shape, add_all.flatten().tolist()) + + self.model.add_initializer(attention_input_1, self.this_graph_name) + + attention_node = helper.make_node( + "CustomQKVToContextPluginDynamic_IxRT", + inputs=[fc_first_node.output[0], attention_input_1_name], + outputs=[start_node.input[0]], + name=self.model.create_node_name( + "OmdetAttention", name_prefix="OmdetAttention" + ), + ) + attention_node.domain = "com.iluvatar" + attention_node.attribute.extend([helper.make_attribute("type_id", 2)]) + attention_node.attribute.extend([helper.make_attribute("num_heads", num_heads)]) + attention_node.attribute.extend([helper.make_attribute("hidden_size", hidden_size)]) + attention_node.attribute.extend([helper.make_attribute("has_mask", 1)]) + attention_node.attribute.extend([helper.make_attribute("plugin_namespace", "")]) + attention_node.attribute.extend([helper.make_attribute("plugin_version", "1")]) + attention_node.attribute.extend([helper.make_attribute("has_qk_bias", 1)]) + + self.nodes_to_remove.extend(subgraph_nodes_unique) + + self.nodes_to_add.append(attention_node) + self.node_name_to_graph_name[attention_node.name] = self.this_graph_name + + \ No newline at end of file diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fuse_series_bias_add.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fuse_series_bias_add.py index e1fde76f93917ecdce2b22defc5dc5d4bd5bdaea..bb9a1cab034aaf714b416ea971ac9e6d69884894 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fuse_series_bias_add.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fuse_series_bias_add.py @@ -1,11 +1,28 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + from logging import getLogger +import numpy as np +import onnx +from onnx import NodeProto, TensorProto, helper, numpy_helper + from .fusion_base import Fusion from .fusion_utils import NumpyHelper -from onnx import NodeProto, TensorProto, helper, numpy_helper from .onnx_model import OnnxModel -import numpy as np -import onnx logger = getLogger(__name__) @@ -35,7 +52,7 @@ class FusionSerialBiasAdd(Fusion): biases = [ self.model.get_initializer(add_1st.input[1]), - self.model.get_initializer(add_2nd.input[1]) + self.model.get_initializer(add_2nd.input[1]), ] if not all(biases): return diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_PVT_attention.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_PVT_attention.py new file mode 100644 index 0000000000000000000000000000000000000000..2d4cc73a9dcb1c8d31d778b380bd0e8a13f454e9 --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_PVT_attention.py @@ -0,0 +1,130 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- +import math +from enum import Enum +from logging import getLogger +from os import name +from sys import path +from typing import Tuple, Union + +import numpy as np +import onnx +from onnx import NodeProto, TensorProto, helper, numpy_helper + +from .fusion_base import Fusion +from .fusion_options import AttentionMaskFormat +from .fusion_utils import FusionUtils, NumpyHelper +from .onnx_model import OnnxModel +from .shape_infer_helper import SymbolicShapeInferenceHelper, get_shape_from_type_proto + +logger = getLogger(__name__) + + +class FusionPVTAttention(Fusion): + """ + Fuse FusionPVTAttention subgraph into one Attention node. + """ + + def __init__( + self, + model: OnnxModel, + ): + super().__init__( + model, + "CustomQkvCrossToContext_IxRT", + ["Softmax"], + ) + + # Flags to show warning only once + self.num_heads_warning = False + self.hidden_size_warning = False + + + def create_decoder_attention_node( + self, inputs: str, outputs: str, type_mask: int, has_mask: int,scale: float + ) -> Union[NodeProto, None]: + """Create an Attention node. + + Args: + input (str): input name + output (str): output name + + Returns: + Union[NodeProto, None]: the node created or None if failed. 
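+
+        Note:
+            the node emitted here is a CustomQkvCrossToContext_IxRT plugin op;
+            its scale, has_mask and type_mask attributes are taken from the
+            arguments, and type_id is fixed to 2 (see the attribute list below).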
+ """ + + attention_node_name = self.model.create_node_name("cross_Attention") + attention_node = helper.make_node( + "CustomQkvCrossToContext_IxRT", + inputs=inputs, + outputs=outputs, + name=attention_node_name, + ) + attention_node.domain = "com.iluvatar" + attention_node.attribute.extend([helper.make_attribute("type_id", 2)]) + attention_node.attribute.extend([helper.make_attribute("scale", scale)]) + attention_node.attribute.extend([helper.make_attribute("has_mask", has_mask)]) + attention_node.attribute.extend([helper.make_attribute("plugin_namespace", "")]) + attention_node.attribute.extend([helper.make_attribute("plugin_version", "1")]) + attention_node.attribute.extend([helper.make_attribute("type_mask", type_mask)]) + + return attention_node + + def fuse(self, node, input_name_to_nodes, output_name_to_node): + + """ + path: + + (query) ---------------->MatMul ---->Mul --->softmax --->MatMul---> + / / + (key) ---->Transpose --> / + / + / + / + (value)---------------------------------------------> + + """ + + start_node = node + qkv_paths = { + "path": (["Mul", "MatMul", "Transpose"], [0, 0, 0]), # cross attention qery pass + } + + qkv_nodes, qkv_path = self.match_parent_path_from_dict(start_node, qkv_paths) + if qkv_nodes is None: + logger.debug("fuse_attention: failed to match qkv path") + return + next_nodes = self.model.get_children(node) + if len(next_nodes) == 0: + return + + if next_nodes[0].op_type != "MatMul": + return + + second_matmul_node = next_nodes[0] + attention_outputs = second_matmul_node.output + remove_nodes = [second_matmul_node, node] + + + + (mul_node, first_matmul_node, transpose_node) = qkv_nodes + transpose_nodes = self.model.get_parents(first_matmul_node) + + q_input = transpose_nodes[0].output[0] + k_input = transpose_nodes[1].input[0] + v_input = second_matmul_node.input[1] + attention_inputs = [q_input, k_input, v_input] + remove_nodes.extend([first_matmul_node, mul_node, transpose_nodes[1]]) + + has_mask = 0 + type_mask = 4 + + scale = numpy_helper.to_array(self.model.get_initializer(mul_node.input[1])).item() + atten_node = self.create_decoder_attention_node( + attention_inputs, attention_outputs, type_mask, has_mask,scale + ) + self.nodes_to_add.append(atten_node) + self.node_name_to_graph_name[atten_node.name] = self.this_graph_name + self.nodes_to_remove.extend(remove_nodes) \ No newline at end of file diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_albert_attention.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_albert_attention.py index 47b8ec777a026d97256547d80e5a3c9d6ef77c2d..a3e31fe7dd164b86cf9e6f4e476bc0b31246e747 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_albert_attention.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_albert_attention.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. @@ -66,12 +82,11 @@ class FusionAlbertAttention(Fusion): """ # we assume that reshape fusion has done, so the shape is a tensor like [0, 0, num_heads, head_size] - q_shape = self.model.get_initializer(reshape_q.input[1]) - if q_shape is None: + q_shape_value = self.model.get_constant_value(reshape_q.input[1]) + if q_shape_value is None: logger.debug(f"{reshape_q.input[1]} is not initializer.") return self.num_heads, self.hidden_size # Fall back to user specified value - q_shape_value = NumpyHelper.to_array(q_shape) if len(q_shape_value) != 4 or (q_shape_value[2] <= 0 or q_shape_value[3] <= 0): logger.debug( f"q_shape_value={q_shape_value}. Expected value are like [0, 0, num_heads, head_size]." @@ -413,11 +428,13 @@ class FusionAlbertAttention(Fusion): is_distill = False is_distill_add = False + is_mul_split = False qk_paths = { "path1": (["Softmax", "Add", "Div", "MatMul"], [0, 0, None, 0]), "path2": (["Softmax", "Add", "Mul", "MatMul"], [0, 0, None, 0]), "path3": (["Softmax", "Where", "MatMul", "Div"], [0, 0, 2, 0]), "path4": (["Softmax", "Add", "Where", "MatMul"], [0, 0, 0, 2]), + "path5": (["Softmax", "Add", "MatMul"], [0, 0, None]) } qk_nodes = None @@ -429,12 +446,13 @@ class FusionAlbertAttention(Fusion): is_distill = True if k == "path4": is_distill_add = True + if k == "path5": + is_mul_split = True break if qk_nodes is None: logger.debug("fuse_attention: failed to match qk path") return - add_qk = None matmul_qk = None where_qk = None @@ -442,6 +460,8 @@ class FusionAlbertAttention(Fusion): (_, where_qk, matmul_qk, _) = qk_nodes elif is_distill_add: (_, add_qk, where_qk, matmul_qk) = qk_nodes + elif is_mul_split: + (_, add_qk, matmul_qk) = qk_nodes else: (_, add_qk, _, matmul_qk) = qk_nodes @@ -454,6 +474,12 @@ class FusionAlbertAttention(Fusion): ["Div", "Transpose", "Reshape", "Add", "MatMul"], [0, 0, 0, 0, None], ) + if q_nodes is None and is_mul_split: + q_nodes = self.model.match_parent_path( + matmul_qk, + ["Mul", "Transpose", "Reshape", "Add", "MatMul"], + [0, 0, 0, 0, None], + ) if q_nodes is None: logger.debug("fuse_attention: failed to match q path") return @@ -470,6 +496,13 @@ class FusionAlbertAttention(Fusion): ["Transpose", "Transpose", "Reshape", "Add", "MatMul"], [1, 0, 0, 0, None], ) + if k_nodes is None and is_mul_split: + k_nodes = self.model.match_parent_path( + matmul_qk, + ["Mul", "Transpose", "Reshape", "Add", "MatMul"], + [1, 0, 0, 0, None], + ) + if k_nodes is None: logger.debug("fuse_attention: failed to match k path") return @@ -505,6 +538,14 @@ class FusionAlbertAttention(Fusion): f"fuse_attention: failed to verify shape inference of {add_qk}" ) return + elif is_mul_split: + _, mask_nodes, _ = self.model.match_parent_paths( + add_qk, + [ + (["Where", "Cast", "Sub", "Cast", "Expand", "Unsqueeze"], [None, 0, 0, 1, 0, 0]) + ], + output_name_to_node, + ) else: _, mask_nodes, _ = self.model.match_parent_paths( add_qk, diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_attention.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_attention.py index c750721836750e7826a06e83a71138001dc79510..38ddf62986b46b350cdf158eeccfcf1e3602fe0c 100644 --- 
a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_attention.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_attention.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. @@ -9,10 +25,11 @@ from sys import path from typing import Tuple, Union import numpy as np +from onnx import NodeProto, TensorProto, helper, numpy_helper + from .fusion_base import Fusion from .fusion_options import AttentionMaskFormat from .fusion_utils import FusionUtils, NumpyHelper -from onnx import NodeProto, TensorProto, helper, numpy_helper from .onnx_model import OnnxModel from .shape_infer_helper import SymbolicShapeInferenceHelper, get_shape_from_type_proto @@ -75,7 +92,9 @@ class AttentionMask: outputs=[output_name], name=self.model.create_node_name("ReduceSum", "MaskReduceSum"), ) - mask_index_node.attribute.extend([helper.make_attribute("axes", [1]), helper.make_attribute("keepdims", 0)]) + mask_index_node.attribute.extend( + [helper.make_attribute("axes", [1]), helper.make_attribute("keepdims", 0)] + ) self.model.add_node(mask_index_node) self.mask_indice[input] = output_name @@ -94,7 +113,9 @@ class FusionAttention(Fusion): num_heads: int, attention_mask: AttentionMask, ): - super().__init__(model, "Attention", ["SkipLayerNormalization", "LayerNormalization"]) + super().__init__( + model, "Attention", ["SkipLayerNormalization", "LayerNormalization"] + ) self.hidden_size = hidden_size self.num_heads = num_heads self.attention_mask = attention_mask @@ -121,7 +142,9 @@ class FusionAttention(Fusion): q_shape_value = NumpyHelper.to_array(q_shape) if len(q_shape_value) != 4 or (q_shape_value[2] <= 0 or q_shape_value[3] <= 0): - logger.debug(f"q_shape_value={q_shape_value}. Expected value are like [0, 0, num_heads, head_size].") + logger.debug( + f"q_shape_value={q_shape_value}. Expected value are like [0, 0, num_heads, head_size]." + ) return self.num_heads, self.hidden_size # Fall back to user specified value num_heads = q_shape_value[2] @@ -130,7 +153,9 @@ class FusionAttention(Fusion): if self.num_heads > 0 and num_heads != self.num_heads: if self.num_heads_warning: - logger.warning(f"--num_heads is {self.num_heads}. Detected value is {num_heads}. Using detected value.") + logger.warning( + f"--num_heads is {self.num_heads}. Detected value is {num_heads}. Using detected value." + ) self.num_heads_warning = False # Do not show the warning more than once if self.hidden_size > 0 and hidden_size != self.hidden_size: @@ -138,7 +163,9 @@ class FusionAttention(Fusion): logger.warning( f"--hidden_size is {self.hidden_size}. Detected value is {hidden_size}. Using detected value." 
) - self.hidden_size_warning = False # Do not show the warning more than once + self.hidden_size_warning = ( + False # Do not show the warning more than once + ) return num_heads, hidden_size @@ -196,15 +223,23 @@ class FusionAttention(Fusion): assert num_heads > 0 if hidden_size > 0 and (hidden_size % num_heads) != 0: - logger.debug(f"input hidden size {hidden_size} is not a multiple of num of heads {num_heads}") + logger.debug( + f"input hidden size {hidden_size} is not a multiple of num of heads {num_heads}" + ) return None q_weight = self.model.get_initializer(q_matmul.input[1]) k_weight = self.model.get_initializer(k_matmul.input[1]) v_weight = self.model.get_initializer(v_matmul.input[1]) - q_bias = self.model.get_initializer(q_add.input[1]) or self.model.get_initializer(q_add.input[0]) - k_bias = self.model.get_initializer(k_add.input[1]) or self.model.get_initializer(k_add.input[0]) - v_bias = self.model.get_initializer(v_add.input[1]) or self.model.get_initializer(v_add.input[0]) + q_bias = self.model.get_initializer( + q_add.input[1] + ) or self.model.get_initializer(q_add.input[0]) + k_bias = self.model.get_initializer( + k_add.input[1] + ) or self.model.get_initializer(k_add.input[0]) + v_bias = self.model.get_initializer( + v_add.input[1] + ) or self.model.get_initializer(v_add.input[0]) if q_weight is None: print( @@ -283,7 +318,11 @@ class FusionAttention(Fusion): # Sometimes weights and bias are stored in fp16 if q_weight.data_type == 10: - weight.CopyFrom(numpy_helper.from_array(NumpyHelper.to_array(weight).astype(np.float16), weight.name)) + weight.CopyFrom( + numpy_helper.from_array( + NumpyHelper.to_array(weight).astype(np.float16), weight.name + ) + ) self.model.add_initializer(weight, self.this_graph_name) bias = helper.make_tensor( @@ -293,7 +332,11 @@ class FusionAttention(Fusion): vals=qkv_bias.flatten().tolist(), ) if q_bias.data_type == 10: - bias.CopyFrom(numpy_helper.from_array(NumpyHelper.to_array(bias).astype(np.float16), bias.name)) + bias.CopyFrom( + numpy_helper.from_array( + NumpyHelper.to_array(bias).astype(np.float16), bias.name + ) + ) self.model.add_initializer(bias, self.this_graph_name) attention_inputs = [ @@ -321,7 +364,11 @@ class FusionAttention(Fusion): if is_qkv_diff_dims: attention_node.attribute.extend( - [helper.make_attribute("qkv_hidden_sizes", [qw_out_size, kw_out_size, vw_out_size])] + [ + helper.make_attribute( + "qkv_hidden_sizes", [qw_out_size, kw_out_size, vw_out_size] + ) + ] ) return attention_node @@ -400,7 +447,9 @@ class FusionAttention(Fusion): if children_types.count("MatMul") != 3: return - v_nodes = self.model.match_parent_path(matmul_qkv, ["Transpose", "Reshape", "Add", "MatMul"], [1, 0, 0, None]) + v_nodes = self.model.match_parent_path( + matmul_qkv, ["Transpose", "Reshape", "Add", "MatMul"], [1, 0, 0, None] + ) if v_nodes is None: logger.debug("fuse_attention: failed to match v path") return @@ -440,7 +489,9 @@ class FusionAttention(Fusion): else: (_, add_qk, _, matmul_qk) = qk_nodes - q_nodes = self.model.match_parent_path(matmul_qk, ["Transpose", "Reshape", "Add", "MatMul"], [0, 0, 0, None]) + q_nodes = self.model.match_parent_path( + matmul_qk, ["Transpose", "Reshape", "Add", "MatMul"], [0, 0, 0, None] + ) if q_nodes is None: q_nodes = self.model.match_parent_path( matmul_qk, @@ -454,7 +505,9 @@ class FusionAttention(Fusion): add_q = q_nodes[-2] matmul_q = q_nodes[-1] - k_nodes = self.model.match_parent_path(matmul_qk, ["Transpose", "Reshape", "Add", "MatMul"], [1, 0, 0, None]) + k_nodes = 
self.model.match_parent_path( + matmul_qk, ["Transpose", "Reshape", "Add", "MatMul"], [1, 0, 0, None] + ) if k_nodes is None: k_nodes = self.model.match_parent_path( matmul_qk, @@ -492,7 +545,9 @@ class FusionAttention(Fusion): if add_qk is not None: add_qk_str = self.get_add_qk_str(add_qk) if add_qk_str is None: - logger.debug(f"fuse_attention: failed to verify shape inference of {add_qk}") + logger.debug( + f"fuse_attention: failed to verify shape inference of {add_qk}" + ) return else: _, mask_nodes, _ = self.model.match_parent_paths( @@ -510,7 +565,11 @@ class FusionAttention(Fusion): logger.debug("fuse_attention: failed to match mask path") return - if matmul_v.input[0] == root_input and matmul_q.input[0] == root_input and matmul_k.input[0] == root_input: + if ( + matmul_v.input[0] == root_input + and matmul_q.input[0] == root_input + and matmul_k.input[0] == root_input + ): mask_index = self.attention_mask.process_mask(mask_nodes[-1].input[0]) attention_last_node = reshape_qkv if einsum_node is None else transpose_qkv @@ -545,7 +604,9 @@ class FusionAttention(Fusion): name="shape_modified_tensor" + unique_index, data_type=TensorProto.INT64, dims=[4], - vals=np.int64([0, 0, q_num_heads, int(q_hidden_size / q_num_heads)]).tobytes(), + vals=np.int64( + [0, 0, q_num_heads, int(q_hidden_size / q_num_heads)] + ).tobytes(), raw=True, ) self.model.add_initializer(shape_tensor, self.this_graph_name) @@ -560,7 +621,9 @@ class FusionAttention(Fusion): ) einsum_node.input[0] = new_edge - self.nodes_to_remove.extend([attention_last_node, transpose_qkv, matmul_qkv]) + self.nodes_to_remove.extend( + [attention_last_node, transpose_qkv, matmul_qkv] + ) self.nodes_to_remove.extend(qk_nodes) self.nodes_to_remove.extend(q_nodes) self.nodes_to_remove.extend(k_nodes) diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_base.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_base.py index aaf742a45f7c8a56f1166a32d3b803fb497fe041..3732b0f5fab40cbb269f18abdd56286f298a5493 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_base.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_base.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_biasgelu.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_biasgelu.py index 8e3406c7f231b04b6367b4311da315bf8eb3f7df..045cd99380a7535079d0f9f33322e2879d2074c0 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_biasgelu.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_biasgelu.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. @@ -5,9 +21,10 @@ from logging import getLogger +from onnx import helper + from .fusion_base import Fusion from .fusion_utils import NumpyHelper -from onnx import helper from .onnx_model import OnnxModel logger = getLogger(__name__) diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_conformer_attention.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_conformer_attention.py index e825f95cbe698d9831b7291d5b03336797a8db85..21161727373b1ceee5362bc2fa0e713f17e899ae 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_conformer_attention.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_conformer_attention.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_conformer_xsoftmax.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_conformer_xsoftmax.py
index 78a40973f37cb0ac9f089ad971a293cb906ffa53..b55c2412b07067d3ebb05cc080be6a3a31902e22 100644
--- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_conformer_xsoftmax.py
+++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_conformer_xsoftmax.py
@@ -1,3 +1,19 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+#
+
 # -------------------------------------------------------------------------
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_conv_reformat.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_conv_reformat.py
new file mode 100644
index 0000000000000000000000000000000000000000..23cdd0c2d0dca61bf66eb1f484e3093f4d7bf0c6
--- /dev/null
+++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_conv_reformat.py
@@ -0,0 +1,128 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+import math
+from enum import Enum
+from logging import getLogger
+from os import name
+from sys import path
+from typing import Tuple, Union
+
+import numpy as np
+import onnx
+from onnx import NodeProto, TensorProto, helper, numpy_helper
+
+from .fusion_base import Fusion
+from .fusion_options import AttentionMaskFormat
+from .fusion_utils import FusionUtils, NumpyHelper
+from .onnx_model import OnnxModel
+from .shape_infer_helper import SymbolicShapeInferenceHelper, get_shape_from_type_proto
+
+logger = getLogger(__name__)
+
+
+class FusionConvReformat(Fusion):
+    """
+    Fuse the Transpose + Reshape subgraphs around a Conv into FuseConvReformat_IxRT nodes.
+    """
+
+    def __init__(
+        self,
+        model: OnnxModel,
+    ):
+        super().__init__(
+            model,
+            "FuseConvReformat_IxRT",
+            ["Transpose"],
+        )
+
+
+    def create_fuse_node(
+        self, inputs: str, outputs: str, before_conv: int, shape_data: list, prefix
+    ) -> Union[NodeProto, None]:
+        """Create a FuseConvReformat_IxRT node.
+
+        Args:
+            input (str): input name
+            output (str): output name
+
+        Returns:
+            Union[NodeProto, None]: the node created or None if failed.
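+
+        Note:
+            `before_conv` is 1 for the node inserted in front of the Conv and 0
+            for the node inserted after it, and `shape_data` carries the target
+            shape read from the adjacent Reshape initializer (see fuse() below).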
+ """ + + node_name = self.model.create_node_name(f"FuseConvReformat_{prefix}") + node = helper.make_node( + "FuseConvReformat_IxRT", + inputs=inputs, + outputs=outputs, + name=node_name, + ) + node.domain = "com.iluvatar" + + node.attribute.extend([helper.make_attribute("before_conv", before_conv)]) + node.attribute.extend([helper.make_attribute("shape_data", shape_data)]) + node.attribute.extend([helper.make_attribute("plugin_namespace", "")]) + node.attribute.extend([helper.make_attribute("plugin_version", "1")]) + return node + + def fuse(self, node, input_name_to_nodes, output_name_to_node): + + """ + eliminate Transpose(linear->nchw) + Transpose + path: + ----->Transpose ---->Reshape---> conv ----->Reshape ---->Transpose---> + + to: + ----->FuseConvReformat_IxRT---> conv ----->FuseConvReformat_IxRT---> + + """ + start_node = node + paths = { + "path": (["Reshape", "Conv", "Reshape","Transpose"], [0, 0, 0, 0]), # cross attention qery pass + } + + nodes, path = self.match_parent_path_from_dict(start_node, paths) + + if nodes is None: + logger.debug("FuseConvReformat: failed to match path") + return + + (reshape_after_node, conv_node, reshape_before_node, tranpose_before_node) = nodes + + perm1 = tranpose_before_node.attribute[0].ints + if perm1 !=[0, 2, 1]: + return + perm2 = start_node.attribute[0].ints + if perm2 !=[0, 2, 1]: + return + + before_shape_data = numpy_helper.to_array(self.model.get_initializer(reshape_before_node.input[1])) + + if before_shape_data.shape[0] != 4: + return + + after_shape_data = numpy_helper.to_array(self.model.get_initializer(reshape_after_node.input[1])) + if after_shape_data.shape[0] != 3: + return + node1_inputs = tranpose_before_node.input + node1_outputs = reshape_before_node.output + node1_before_conv = 1 + + new_node1 = self.create_fuse_node( + node1_inputs, node1_outputs, node1_before_conv, before_shape_data,"before") + + + node2_inputs = conv_node.output + node2_outputs = start_node.output + node2_before_conv = 0 + new_node2 = self.create_fuse_node( + node2_inputs, node2_outputs, node2_before_conv, after_shape_data,"after") + + self.nodes_to_add.append(new_node1) + self.nodes_to_add.append(new_node2) + self.node_name_to_graph_name[new_node1.name] = self.this_graph_name + self.node_name_to_graph_name[new_node2.name] = self.this_graph_name + self.nodes_to_remove.extend([start_node, reshape_after_node,reshape_before_node,tranpose_before_node]) + diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_cosyvoice_attention.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_cosyvoice_attention.py new file mode 100644 index 0000000000000000000000000000000000000000..5bfa8768e7077fad40b9ef8ff51427db217a5069 --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_cosyvoice_attention.py @@ -0,0 +1,210 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +# License for the specific language governing permissions and limitations +# under the License. +# + +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- +import math +from enum import Enum +from logging import getLogger +from os import name +from sys import path +from typing import Tuple, Union + +import numpy as np +import onnx +from onnx import NodeProto, TensorProto, helper, numpy_helper + +from .fusion_base import Fusion +from .fusion_options import AttentionMaskFormat +from .fusion_utils import FusionUtils, NumpyHelper +from .onnx_model import OnnxModel +from .shape_infer_helper import SymbolicShapeInferenceHelper, get_shape_from_type_proto + +logger = getLogger(__name__) + + + +class FusionCosyvoiceAttention(Fusion): + """ + Fuse T5Attention subgraph into one Attention node. + """ + + def __init__( + self, + model: OnnxModel, + ): + super().__init__( + model, + "CustomQkvCrossToContext_IxRT", + ["Softmax"], + ) + + # Flags to show warning only once + self.num_heads_warning = True + self.hidden_size_warning = True + + def get_num_heads_and_hidden_size(self, reshape_q: NodeProto) -> Tuple[int, int]: + """Detect num_heads and hidden_size from a reshape node. + + Args: + reshape_q (NodeProto): reshape node for Q + + Returns: + Tuple[int, int]: num_heads and hidden_size + """ + + # we assume that reshape fusion has done, so the shape is a tensor like [0, 0, num_heads, head_size] + q_shape = self.model.get_initializer(reshape_q.input[1]) + if q_shape is None: + logger.debug(f"{reshape_q.input[1]} is not initializer.") + return [0, 0] + + q_shape_value = NumpyHelper.to_array(q_shape) + if len(q_shape_value) != 4 or (q_shape_value[2] <= 0 or q_shape_value[3] <= 0): + logger.debug( + f"q_shape_value={q_shape_value}. Expected value are like [0, 0, num_heads, head_size]." + ) + return [0, 0] + + num_heads = q_shape_value[2] + head_size = q_shape_value[3] + hidden_size = num_heads * head_size + + return num_heads, hidden_size + + def create_decoder_attention_node( + self, inputs: str, outputs: str, type_mask: int, has_mask: int, scale: float + ) -> Union[NodeProto, None]: + """Create an Attention node. + + Args: + input (str): input name + output (str): output name + + Returns: + Union[NodeProto, None]: the node created or None if failed. 
+ """ + + attention_node_name = self.model.create_node_name("decoder_Attention") + attention_node = helper.make_node( + "CustomQkvCrossToContext_IxRT", + inputs=inputs, + outputs=outputs, + name=attention_node_name, + ) + attention_node.domain = "com.iluvatar" + attention_node.attribute.extend([helper.make_attribute("type_id", 2)]) + attention_node.attribute.extend([helper.make_attribute("scale", scale)]) + attention_node.attribute.extend([helper.make_attribute("has_mask", has_mask)]) + attention_node.attribute.extend([helper.make_attribute("plugin_namespace", "")]) + attention_node.attribute.extend([helper.make_attribute("plugin_version", "1")]) + attention_node.attribute.extend([helper.make_attribute("type_mask", type_mask)]) + + return attention_node + + def fuse(self, node, input_name_to_nodes, output_name_to_node): + + """ + path1: + + (query) --------------MatMul---Div --> add -->softmax --->MatMul---> + / / / + (key) ---->Transpose > / / + / / + (mask) ------------------------> / + / + (value)---------------------------------------------> + """ + + + + + import pdb + start_node = node + qkv_paths = { + "path1": ( + ["Add", "Div", "MatMul", "Transpose"], + [None, 0, None, 1], + ), # float mask self attention,self attention key pass + } + + qkv_nodes, qkv_path = self.match_parent_path_from_dict(start_node, qkv_paths) + + if qkv_nodes is None: + logger.debug("fuse_attention: failed to match qkv path") + return + next_nodes = self.model.get_children(node) + + if len(next_nodes) == 0: + return + + if next_nodes[0].op_type != "MatMul": + return + + second_matmul_node = next_nodes[0] + attention_inputs = None + attention_outputs = second_matmul_node.output + remove_nodes = [second_matmul_node, node] + + (add_node, div_node, first_matmul_node, transpose_node) = qkv_nodes + transpose_nodes = self.model.get_parents(first_matmul_node) + q_input = transpose_nodes[0].output[0] + + k_transpose_node = transpose_nodes[1] + k_transpose_node_perm = k_transpose_node.attribute[0].ints + + if k_transpose_node_perm == [0, 2, 3, 1]: #transpose has bean merge,[0,2,1,3]->[0, 1, 3, 2] = [0, 2, 3, 1] + k_input = transpose_nodes[1].output[0] + + transpose_nodes[1].attribute[0].ints[0] = 0 + transpose_nodes[1].attribute[0].ints[1] = 2 + transpose_nodes[1].attribute[0].ints[2] = 1 + transpose_nodes[1].attribute[0].ints[3] = 3 + + remove_nodes.extend([add_node, div_node, first_matmul_node]) + + elif k_transpose_node_perm == [0, 1, 3, 2]: + k_input = transpose_nodes[1].input[0] + remove_nodes.extend([add_node, div_node, first_matmul_node,k_transpose_node]) + + else: + return + + v_input = second_matmul_node.input[1] + attention_inputs = [q_input, k_input, v_input] + + has_mask = 1 + type_mask = 3 # float mask + + mask_input = add_node.input[0] + score_out = div_node.output[0] + if add_node.input[0] == score_out: + mask_input = add_node.input[1] + attention_inputs.append(mask_input) + + scale_data = self.model.get_initializer_input_edges(div_node.name, return_np_array = True) + scale = 1.0 / scale_data[0] + + atten_node = self.create_decoder_attention_node( + attention_inputs, attention_outputs, type_mask, has_mask, scale + ) + + self.nodes_to_add.append(atten_node) + self.node_name_to_graph_name[atten_node.name] = self.this_graph_name + self.nodes_to_remove.extend(remove_nodes) + diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_cosyvoice_splitQKV.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_cosyvoice_splitQKV.py 
new file mode 100755 index 0000000000000000000000000000000000000000..d1a1baffd56aba589caa4251d7d841e9715b8f02 --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_cosyvoice_splitQKV.py @@ -0,0 +1,197 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- + +from logging import getLogger +from typing import Tuple, Union + +from onnx import NodeProto, TensorProto, helper, numpy_helper + +from .fusion_base import Fusion +from .fusion_utils import NumpyHelper +from .onnx_model import OnnxModel + +logger = getLogger(__name__) + + +class FusionSplitQKV(Fusion): + """ + Fuse FusionSplitQKV + """ + + def __init__(self, model: OnnxModel, hidden_size: int, num_heads: int): + super().__init__(model, "SplitQKV_IxRT", "Split") + + self.hidden_size = hidden_size + self.num_heads = num_heads + + def create_node( + self, inputs: list, outputs:list + ) -> Union[NodeProto, None]: + """Create an create node. + + Args: + data_input (str): data input name + mask_input (str): max input name + output (str): output name + + Returns: + Union[NodeProto, None]: the node created or None if failed. 
+ """ + node_name = self.model.create_node_name("SplitQKV_IxRT") + + + k_cache_output = outputs[1] + v_cache_output = outputs[2] + + concat_k_input = k_cache_output + "_k_concat_input" + concat_v_input = v_cache_output + "_v_concat_input" + + plugin_outputs = [outputs[0],concat_k_input,concat_v_input] + + new_node = helper.make_node( + "SplitQKV_IxRT", + inputs=inputs, + outputs=plugin_outputs, + name=node_name, + ) + new_node.domain = "com.iluvatar" + new_node.attribute.extend([helper.make_attribute("plugin_namespace", "")]) + new_node.attribute.extend([helper.make_attribute("plugin_version", "1")]) + new_node.attribute.extend( + [helper.make_attribute("atten_scale", 1.0)] + ) + new_node.attribute.extend( + [helper.make_attribute("transpose", 1)] + ) + new_node.attribute.extend([helper.make_attribute("num_head", self.num_heads)]) + new_node.attribute.extend( + [helper.make_attribute("head_dim", self.hidden_size // self.num_heads)] + ) + + + + k_concat_node_name = node_name + "_k_concat" + v_concat_node_name = node_name + "_v_concat" + + k_concat_node = helper.make_node( + "Identity", + inputs=[concat_k_input], + outputs=[outputs[1]], + name=k_concat_node_name, + ) + + v_concat_node = helper.make_node( + "Identity", + inputs=[concat_v_input], + outputs=[outputs[2]], + name=v_concat_node_name, + ) + + self.model.replace_input_of_all_nodes(outputs[1],concat_k_input) + self.model.replace_input_of_all_nodes(outputs[2],concat_v_input) + return new_node,k_concat_node,v_concat_node + + def fuse(self, node, input_name_to_nodes, output_name_to_node): + split_node = node + split_data = self.model.get_initializer_input_edges(node.name,return_np_array = True) + if split_data[0].shape != (3,): + return + if split_data[0][0] != split_data[0][1] and split_data[0][1] != split_data[0][2]: + return + + q_input, k_input, v_input = node.output[0],node.output[1],node.output[2] + + q_path_nodes= [] + k_path_nodes= [] + v_path_nodes= [] + + reshape_nodes = self.model.get_children(node) + + for node in reshape_nodes: + if node.op_type != "Reshape": + return + q_reshape_node,k_reshape_node,v_reshape_node = reshape_nodes[0],reshape_nodes[1],reshape_nodes[2] + + q_path_nodes.append(q_reshape_node) + k_path_nodes.append(k_reshape_node) + v_path_nodes.append(v_reshape_node) + + q_transpose_nodes = self.model.get_children(q_reshape_node) + k_transpose_nodes = self.model.get_children(k_reshape_node) + v_transpose_nodes = self.model.get_children(v_reshape_node) + + if len(q_transpose_nodes)!=1 and (not k_transpose_nodes) and len(v_transpose_nodes) != 1: + return + + + if (q_transpose_nodes[0].attribute[0].ints != [0, 2, 1, 3]) and (v_transpose_nodes[0].attribute[0].ints !=[0, 2, 1, 3]): + return + + if len(k_transpose_nodes) == 2: + if (k_transpose_nodes[0].attribute[0].ints != k_transpose_nodes[1].attribute[0].ints) and (k_transpose_nodes[0].attribute[0].ints !=[0, 2, 1, 3]): + return + + + if len(k_transpose_nodes) == 1: + if (k_transpose_nodes[0].attribute[0].ints !=[0, 2, 1, 3]): + return + + + q_transpose_node = q_transpose_nodes[0] + k_transpose_node_0 = k_transpose_nodes[0] + v_transpose_node = v_transpose_nodes[0] + + k_output = k_transpose_node_0.output[0] + + if len(k_transpose_nodes) == 2: + k_transpose_node_1 = k_transpose_nodes[1] + next_node = self.model.get_children(k_transpose_node_1) + if not next_node: + return + + self.model.replace_node_input(next_node[0], k_transpose_node_1.output[0], k_transpose_node_0.output[0]) + + + q_path_nodes.append(q_transpose_node) + v_path_nodes.append(v_transpose_node) + 
k_path_nodes.extend(k_transpose_nodes) + + plugin_inputs = [split_node.input[0]] + plugin_outputs = [q_transpose_node.output[0], k_output,v_transpose_node.output[0]] + + remove_nodes = [split_node] + + remove_nodes.extend(q_path_nodes) + remove_nodes.extend(k_path_nodes) + remove_nodes.extend(v_path_nodes) + + new_node,k_cache_concat_node, v_cache_concat_node = self.create_node(plugin_inputs, plugin_outputs) + + self.nodes_to_add.append(new_node) + self.nodes_to_add.append(k_cache_concat_node) + self.nodes_to_add.append(v_cache_concat_node) + + self.node_name_to_graph_name[new_node.name] = self.this_graph_name + self.node_name_to_graph_name[k_cache_concat_node.name] = self.this_graph_name + self.node_name_to_graph_name[v_cache_concat_node.name] = self.this_graph_name + self.nodes_to_remove.extend(remove_nodes) + + diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_cosyvoice_splitQKV_update_KVcache.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_cosyvoice_splitQKV_update_KVcache.py new file mode 100644 index 0000000000000000000000000000000000000000..6b1599d4b27cf32c74dc9c294564490ff1e799da --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_cosyvoice_splitQKV_update_KVcache.py @@ -0,0 +1,188 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- + +from logging import getLogger +from typing import Tuple, Union + +from onnx import NodeProto, TensorProto, helper, numpy_helper + +from .fusion_base import Fusion +from .fusion_utils import NumpyHelper +from .onnx_model import OnnxModel + +logger = getLogger(__name__) + + +class FusionCosyVoiceSplitQKVUpdateKVCache(Fusion): + """ + Fuse FusionSplitQKVUpdateKVCache + """ + + def __init__(self, model: OnnxModel, hidden_size: int, num_heads: int): + super().__init__( + model, "SplitQKVUpdateKVCache_IxRT", "Split" + ) + + self.hidden_size = hidden_size + self.num_heads = num_heads + + def create_node( + self, + inputs: list, + outputs: list, + ) -> Union[NodeProto, None]: + """Create an XSoftmax node. + + Args: + data_input (str): data input name + mask_input (str): max input name + output (str): output name + + Returns: + Union[NodeProto, None]: the node created or None if failed. 
+ """ + node_name = self.model.create_node_name("SplitQKVUpdateKVCache_IxRT") + + k_cache_output = outputs[1] + v_cache_output = outputs[2] + + concat_k_input = k_cache_output + "_k_concat_input" + concat_v_input = v_cache_output + "_v_concat_input" + + plugin_outputs = [outputs[0],concat_k_input,concat_v_input] + + new_node = helper.make_node( + "SplitQKVUpdateKVCache_IxRT", + inputs=inputs, + outputs=plugin_outputs, + name=node_name, + ) + + k_concat_node_name = node_name + "_k_concat" + v_concat_node_name = node_name + "_v_concat" + + k_concat_node = helper.make_node( + "Identity", + inputs=[concat_k_input], + outputs=[outputs[1]], + name=k_concat_node_name, + ) + + + + v_concat_node = helper.make_node( + "Identity", + inputs=[concat_v_input], + outputs=[outputs[2]], + name=v_concat_node_name, + ) + + + + + + + new_node.domain = "com.iluvatar" + new_node.attribute.extend([helper.make_attribute("plugin_namespace", "")]) + new_node.attribute.extend([helper.make_attribute("plugin_version", "1")]) + new_node.attribute.extend([helper.make_attribute("num_head", self.num_heads)]) + new_node.attribute.extend( + [helper.make_attribute("head_dim", self.hidden_size // self.num_heads)] + ) + + self.model.replace_input_of_all_nodes(outputs[1],concat_k_input) + self.model.replace_input_of_all_nodes(outputs[2],concat_v_input) + + return new_node,k_concat_node,v_concat_node + + def fuse(self, node, input_name_to_nodes, output_name_to_node): + + split_node = node + split_data = self.model.get_initializer_input_edges(node.name,return_np_array = True) + if split_data[0].shape != (3,): + return + if split_data[0][0] != split_data[0][1] and split_data[0][1] != split_data[0][2]: + return + + q_input, k_input, v_input = node.output[0],node.output[1],node.output[2] + + q_path_nodes= [] + k_path_nodes= [] + v_path_nodes= [] + + reshape_nodes = self.model.get_children(node) + + for node in reshape_nodes: + if node.op_type != "Reshape": + return + q_reshape_node,k_reshape_node,v_reshape_node = reshape_nodes[0],reshape_nodes[1],reshape_nodes[2] + + q_path_nodes.append(q_reshape_node) + k_path_nodes.append(k_reshape_node) + v_path_nodes.append(v_reshape_node) + + q_transpose_nodes = self.model.get_children(q_reshape_node) + k_transpose_nodes = self.model.get_children(k_reshape_node) + v_transpose_nodes = self.model.get_children(v_reshape_node) + + if len(q_transpose_nodes)!=1 and len(k_transpose_nodes) != 1 and len(v_transpose_nodes) != 1: + return + + + q_transpose_node = q_transpose_nodes[0] + + k_transpose_node = k_transpose_nodes[0] + v_transpose_node = v_transpose_nodes[0] + + k_path_nodes.append(k_transpose_node) + v_path_nodes.append(v_transpose_node) + + + k_concat_nodes = self.model.get_children(k_transpose_node) + v_concat_nodes = self.model.get_children(v_transpose_node) + + if len(k_transpose_nodes) != 1 or len(v_transpose_nodes) != 1: + return + + k_concat_node = k_concat_nodes[0] + v_concat_node = v_concat_nodes[0] + + if v_concat_node.attribute[0].i != 2 and k_concat_node.attribute[0].i != 2: #axis = 2 + return + + k_path_nodes.append(k_concat_node) + v_path_nodes.append(v_concat_node) + + k_cache_input = k_concat_node.input[0] + if k_transpose_node.output[0] == k_concat_node.input[0]: + k_cache_input = k_concat_node.input[1] + k_cache_output = k_concat_node.output[0] + + + + v_cache_input = v_concat_node.input[0] + if v_transpose_node.output[0] == v_concat_node.input[0]: + v_cache_input = v_concat_node.input[1] + v_cache_output = v_concat_node.output[0] + + + plugin_inputs = 
[split_node.input[0],k_cache_input,v_cache_input] + plugin_outputs = [q_transpose_node.output[0], k_cache_output,v_cache_output] + remove_nodes = [split_node, q_reshape_node,q_transpose_node] + + remove_nodes.extend(k_path_nodes) + remove_nodes.extend(v_path_nodes) + new_node,k_cache_concat_node, v_cache_concat_node= self.create_node(plugin_inputs, plugin_outputs) + + self.nodes_to_add.append(new_node) + self.nodes_to_add.append(k_cache_concat_node) + self.nodes_to_add.append(v_cache_concat_node) + + self.node_name_to_graph_name[new_node.name] = self.this_graph_name + self.node_name_to_graph_name[k_cache_concat_node.name] = self.this_graph_name + self.node_name_to_graph_name[v_cache_concat_node.name] = self.this_graph_name + + self.nodes_to_remove.extend(remove_nodes) + diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_customfc.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_customfc.py index e9e4011509e00ecedd3c5237e4320d3cd1a7d316..c2dd243357fac20057d67551c0d3d9d86b15dc68 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_customfc.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_customfc.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
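The SplitQKV passes above route the plugin's K/V outputs through freshly named tensors and then add `Identity` nodes back to the original names. The sketch below is a minimal, self-contained illustration of that rewiring trick, not the repository's code; the function and node names are assumptions, and in the real pass `replace_input_of_all_nodes` repoints downstream consumers at the fresh names before the Identity nodes are appended.

```python
from onnx import helper

def split_qkv_with_identity(inputs, outputs, num_heads, head_dim):
    # Plugin writes K/V to fresh tensor names so the original names stay free.
    plugin_outs = [outputs[0], outputs[1] + "_k_tmp", outputs[2] + "_v_tmp"]
    split = helper.make_node(
        "SplitQKVUpdateKVCache_IxRT",        # custom plugin op, com.iluvatar domain
        inputs=inputs,
        outputs=plugin_outs,
        name="split_qkv_update_kvcache_0",
        domain="com.iluvatar",
    )
    split.attribute.extend([
        helper.make_attribute("plugin_namespace", ""),
        helper.make_attribute("plugin_version", "1"),
        helper.make_attribute("num_head", num_heads),
        helper.make_attribute("head_dim", head_dim),
    ])
    # Identity nodes keep the original K/V names alive (e.g. when the updated
    # caches are graph outputs); all other consumers read the fresh names.
    k_identity = helper.make_node("Identity", [plugin_outs[1]], [outputs[1]],
                                  name="k_cache_identity")
    v_identity = helper.make_node("Identity", [plugin_outs[2]], [outputs[2]],
                                  name="v_cache_identity")
    return split, k_identity, v_identity
```

This mirrors why the pass returns three nodes (plugin plus two Identity nodes) and registers each of them in `node_name_to_graph_name`.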
@@ -291,7 +307,7 @@ class FusionCustomFCActivation(Fusion): fc_node = nodes[0] activation_type = 3 if node.op_type == "Gelu": - activation_type = 21 + activation_type = 3 if node.op_type == "Relu": activation_type = 4 @@ -342,3 +358,32 @@ class FusionConformerCustomFCActivation(Fusion): self.nodes_to_add.append(custom_fc_node) self.nodes_to_remove.extend([node, sigmoid_node, custom_fc_node]) self.node_name_to_graph_name[custom_fc_node.name] = self.this_graph_name + + +class FusionTorchvisionVitCustomFC(Fusion): + def __init__(self, model: OnnxModel): + super().__init__(model, "CustomFCPluginDynamic_IxRT", ["CustomQKVToContextPluginDynamic_IxRT"], "torchvision vit custom_fc",) + + def fuse(self, node, input_name_to_nodes, output_name_to_node): + + custom_fc_node_0 = self.model.get_children(node, input_name_to_nodes) + transpose_node_0 = self.model.get_children(custom_fc_node_0[0], input_name_to_nodes) + + if transpose_node_0[0].op_type != "Transpose": + return + + custom_fc_node_0[0].output[0] = transpose_node_0[0].output[0] + + nodes = self.model.match_parent_path(node, ["CustomFCPluginDynamic_IxRT","Transpose"], [0, 0]) + if nodes is None: + return + + (custom_fc_node_1, transpose_node_1) = nodes + custom_fc_node_1.input[0] = transpose_node_1.input[0] + + self.nodes_to_add.append(custom_fc_node_1) + self.nodes_to_add.append(custom_fc_node_0[0]) + self.nodes_to_remove.extend([transpose_node_1, custom_fc_node_1, transpose_node_0[0], custom_fc_node_0[0]]) + self.node_name_to_graph_name[custom_fc_node_1.name] = self.this_graph_name + self.node_name_to_graph_name[custom_fc_node_0[0].name] = self.this_graph_name + \ No newline at end of file diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_disentangled_attention.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_disentangled_attention.py index 04eb863f81fb8f026c74fa52ce5e2ca959cee13c..670a767e18e3ccd13d5540c9a415aa3ad8fc7525 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_disentangled_attention.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_disentangled_attention.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
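For context on the activation hunk above: folding a Gelu/Relu into `CustomFCPluginDynamic_IxRT` amounts to tagging the FC node with a numeric activation id and letting it take over the activation's output. The snippet below is only an illustrative sketch under those assumptions; the attribute name `act_type` and the helper name are guesses, and only the ids visible in the hunk (3 for Gelu after this change, 4 for Relu) are taken from the source.

```python
from onnx import helper

# Assumed mapping, mirroring the ids visible in the hunk above.
ACTIVATION_IDS = {"Gelu": 3, "Relu": 4}

def fold_activation_into_fc(fc_node, act_node):
    """Tag the fused FC plugin with an activation id and take over the
    activation node's output so downstream consumers stay connected."""
    act_type = ACTIVATION_IDS.get(act_node.op_type)
    if act_type is None:
        return False  # unsupported activation: leave the graph unchanged
    fc_node.attribute.extend([helper.make_attribute("act_type", act_type)])
    fc_node.output[0] = act_node.output[0]
    return True
```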
diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_embedlayer.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_embedlayer.py index 90bddbf89ece285a7be5b4e4f45a55defbdd138f..f46fa2c77da83612a25dd7bde215f20e70845ff7 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_embedlayer.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_embedlayer.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. @@ -6,9 +22,10 @@ from logging import getLogger from typing import Dict, List, Tuple, Union +from onnx import NodeProto, TensorProto, helper + from .fusion_base import Fusion from .fusion_utils import FusionUtils -from onnx import NodeProto, TensorProto, helper from .onnx_model import OnnxModel logger = getLogger(__name__) @@ -33,7 +50,9 @@ class FusionEmbedLayerNoMask(Fusion): self.attention = None self.embed_node = None - def match_two_gather(self, add: NodeProto) -> Union[None, Tuple[NodeProto, NodeProto]]: + def match_two_gather( + self, add: NodeProto + ) -> Union[None, Tuple[NodeProto, NodeProto]]: gather_0_path = self.model.match_parent_path(add, ["Gather"], [0]) if gather_0_path is None: return None @@ -70,7 +89,11 @@ class FusionEmbedLayerNoMask(Fusion): children = input_name_to_nodes[layernorm.output[0]] # For Albert, there is MatMul+Add after embedding layer before attention. - if len(children) == 1 and children[0].op_type == "MatMul" and children[0].output[0] in input_name_to_nodes: + if ( + len(children) == 1 + and children[0].op_type == "MatMul" + and children[0].output[0] in input_name_to_nodes + ): grandchildren = input_name_to_nodes[children[0].output[0]] if ( len(grandchildren) == 1 @@ -90,24 +113,37 @@ class FusionEmbedLayerNoMask(Fusion): if is_distil_bert: # SkipLayerNormailization might exist when model has been optimized by ORT first. 
if ( - children_types != ["MatMul", "MatMul", "MatMul", "Shape", "SkipLayerNormalization"] - and children_types != ["Add", "MatMul", "MatMul", "MatMul", "Shape", "Shape"] + children_types + != ["MatMul", "MatMul", "MatMul", "Shape", "SkipLayerNormalization"] + and children_types + != ["Add", "MatMul", "MatMul", "MatMul", "Shape", "Shape"] and children_types != ["Add", "MatMul", "MatMul", "MatMul", "Shape"] ): - logger.debug("No Attention like subgraph in children of LayerNormalization") + logger.debug( + "No Attention like subgraph in children of LayerNormalization" + ) return False else: - if children_types != ["Add", "MatMul", "MatMul", "MatMul",] and children_types != [ + if children_types != [ + "Add", + "MatMul", + "MatMul", + "MatMul", + ] and children_types != [ "MatMul", "MatMul", "MatMul", "SkipLayerNormalization", ]: - logger.debug("No Attention like subgraph in children of LayerNormalization") + logger.debug( + "No Attention like subgraph in children of LayerNormalization" + ) return False return True - def match_position_embedding_distilbert(self, position_embedding_gather, input_ids, output_name_to_node): + def match_position_embedding_distilbert( + self, position_embedding_gather, input_ids, output_name_to_node + ): """ Match position embedding path from input_ids to Gather for DistilBert. Pattern is like the following: @@ -128,7 +164,9 @@ class FusionEmbedLayerNoMask(Fusion): Gather """ # remove after tests pass - path1 = self.model.match_parent_path(position_embedding_gather, ["Expand", "Shape"], [1, 1]) + path1 = self.model.match_parent_path( + position_embedding_gather, ["Expand", "Shape"], [1, 1] + ) if path1 is None: path1 = self.model.match_parent_path( position_embedding_gather, @@ -155,7 +193,8 @@ class FusionEmbedLayerNoMask(Fusion): range_node = path2[1] if not ( - self.utils.check_node_input_value(range_node, 0, 0) and self.utils.check_node_input_value(range_node, 2, 1) + self.utils.check_node_input_value(range_node, 0, 0) + and self.utils.check_node_input_value(range_node, 2, 1) ): return False @@ -169,7 +208,9 @@ class FusionEmbedLayerNoMask(Fusion): return True - def match_position_embedding_roberta(self, position_embedding_gather, input_ids, output_name_to_node): + def match_position_embedding_roberta( + self, position_embedding_gather, input_ids, output_name_to_node + ): """Match position embedding path from input_ids to Gather for Roberta. Roberta Embedding Layer Pattern (* is optional since it might be removed by ORT, ? is the padding word id): @@ -216,10 +257,12 @@ class FusionEmbedLayerNoMask(Fusion): return False - def match_position_embedding_bert(self, position_embedding_gather, input_ids, output_name_to_node): + def match_position_embedding_bert( + self, position_embedding_gather, input_ids, output_name_to_node + ): """ Match position embedding path from input_ids to Gather for BERT. 
- BERT Embedding Layer Pattern: + BERT Embedding Layer Pattern: (input_ids) / \ / Shape @@ -232,7 +275,7 @@ class FusionEmbedLayerNoMask(Fusion): \ | | \ Gather Slice (data[1,512], starts=0, ends=*, axes=1, steps=1) \ / | - Add Gather + Add Gather \ / Add | @@ -255,7 +298,10 @@ class FusionEmbedLayerNoMask(Fusion): and slice_weight.shape[0] == 1 and self.utils.check_node_input_value(slice, 1, [0]) and self.utils.check_node_input_value(slice, 3, [1]) - and (len(slice.input) == 4 or self.utils.check_node_input_value(slice, 4, [1])) + and ( + len(slice.input) == 4 + or self.utils.check_node_input_value(slice, 4, [1]) + ) ): return False @@ -288,8 +334,12 @@ class FusionEmbedLayerNoMask(Fusion): return input_ids == shape.input[0] - def match_position_embedding(self, position_embedding_gather, input_ids, output_name_to_node): - if self.match_position_embedding_bert(position_embedding_gather, input_ids, output_name_to_node): + def match_position_embedding( + self, position_embedding_gather, input_ids, output_name_to_node + ): + if self.match_position_embedding_bert( + position_embedding_gather, input_ids, output_name_to_node + ): return True # TODO: Support roberta (position starts from 2 instead of 0) in EmbedLayerNormalization kernel @@ -297,15 +347,21 @@ class FusionEmbedLayerNoMask(Fusion): # if self.match_position_embedding_roberta(position_embedding_gather, input_ids, output_name_to_node): # return True - if self.match_position_embedding_distilbert(position_embedding_gather, input_ids, output_name_to_node): + if self.match_position_embedding_distilbert( + position_embedding_gather, input_ids, output_name_to_node + ): return True return False - def check_embedding(self, word_embedding_gather, segment_embedding_gather, position_embedding_gather): + def check_embedding( + self, word_embedding_gather, segment_embedding_gather, position_embedding_gather + ): """Sanity check of embedding weights, and match hidden_size of weights and shape of inputs.""" input_ids = word_embedding_gather.input[1] - segment_ids = segment_embedding_gather.input[1] if segment_embedding_gather else None + segment_ids = ( + segment_embedding_gather.input[1] if segment_embedding_gather else None + ) position_ids = position_embedding_gather.input[1] if self.shape_infer_helper is not None: @@ -324,7 +380,9 @@ class FusionEmbedLayerNoMask(Fusion): ) return False - if segment_ids and not self.shape_infer_helper.compare_shape(input_ids, segment_ids): + if segment_ids and not self.shape_infer_helper.compare_shape( + input_ids, segment_ids + ): logger.info( "Cannot fuse EmbedLayerNormalization: input_ids and segment_ids does not have same shape: {} != {}".format( input_ids_shape, @@ -333,28 +391,40 @@ class FusionEmbedLayerNoMask(Fusion): ) return False - word_embedding_table = self.model.get_constant_value(word_embedding_gather.input[0]) + word_embedding_table = self.model.get_constant_value( + word_embedding_gather.input[0] + ) if word_embedding_table is None or len(word_embedding_table.shape) != 2: - logger.info("Cannot fuse EmbedLayerNormalization: word embedding table is not expected") + logger.info( + "Cannot fuse EmbedLayerNormalization: word embedding table is not expected" + ) return False - position_embedding_table = self.model.get_constant_value(position_embedding_gather.input[0]) + position_embedding_table = self.model.get_constant_value( + position_embedding_gather.input[0] + ) if ( position_embedding_table is None or len(position_embedding_table.shape) != 2 or (word_embedding_table.shape[1] != 
position_embedding_table.shape[1]) ): - logger.info("Cannot fuse EmbedLayerNormalization: position embedding table is not expected") + logger.info( + "Cannot fuse EmbedLayerNormalization: position embedding table is not expected" + ) return False if segment_ids: - segment_embedding_table = self.model.get_constant_value(segment_embedding_gather.input[0]) + segment_embedding_table = self.model.get_constant_value( + segment_embedding_gather.input[0] + ) if ( segment_embedding_table is None or len(segment_embedding_table.shape) != 2 or (word_embedding_table.shape[1] != segment_embedding_table.shape[1]) ): - logger.info("Cannot fuse EmbedLayerNormalization: segment embedding table is not expected") + logger.info( + "Cannot fuse EmbedLayerNormalization: segment embedding table is not expected" + ) return False # In normal case, word embeding table is the largest, and segment embedding table is the smallest, while postion embedding table is in between. @@ -392,7 +462,9 @@ class FusionEmbedLayerNoMask(Fusion): graph_input = self.model.find_graph_input(input_name) if graph_input is not None: if graph_input.type.tensor_type.elem_type != TensorProto.INT32: - int32_output, input_cast_node = self.utils.cast_input_to_int32(input_name) + int32_output, input_cast_node = self.utils.cast_input_to_int32( + input_name + ) else: int32_output = input_name else: @@ -515,7 +587,9 @@ class FusionEmbedLayerNoMask(Fusion): return len(nodes) > 1 - def fuse_gpt2(self, layernorm, add_before_layernorm, input_name_to_nodes, output_name_to_node): + def fuse_gpt2( + self, layernorm, add_before_layernorm, input_name_to_nodes, output_name_to_node + ): # graph checks # gpt2 has no segment embedding, subgraph pattern is like # input_ids position_ids @@ -543,10 +617,14 @@ class FusionEmbedLayerNoMask(Fusion): input_ids = word_embedding_gather.input[1] position_ids = position_embedding_gather.input[1] - if not self.check_attention_subgraph(layernorm, input_name_to_nodes, is_distil_bert=False): + if not self.check_attention_subgraph( + layernorm, input_name_to_nodes, is_distil_bert=False + ): return False - if not self.check_embedding(word_embedding_gather, None, position_embedding_gather): + if not self.check_embedding( + word_embedding_gather, None, position_embedding_gather + ): return False optional_embedding_sum_output = False @@ -571,7 +649,9 @@ class FusionEmbedLayerNoMask(Fusion): return True - def fuse_distilbert(self, layernorm, add_before_layernorm, input_name_to_nodes, output_name_to_node): + def fuse_distilbert( + self, layernorm, add_before_layernorm, input_name_to_nodes, output_name_to_node + ): """Fuse embedding layer for DistilBert Args: layernorm (NodeProto): node of LayerNormalization or SkipLayerNormalization @@ -597,13 +677,19 @@ class FusionEmbedLayerNoMask(Fusion): word_embedding_gather, position_embedding_gather = two_gather input_ids = word_embedding_gather.input[1] - if not self.check_attention_subgraph(layernorm, input_name_to_nodes, is_distil_bert=True): + if not self.check_attention_subgraph( + layernorm, input_name_to_nodes, is_distil_bert=True + ): return False - if not self.match_position_embedding(position_embedding_gather, input_ids, output_name_to_node): + if not self.match_position_embedding( + position_embedding_gather, input_ids, output_name_to_node + ): return False - if not self.check_embedding(word_embedding_gather, None, position_embedding_gather): + if not self.check_embedding( + word_embedding_gather, None, position_embedding_gather + ): return False embed_node = self.create_fused_node( 
@@ -612,7 +698,9 @@ class FusionEmbedLayerNoMask(Fusion): self.finish_fusion(layernorm, embed_node) return True - def fuse_bert(self, layernorm, add_before_layernorm, input_name_to_nodes, output_name_to_node): + def fuse_bert( + self, layernorm, add_before_layernorm, input_name_to_nodes, output_name_to_node + ): """Fuse embedding layer for Bert Args: layernorm (NodeProto): node of LayerNormalization or SkipLayerNormalization @@ -633,23 +721,33 @@ class FusionEmbedLayerNoMask(Fusion): input_ids = word_embedding_gather.input[1] - if not self.check_attention_subgraph(layernorm, input_name_to_nodes, is_distil_bert=False): + if not self.check_attention_subgraph( + layernorm, input_name_to_nodes, is_distil_bert=False + ): return False - position_embedding_path = self.model.match_parent_path(add_before_layernorm, ["Gather"], [1]) + position_embedding_path = self.model.match_parent_path( + add_before_layernorm, ["Gather"], [1] + ) if position_embedding_path is None: return False position_embedding_gather = position_embedding_path[0] - if not self.match_position_embedding(position_embedding_gather, input_ids, output_name_to_node): - if not self.match_position_embedding(segment_embedding_gather, input_ids, output_name_to_node): + if not self.match_position_embedding( + position_embedding_gather, input_ids, output_name_to_node + ): + if not self.match_position_embedding( + segment_embedding_gather, input_ids, output_name_to_node + ): return False # position and segment are switched temp = segment_embedding_gather segment_embedding_gather = position_embedding_gather position_embedding_gather = temp - if not self.check_embedding(word_embedding_gather, segment_embedding_gather, position_embedding_gather): + if not self.check_embedding( + word_embedding_gather, segment_embedding_gather, position_embedding_gather + ): return False embed_node = self.create_fused_node( @@ -671,13 +769,19 @@ class FusionEmbedLayerNoMask(Fusion): else: # SkipLayerNormalization add_before_layernorm = node # Add is fused into SkipLayerNormalization - if self.fuse_gpt2(node, add_before_layernorm, input_name_to_nodes, output_name_to_node): + if self.fuse_gpt2( + node, add_before_layernorm, input_name_to_nodes, output_name_to_node + ): return - if self.fuse_distilbert(node, add_before_layernorm, input_name_to_nodes, output_name_to_node): + if self.fuse_distilbert( + node, add_before_layernorm, input_name_to_nodes, output_name_to_node + ): return - if self.fuse_bert(node, add_before_layernorm, input_name_to_nodes, output_name_to_node): + if self.fuse_bert( + node, add_before_layernorm, input_name_to_nodes, output_name_to_node + ): return @@ -701,3 +805,274 @@ class FusionEmbedLayerNormalization(FusionEmbedLayerNoMask): self.nodes_to_remove.extend([node]) embed_node.input.append(mask_input_name) embed_node.output[1] = mask_index + + +class FusionBertEmbedLayerNormalization(Fusion): + """ + Fuse BertEmbedLayerNormalization subgraph into one node. 
+ """ + + def __init__(self, model: OnnxModel): + super().__init__( + model, "CustomEmbLayerNormPluginDynamic_IxRT", "CustomQKVToContextPluginDynamic_IxRT" + ) + + def fuse(self, node, input_name_to_nodes: Dict, output_name_to_node: Dict): + """ + input --> CustomEmbLayerNormPluginDynamic_IxRT --> CustomFCPluginDynamic_IxRT --> CustomQKVToContextPluginDynamic_IxRT --> CustomFCPluginDynamic_IxRT + """ + children = self.model.get_children(node, input_name_to_nodes) + parent = self.model.get_parents(node, output_name_to_node) + + if len(children) == 0: + return + if len(parent) == 0: + return + + start_node = node + + # word_embeddings + word_embeddings_node = self.model.match_parent_path( + start_node, + ["CustomFCPluginDynamic_IxRT", "LayerNormalization", "Add", "Add", "Gather"], + [0, 0, 0, 0, 0], + output_name_to_node, + ) + + # token_type_embeddings + token_type_embeddings_node = self.model.match_parent_path( + start_node, + ["CustomFCPluginDynamic_IxRT", "LayerNormalization", "Add", "Add", "Gather"], + [0, 0, 0, 0, 1], + output_name_to_node, + ) + + # attention_mask + attention_mask_node = self.model.match_parent_path( + start_node, + ["Mul", "Sub", "Cast", "Unsqueeze"], + [1, 0, 1, 0], + output_name_to_node, + ) + + if word_embeddings_node is None or token_type_embeddings_node is None or attention_mask_node is None: + return + + if word_embeddings_node and token_type_embeddings_node and attention_mask_node: + subgraph_nodes = [] + subgraph_nodes.extend(word_embeddings_node) + subgraph_nodes.extend(token_type_embeddings_node) + subgraph_nodes.extend(attention_mask_node) + + subgraph_nodes_unique = [] + for item in subgraph_nodes: + if item not in subgraph_nodes_unique: + subgraph_nodes_unique.append(item) + subgraph_nodes_remove = [] + for item in subgraph_nodes_unique: + if item.op_type != "CustomFCPluginDynamic_IxRT": + subgraph_nodes_remove.append(item) + + # input_ids = self.model.get_graph_inputs_excluding_initializers()[0] + # token_type_ids = self.model.get_graph_inputs_excluding_initializers()[1] + # attention_mask = self.model.get_graph_inputs_excluding_initializers()[2] + + emblayernorm_out = word_embeddings_node[1].output[0] + emblayernorm_out_mask = attention_mask_node[0].output[0] + + # self.model.modify_node_output_type(emblayernorm_out_mask, 5) + + beta_data = self.model.get_initializer(word_embeddings_node[1].input[2], True) + embeddings_layernorm_beta_name = "bert_embeddings_layernorm_beta" + embeddings_layernorm_beta = helper.make_tensor( + embeddings_layernorm_beta_name, TensorProto.FLOAT, beta_data.shape, beta_data.flatten().tolist()) + + gamma_data = self.model.get_initializer(word_embeddings_node[1].input[1], True) + embeddings_layernorm_gamma_name = "bert_embeddings_layernorm_gamma" + embeddings_layernorm_gamma = helper.make_tensor( + embeddings_layernorm_gamma_name, TensorProto.FLOAT, gamma_data.shape, gamma_data.flatten().tolist()) + + embeddings_word_embeddings_data = self.model.get_initializer(word_embeddings_node[4].input[0], True) + embeddings_word_embeddings_name = "bert_embeddings_word_embeddings" + embeddings_word_embeddings = helper.make_tensor( + embeddings_word_embeddings_name, TensorProto.FLOAT, embeddings_word_embeddings_data.shape, + embeddings_word_embeddings_data.flatten().tolist()) + + embeddings_token_type_embeddings_data = self.model.get_initializer(token_type_embeddings_node[4].input[0], True) + embeddings_token_type_embeddings_name = "bert_embeddings_token_type_embeddings" + embeddings_token_type_embeddings = helper.make_tensor( + 
embeddings_token_type_embeddings_name, TensorProto.FLOAT, embeddings_token_type_embeddings_data.shape, + embeddings_token_type_embeddings_data.flatten().tolist()) + + embeddings_position_embeddings_data = self.model.get_initializer(token_type_embeddings_node[2].input[1], True) + embeddings_position_embeddings_name = "bert_embeddings_token_type_embeddings" + embeddings_position_embeddings = helper.make_tensor( + embeddings_position_embeddings_name, TensorProto.FLOAT, embeddings_position_embeddings_data.shape, + embeddings_position_embeddings_data.flatten().tolist()) + + self.model.add_initializer(embeddings_layernorm_beta, self.this_graph_name) + self.model.add_initializer(embeddings_layernorm_gamma, self.this_graph_name) + self.model.add_initializer(embeddings_word_embeddings, self.this_graph_name) + self.model.add_initializer(embeddings_token_type_embeddings, self.this_graph_name) + self.model.add_initializer(embeddings_position_embeddings, self.this_graph_name) + + + emblayernorm_node = helper.make_node( + "CustomEmbLayerNormPluginDynamic_IxRT", + inputs=[word_embeddings_node[4].input[1], token_type_embeddings_node[4].input[1], attention_mask_node[3].input[0]], + outputs=[emblayernorm_out, emblayernorm_out_mask], + name=self.model.create_node_name( + "BertEmbedLayerNormalization", name_prefix="BertEmbedLayerNormalization" + ), + ) + emblayernorm_node.domain = "com.iluvatar" + emblayernorm_node.attribute.extend([helper.make_attribute("plugin_namespace", "")]) + emblayernorm_node.attribute.extend([helper.make_attribute("plugin_version", "1")]) + emblayernorm_node.attribute.extend([helper.make_attribute("output_fp16", 1)]) + emblayernorm_node.attribute.extend([helper.make_attribute("full_mask", 1)]) + emblayernorm_node.attribute.extend([helper.make_attribute("mha_type_id", 2)]) + emblayernorm_node.attribute.extend([helper.make_attribute("pad_id", 0)]) + emblayernorm_node.attribute.extend([helper.make_attribute("bert_embeddings_layernorm_beta", embeddings_layernorm_beta)]) + emblayernorm_node.attribute.extend([helper.make_attribute("bert_embeddings_layernorm_gamma", embeddings_layernorm_gamma)]) + emblayernorm_node.attribute.extend([helper.make_attribute("bert_embeddings_word_embeddings", embeddings_word_embeddings)]) + emblayernorm_node.attribute.extend([helper.make_attribute("bert_embeddings_token_type_embeddings", embeddings_token_type_embeddings)]) + emblayernorm_node.attribute.extend([helper.make_attribute("bert_embeddings_position_embeddings", embeddings_position_embeddings)]) + + self.nodes_to_remove.extend(subgraph_nodes_remove) + + self.nodes_to_add.append(emblayernorm_node) + self.node_name_to_graph_name[emblayernorm_node.name] = self.this_graph_name + + +class FusionAlbertEmbedLayerNormalization(Fusion): + """ + Fuse AlbertEmbedLayerNormalization subgraph into one node. 
+ """ + + def __init__(self, model: OnnxModel): + super().__init__( + model, "CustomEmbLayerNormPluginDynamic_IxRT", "CustomQKVToContextPluginDynamic_IxRT" + ) + + def fuse(self, node, input_name_to_nodes: Dict, output_name_to_node: Dict): + """ + input --> CustomEmbLayerNormPluginDynamic_IxRT --> CustomFCPluginDynamic_IxRT --> CustomFCPluginDynamic_IxRT --> CustomQKVToContextPluginDynamic_IxRT --> CustomFCPluginDynamic_IxRT + """ + children = self.model.get_children(node, input_name_to_nodes) + parent = self.model.get_parents(node, output_name_to_node) + + if len(children) == 0: + return + if len(parent) == 0: + return + + start_node = node + + # word_embeddings + word_embeddings_node = self.model.match_parent_path( + start_node, + ["CustomFCPluginDynamic_IxRT","CustomFCPluginDynamic_IxRT", "LayerNormalization", "Add", "Add", "Gather"], + [0, 0, 0, 0, 0, 0], + output_name_to_node, + ) + + # token_type_embeddings + token_type_embeddings_node = self.model.match_parent_path( + start_node, + ["CustomFCPluginDynamic_IxRT","CustomFCPluginDynamic_IxRT", "LayerNormalization", "Add", "Add", "Gather"], + [0, 0, 0, 0, 0, 1], + output_name_to_node, + ) + + # attention_mask + attention_mask_node = self.model.match_parent_path( + start_node, + ["Mul", "Sub", "Cast", "Unsqueeze"], + [1, 0, 1, 0], + output_name_to_node, + ) + + if word_embeddings_node is None or token_type_embeddings_node is None or attention_mask_node is None: + return + + if word_embeddings_node and token_type_embeddings_node and attention_mask_node: + subgraph_nodes = [] + subgraph_nodes.extend(word_embeddings_node) + subgraph_nodes.extend(token_type_embeddings_node) + subgraph_nodes.extend(attention_mask_node) + + subgraph_nodes_unique = [] + for item in subgraph_nodes: + if item not in subgraph_nodes_unique: + subgraph_nodes_unique.append(item) + subgraph_nodes_remove = [] + for item in subgraph_nodes_unique: + if item.op_type != "CustomFCPluginDynamic_IxRT": + subgraph_nodes_remove.append(item) + + # input_ids = self.model.get_graph_inputs_excluding_initializers()[0] + # token_type_ids = self.model.get_graph_inputs_excluding_initializers()[1] + # attention_mask = self.model.get_graph_inputs_excluding_initializers()[2] + + emblayernorm_out = word_embeddings_node[2].output[0] + emblayernorm_out_mask = attention_mask_node[0].output[0] + + beta_data = self.model.get_initializer(word_embeddings_node[2].input[2], True) + embeddings_layernorm_beta_name = "bert_embeddings_layernorm_beta" + embeddings_layernorm_beta = helper.make_tensor( + embeddings_layernorm_beta_name, TensorProto.FLOAT, beta_data.shape, beta_data.flatten().tolist()) + + gamma_data = self.model.get_initializer(word_embeddings_node[2].input[1], True) + embeddings_layernorm_gamma_name = "bert_embeddings_layernorm_gamma" + embeddings_layernorm_gamma = helper.make_tensor( + embeddings_layernorm_gamma_name, TensorProto.FLOAT, gamma_data.shape, gamma_data.flatten().tolist()) + + embeddings_word_embeddings_data = self.model.get_initializer(word_embeddings_node[5].input[0], True) + embeddings_word_embeddings_name = "bert_embeddings_word_embeddings" + embeddings_word_embeddings = helper.make_tensor( + embeddings_word_embeddings_name, TensorProto.FLOAT, embeddings_word_embeddings_data.shape, + embeddings_word_embeddings_data.flatten().tolist()) + + embeddings_token_type_embeddings_data = self.model.get_initializer(token_type_embeddings_node[5].input[0], True) + embeddings_token_type_embeddings_name = "bert_embeddings_token_type_embeddings" + embeddings_token_type_embeddings = 
helper.make_tensor( + embeddings_token_type_embeddings_name, TensorProto.FLOAT, embeddings_token_type_embeddings_data.shape, + embeddings_token_type_embeddings_data.flatten().tolist()) + + embeddings_position_embeddings_data = self.model.get_initializer(token_type_embeddings_node[3].input[1], True) + embeddings_position_embeddings_name = "bert_embeddings_token_type_embeddings" + embeddings_position_embeddings = helper.make_tensor( + embeddings_position_embeddings_name, TensorProto.FLOAT, embeddings_position_embeddings_data.shape, + embeddings_position_embeddings_data.flatten().tolist()) + + self.model.add_initializer(embeddings_layernorm_beta, self.this_graph_name) + self.model.add_initializer(embeddings_layernorm_gamma, self.this_graph_name) + self.model.add_initializer(embeddings_word_embeddings, self.this_graph_name) + self.model.add_initializer(embeddings_token_type_embeddings, self.this_graph_name) + self.model.add_initializer(embeddings_position_embeddings, self.this_graph_name) + + emblayernorm_node = helper.make_node( + "CustomEmbLayerNormPluginDynamic_IxRT", + inputs=[word_embeddings_node[5].input[1], token_type_embeddings_node[5].input[1], attention_mask_node[3].input[0]], + outputs=[emblayernorm_out, emblayernorm_out_mask], + name=self.model.create_node_name( + "BertEmbedLayerNormalization", name_prefix="BertEmbedLayerNormalization" + ), + ) + emblayernorm_node.domain = "com.iluvatar" + emblayernorm_node.attribute.extend([helper.make_attribute("plugin_namespace", "")]) + emblayernorm_node.attribute.extend([helper.make_attribute("plugin_version", "1")]) + emblayernorm_node.attribute.extend([helper.make_attribute("output_fp16", 1)]) + emblayernorm_node.attribute.extend([helper.make_attribute("full_mask", 1)]) + emblayernorm_node.attribute.extend([helper.make_attribute("mha_type_id", 2)]) + emblayernorm_node.attribute.extend([helper.make_attribute("pad_id", 0)]) + emblayernorm_node.attribute.extend([helper.make_attribute("bert_embeddings_layernorm_beta", embeddings_layernorm_beta)]) + emblayernorm_node.attribute.extend([helper.make_attribute("bert_embeddings_layernorm_gamma", embeddings_layernorm_gamma)]) + emblayernorm_node.attribute.extend([helper.make_attribute("bert_embeddings_word_embeddings", embeddings_word_embeddings)]) + emblayernorm_node.attribute.extend([helper.make_attribute("bert_embeddings_token_type_embeddings", embeddings_token_type_embeddings)]) + emblayernorm_node.attribute.extend([helper.make_attribute("bert_embeddings_position_embeddings", embeddings_position_embeddings)]) + + self.nodes_to_remove.extend(subgraph_nodes_remove) + + self.nodes_to_add.append(emblayernorm_node) + self.node_name_to_graph_name[emblayernorm_node.name] = self.this_graph_name \ No newline at end of file diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_fastgelu.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_fastgelu.py index 0e24a9dd7e018bd949f4a2bba18de7d2c909ce2b..067ff26e4eb51ea0df3ad6b49318179afd3b4177 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_fastgelu.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_fastgelu.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_format_roformer.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_format_roformer.py index 5b6d66ad3f6ae5e73a2f921c9b807fa22e439c33..1f60ab7628f1d700042cf1e025df5bb22fc1d641 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_format_roformer.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_format_roformer.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_gelu.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_gelu.py index f4c5c7e848dc033d69d5fad17834f1b20ed89bd0..714212664e452ad7a42daa3623185d973e4bb773 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_gelu.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_gelu.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
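The BERT/Albert embedding fusions in the hunks above collapse the word, token-type and position Gathers plus the LayerNormalization into one `CustomEmbLayerNormPluginDynamic_IxRT` node whose weights travel as tensor attributes. A condensed sketch of that construction is shown below; it is not the pass itself, the helper name and argument order are assumptions, and the weight arrays are expected to be NumPy arrays pulled from the original initializers.

```python
from onnx import TensorProto, helper

def make_emb_layernorm_node(input_ids, token_type_ids, mask,
                            word_emb, tok_emb, pos_emb, gamma, beta, outputs):
    def as_tensor(name, arr):
        return helper.make_tensor(name, TensorProto.FLOAT, arr.shape,
                                  arr.flatten().tolist())

    node = helper.make_node(
        "CustomEmbLayerNormPluginDynamic_IxRT",
        inputs=[input_ids, token_type_ids, mask],
        outputs=outputs,                      # [embedding_output, mask_index]
        name="BertEmbedLayerNormalization_0",
        domain="com.iluvatar",
    )
    node.attribute.extend([
        helper.make_attribute("plugin_namespace", ""),
        helper.make_attribute("plugin_version", "1"),
        helper.make_attribute("output_fp16", 1),
        # Embedding tables and LayerNorm parameters are baked into attributes.
        helper.make_attribute("bert_embeddings_word_embeddings",
                              as_tensor("bert_embeddings_word_embeddings", word_emb)),
        helper.make_attribute("bert_embeddings_token_type_embeddings",
                              as_tensor("bert_embeddings_token_type_embeddings", tok_emb)),
        helper.make_attribute("bert_embeddings_position_embeddings",
                              as_tensor("bert_embeddings_position_embeddings", pos_emb)),
        helper.make_attribute("bert_embeddings_layernorm_gamma",
                              as_tensor("bert_embeddings_layernorm_gamma", gamma)),
        helper.make_attribute("bert_embeddings_layernorm_beta",
                              as_tensor("bert_embeddings_layernorm_beta", beta)),
    ])
    return node
```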
@@ -5,8 +21,9 @@ from logging import getLogger from typing import Dict, Optional -from .fusion_base import Fusion from onnx import helper + +from .fusion_base import Fusion from .onnx_model import OnnxModel logger = getLogger(__name__) @@ -25,7 +42,9 @@ class FusionGelu(Fusion): return self.fuse_4(erf_node, input_name_to_nodes, output_name_to_node) - def fuse_1(self, erf_node, input_name_to_nodes: Dict, output_name_to_node: Dict) -> Optional[bool]: + def fuse_1( + self, erf_node, input_name_to_nodes: Dict, output_name_to_node: Dict + ) -> Optional[bool]: """ This pattern is from PyTorch model Fuse Gelu with Erf into one node: @@ -81,7 +100,9 @@ class FusionGelu(Fusion): return subgraph_output = mul_half.output[0] else: # pattern 1 - mul_half = self.model.match_parent(mul_after_erf, "Mul", another, output_name_to_node) + mul_half = self.model.match_parent( + mul_after_erf, "Mul", another, output_name_to_node + ) if mul_half is None: return @@ -100,13 +121,17 @@ class FusionGelu(Fusion): return self.nodes_to_remove.extend(subgraph_nodes) - fused_node = helper.make_node("Gelu", inputs=[subgraph_input], outputs=[subgraph_output]) + fused_node = helper.make_node( + "Gelu", inputs=[subgraph_input], outputs=[subgraph_output] + ) fused_node.domain = "com.microsoft" self.nodes_to_add.append(fused_node) self.node_name_to_graph_name[fused_node.name] = self.this_graph_name return True - def fuse_2(self, erf_node, input_name_to_nodes: Dict, output_name_to_node: Dict) -> Optional[bool]: + def fuse_2( + self, erf_node, input_name_to_nodes: Dict, output_name_to_node: Dict + ) -> Optional[bool]: """ This pattern is from Keras model Fuse Gelu with Erf into one node: @@ -174,13 +199,17 @@ class FusionGelu(Fusion): return self.nodes_to_remove.extend(subgraph_nodes) - fused_node = helper.make_node("Gelu", inputs=[root_node.output[0]], outputs=[mul.output[0]]) + fused_node = helper.make_node( + "Gelu", inputs=[root_node.output[0]], outputs=[mul.output[0]] + ) fused_node.domain = "com.microsoft" self.nodes_to_add.append(fused_node) self.node_name_to_graph_name[fused_node.name] = self.this_graph_name return True - def fuse_3(self, erf_node, input_name_to_nodes: Dict, output_name_to_node: Dict) -> Optional[bool]: + def fuse_3( + self, erf_node, input_name_to_nodes: Dict, output_name_to_node: Dict + ) -> Optional[bool]: """ This pattern is from TensorFlow model Fuse Gelu with Erf into one node: @@ -221,7 +250,9 @@ class FusionGelu(Fusion): if i < 0: return - root_node = self.model.get_parent(first_mul, 0 if i == 1 else 1, output_name_to_node) + root_node = self.model.get_parent( + first_mul, 0 if i == 1 else 1, output_name_to_node + ) if root_node is None: return @@ -232,7 +263,10 @@ class FusionGelu(Fusion): return last_mul = children[0] - if not (last_mul.input[0] == root_node.output[0] or last_mul.input[1] == root_node.output[0]): + if not ( + last_mul.input[0] == root_node.output[0] + or last_mul.input[1] == root_node.output[0] + ): return subgraph_nodes = [first_mul, erf_node, add_after_erf, mul_half, last_mul] @@ -245,13 +279,17 @@ class FusionGelu(Fusion): return self.nodes_to_remove.extend(subgraph_nodes) - fused_node = helper.make_node("Gelu", inputs=[root_node.output[0]], outputs=[last_mul.output[0]]) + fused_node = helper.make_node( + "Gelu", inputs=[root_node.output[0]], outputs=[last_mul.output[0]] + ) fused_node.domain = "com.microsoft" self.nodes_to_add.append(fused_node) self.node_name_to_graph_name[fused_node.name] = self.this_graph_name return True - def fuse_4(self, erf_node, input_name_to_nodes: 
Dict, output_name_to_node: Dict) -> Optional[bool]: + def fuse_4( + self, erf_node, input_name_to_nodes: Dict, output_name_to_node: Dict + ) -> Optional[bool]: """ This pattern is from TensorFlow model Fuse Gelu with Erf into one node: @@ -288,7 +326,9 @@ class FusionGelu(Fusion): return mul_after_erf = children[0] - mul_before_erf = self.model.match_parent(erf_node, "Mul", 0, output_name_to_node) + mul_before_erf = self.model.match_parent( + erf_node, "Mul", 0, output_name_to_node + ) if mul_before_erf is None: return @@ -307,7 +347,9 @@ class FusionGelu(Fusion): return subgraph_output = mul_half.output[0] else: # pattern 1 - mul_half = self.model.match_parent(mul_after_erf, "Mul", another, output_name_to_node) + mul_half = self.model.match_parent( + mul_after_erf, "Mul", another, output_name_to_node + ) if mul_half is None: return @@ -319,15 +361,23 @@ class FusionGelu(Fusion): subgraph_output = mul_after_erf.output[0] - subgraph_nodes = [mul_before_erf, erf_node, add_after_erf, mul_after_erf, mul_half] + subgraph_nodes = [ + mul_before_erf, + erf_node, + add_after_erf, + mul_after_erf, + mul_half, + ] if not self.model.is_safe_to_fuse_nodes( subgraph_nodes, [subgraph_output], input_name_to_nodes, output_name_to_node ): return self.nodes_to_remove.extend(subgraph_nodes) - fused_node = helper.make_node("Gelu", inputs=[subgraph_input], outputs=[subgraph_output]) + fused_node = helper.make_node( + "Gelu", inputs=[subgraph_input], outputs=[subgraph_output] + ) fused_node.domain = "com.microsoft" self.nodes_to_add.append(fused_node) self.node_name_to_graph_name[fused_node.name] = self.this_graph_name - return True \ No newline at end of file + return True diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_gelu_approximation.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_gelu_approximation.py index 35f4b93a732e7cc73dd5f9ae917f75bd505c93a3..a89e558cb76aa8208e4a19983f038e9f3584ffdb 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_gelu_approximation.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_gelu_approximation.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
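The `fuse_1` through `fuse_4` matchers in the FusionGelu diff above all look for variants of the same Erf-based subgraph (Div by sqrt(2), Erf, Add 1, Mul, Mul 0.5) and replace it with a single `Gelu` node. As a scalar reference, independent of the pass itself, the fused node is expected to be numerically equivalent to:

```python
import math

def gelu_erf(x: float) -> float:
    """Erf formulation of Gelu that the matched subgraph computes."""
    return 0.5 * x * (1.0 + math.erf(x / math.sqrt(2.0)))
```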
@@ -5,8 +21,9 @@ from logging import getLogger -from .fusion_base import Fusion from onnx import helper + +from .fusion_base import Fusion from .onnx_model import OnnxModel @@ -19,7 +36,9 @@ class FusionGeluApproximation(Fusion): "FastGelu", inputs=node.input, outputs=node.output, - name=self.model.create_node_name("FastGelu", node.op_type + "_Approximation"), + name=self.model.create_node_name( + "FastGelu", node.op_type + "_Approximation" + ), ) new_node.domain = "com.microsoft" self.nodes_to_remove.append(node) diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_gpt_attention.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_gpt_attention.py index b856dd19de9f03cd5b799eb1e042ed6bce193fd2..805cd3bf7dfbf337a633eaa583d14833cdf86282 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_gpt_attention.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_gpt_attention.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
@@ -5,9 +21,10 @@ from logging import getLogger import numpy as np +from onnx import TensorProto, helper, numpy_helper + from .fusion_base import Fusion from .fusion_utils import FusionUtils -from onnx import TensorProto, helper, numpy_helper from .onnx_model import OnnxModel logger = getLogger(__name__) @@ -20,7 +37,9 @@ class FusionGptAttentionPastBase(Fusion): super().__init__(model, "Attention", "LayerNormalization", "with past") self.num_heads = num_heads self.utils = FusionUtils(model) - self.casted_attention_mask = {} # map from name of attention mask to the name that casted to int32 + self.casted_attention_mask = ( + {} + ) # map from name of attention mask to the name that casted to int32 def match_past_pattern_1(self, concat_k, concat_v, output_name_to_node): # Pattern 1: @@ -55,7 +74,9 @@ class FusionGptAttentionPastBase(Fusion): if parent.op_type == "Gather": gather_past_k = parent else: - past_k_nodes = self.model.match_parent_path(concat_k, ["Transpose", "Gather"], [0, 0]) + past_k_nodes = self.model.match_parent_path( + concat_k, ["Transpose", "Gather"], [0, 0] + ) if past_k_nodes is None: logger.debug("match_past_pattern_1: failed match Transpose and Gather") return None @@ -106,27 +127,39 @@ class FusionGptAttentionPastBase(Fusion): opset_version = self.model.get_opset_version() if opset_version < 13: if not FusionUtils.check_node_attribute(squeeze, "axes", [0]): - logger.debug("match_past_pattern_2: axes != [0] for Squeeze in past path") + logger.debug( + "match_past_pattern_2: axes != [0] for Squeeze in past path" + ) return None if not FusionUtils.check_node_attribute(split, "split", [1, 1]): - logger.debug("match_past_pattern_2: split != [1, 1] for Split in past path") + logger.debug( + "match_past_pattern_2: split != [1, 1] for Split in past path" + ) return None else: if not self.utils.check_node_input_value(squeeze, 1, [0]): - logger.debug("match_past_pattern_2: axes != [0] for Squeeze in past path") + logger.debug( + "match_past_pattern_2: axes != [0] for Squeeze in past path" + ) return None if not self.utils.check_node_input_value(split, 1, [1, 1]): - logger.debug("match_past_pattern_2: split != [1, 1] for Split in past path") + logger.debug( + "match_past_pattern_2: split != [1, 1] for Split in past path" + ) return None if not FusionUtils.check_node_attribute(split, "axis", 0, default_value=0): - logger.debug("match_past_pattern_2: attribute axis of Split are not expected in past path") + logger.debug( + "match_past_pattern_2: attribute axis of Split are not expected in past path" + ) return None past = split.input[0] - past_k_nodes = self.model.match_parent_path(concat_k, ["Squeeze", "Split"], [0, 0]) + past_k_nodes = self.model.match_parent_path( + concat_k, ["Squeeze", "Split"], [0, 0] + ) if past_k_nodes is None: logger.debug("match_past_pattern_2: failed to match past_k_nodes path") return None @@ -159,10 +192,14 @@ class FusionGptAttentionPastBase(Fusion): if input_name in self.casted_attention_mask: attention_mask_input_name = self.casted_attention_mask[input_name] elif self.model.find_graph_input(input_name): - casted, attention_mask_input_name = self.utils.cast_graph_input_to_int32(input_name) + casted, attention_mask_input_name = self.utils.cast_graph_input_to_int32( + input_name + ) self.casted_attention_mask[input_name] = attention_mask_input_name else: - attention_mask_input_name, cast_node = self.utils.cast_input_to_int32(input_name) + attention_mask_input_name, cast_node = self.utils.cast_input_to_int32( + input_name + ) 
self.casted_attention_mask[input_name] = attention_mask_input_name return attention_mask_input_name @@ -245,7 +282,9 @@ class FusionGptAttention(FusionGptAttentionPastBase): another_input = add_qkv.input[1 - return_indice[0]] - v_nodes = self.model.match_parent_path(matmul_qkv, ["Concat", "Transpose", "Reshape", "Split"], [1, 1, 0, 0]) + v_nodes = self.model.match_parent_path( + matmul_qkv, ["Concat", "Transpose", "Reshape", "Split"], [1, 1, 0, 0] + ) if v_nodes is None: logger.debug("fuse_attention: failed to match v path") return @@ -284,7 +323,9 @@ class FusionGptAttention(FusionGptAttentionPastBase): slice_mask = None input_mask_nodes = None concat_k_to_match = None - qk_nodes = self.model.match_parent_path(matmul_qkv, ["Softmax", "Sub", "Mul", "Div", "MatMul"], [0, 0, 0, 0, 0]) + qk_nodes = self.model.match_parent_path( + matmul_qkv, ["Softmax", "Sub", "Mul", "Div", "MatMul"], [0, 0, 0, 0, 0] + ) if qk_nodes is not None: (softmax_qk, sub_qk, mul_qk, div_qk, matmul_qk) = qk_nodes mask_nodes = self.model.match_parent_path( @@ -351,7 +392,9 @@ class FusionGptAttention(FusionGptAttentionPastBase): output_name_to_node, ) # yapf: disable if input_mask_nodes is None: - logger.debug("fuse_attention: failed to match input attention mask path") + logger.debug( + "fuse_attention: failed to match input attention mask path" + ) return mask_nodes = self.model.match_parent_path( @@ -376,7 +419,9 @@ class FusionGptAttention(FusionGptAttentionPastBase): slice_mask = mask_nodes[2] - div_or_concat = self.model.get_parent(mask_nodes[-1], 0, output_name_to_node) + div_or_concat = self.model.get_parent( + mask_nodes[-1], 0, output_name_to_node + ) if div_or_concat.op_type == "Div": div_mask = div_or_concat if div_qk != div_mask: @@ -388,19 +433,27 @@ class FusionGptAttention(FusionGptAttentionPastBase): logger.debug("fuse_attention: failed to match mask path") # Validate that the mask data is either lower triangular (unidirectional) or all ones - mask_data = numpy_helper.to_array(self.model.get_initializer(slice_mask.input[0])) + mask_data = numpy_helper.to_array( + self.model.get_initializer(slice_mask.input[0]) + ) if not ( - len(mask_data.shape) == 4 and mask_data.shape[:2] == (1, 1) and mask_data.shape[2] == mask_data.shape[3] + len(mask_data.shape) == 4 + and mask_data.shape[:2] == (1, 1) + and mask_data.shape[2] == mask_data.shape[3] ): logger.debug("fuse_attention: skip since mask shape is not 1x1xWxW") return if np.allclose(mask_data, np.ones_like(mask_data)): is_unidirectional = False elif not np.allclose(mask_data, np.tril(np.ones_like(mask_data))): - logger.debug("fuse_attention: skip since mask is neither lower triangular nor ones") + logger.debug( + "fuse_attention: skip since mask is neither lower triangular nor ones" + ) return - q_nodes = self.model.match_parent_path(matmul_qk, ["Transpose", "Reshape", "Split"], [0, 0, 0]) + q_nodes = self.model.match_parent_path( + matmul_qk, ["Transpose", "Reshape", "Split"], [0, 0, 0] + ) if q_nodes is None: logger.debug("fuse_attention: failed to match q path") return @@ -409,7 +462,9 @@ class FusionGptAttention(FusionGptAttentionPastBase): logger.debug("fuse_attention: skip since split_fc != split_q") return - k_nodes = self.model.match_parent_path(matmul_qk, ["Concat", "Transpose", "Reshape", "Split"], [1, 1, 0, 0]) + k_nodes = self.model.match_parent_path( + matmul_qk, ["Concat", "Transpose", "Reshape", "Split"], [1, 1, 0, 0] + ) if k_nodes is None: # This pattern is from pytorch 1.7.1 and transformers 4.6.1 k_nodes = self.model.match_parent_path( @@ 
-438,9 +493,9 @@ class FusionGptAttention(FusionGptAttentionPastBase): attention_mask_input_name = self.cast_attention_mask(input_name) # Match past and present paths - past = self.match_past_pattern_1(concat_k, concat_v, output_name_to_node) or self.match_past_pattern_2( + past = self.match_past_pattern_1( concat_k, concat_v, output_name_to_node - ) + ) or self.match_past_pattern_2(concat_k, concat_v, output_name_to_node) if past is None: logger.info("fuse_attention: failed to match past path") return diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_gpt_attention_megatron.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_gpt_attention_megatron.py index 8510ae42937b77d7c7d26941d1b0be9abe8b9679..138a9c5ff495d59830ec0c7761a674d7beacb834 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_gpt_attention_megatron.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_gpt_attention_megatron.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
@@ -5,10 +21,11 @@ from logging import getLogger import numpy as np +from onnx import TensorProto, helper, numpy_helper + from .fusion_base import Fusion from .fusion_gpt_attention import FusionGptAttentionPastBase from .fusion_utils import FusionUtils -from onnx import TensorProto, helper, numpy_helper from .onnx_model import OnnxModel logger = getLogger(__name__) @@ -56,7 +73,9 @@ class FusionGptAttentionMegatron(FusionGptAttentionPastBase): attention_node.attribute.extend( [ helper.make_attribute("num_heads", self.num_heads), - helper.make_attribute("unidirectional", 0), # unidirectional shall not be ON for 4D attention mask + helper.make_attribute( + "unidirectional", 0 + ), # unidirectional shall not be ON for 4D attention mask ] ) @@ -81,11 +100,15 @@ class FusionGptAttentionMegatron(FusionGptAttentionPastBase): (mul_mask, sub_mask, last_slice_mask, slice_mask) = mask_nodes if mul_qk.input[1] != last_slice_mask.output[0]: - logger.debug("fuse_attention failed: mul_qk.input[1] != last_slice_mask.output[0]") + logger.debug( + "fuse_attention failed: mul_qk.input[1] != last_slice_mask.output[0]" + ) return None if not self.utils.check_node_input_value(mul_mask, 1, 10000.0): - logger.debug("fuse_attention failed: mul_mask input 1 is not constant 10000.0") + logger.debug( + "fuse_attention failed: mul_mask input 1 is not constant 10000.0" + ) return None if not self.utils.check_node_input_value(sub_mask, 0, 1.0): @@ -97,23 +120,33 @@ class FusionGptAttentionMegatron(FusionGptAttentionPastBase): return None if not self.utils.check_node_input_value(last_slice_mask, 1, [0]): - logger.debug("fuse_attention failed: last_slice_mask input 1 (starts) is not constant [0]") + logger.debug( + "fuse_attention failed: last_slice_mask input 1 (starts) is not constant [0]" + ) return None if not self.utils.check_node_input_value(last_slice_mask, 3, [3]): - logger.debug("fuse_attention failed: last_slice_mask input 3 (axes) is not constant [3]") + logger.debug( + "fuse_attention failed: last_slice_mask input 3 (axes) is not constant [3]" + ) return False if not self.utils.check_node_input_value(last_slice_mask, 4, [1]): - logger.debug("fuse_attention failed: last_slice_mask input 4 (steps) is not constant [1]") + logger.debug( + "fuse_attention failed: last_slice_mask input 4 (steps) is not constant [1]" + ) return False if not self.utils.check_node_input_value(slice_mask, 3, [2]): - logger.debug("fuse_attention failed: slice_mask input 3 (axes) is not constant [2]") + logger.debug( + "fuse_attention failed: slice_mask input 3 (axes) is not constant [2]" + ) return None if not self.utils.check_node_input_value(slice_mask, 4, [1]): - logger.debug("fuse_attention failed: slice_mask input 4 (steps) is not constant [1]") + logger.debug( + "fuse_attention failed: slice_mask input 4 (steps) is not constant [1]" + ) return None last_slice_path = self.model.match_parent_path( @@ -144,7 +177,10 @@ class FusionGptAttentionMegatron(FusionGptAttentionPastBase): ["Unsqueeze", "Sub", "Gather", "Shape", "LayerNormalization"], [1, 0, 1, 0, 0], ) - if first_slice_sub_1 is None or first_slice_sub_1[-1] != layernorm_before_attention: + if ( + first_slice_sub_1 is None + or first_slice_sub_1[-1] != layernorm_before_attention + ): logger.debug("fuse_attention: failed to match last slice sub path 1") return None @@ -199,10 +235,14 @@ class FusionGptAttentionMegatron(FusionGptAttentionPastBase): layernorm_before_attention, ) = v_nodes if skip_input != layernorm_before_attention.input[0]: - logger.debug("fuse_attention: 
skip_input != layernorm_before_attention.input[0]") + logger.debug( + "fuse_attention: skip_input != layernorm_before_attention.input[0]" + ) return - qk_nodes = self.model.match_parent_path(matmul_qkv, ["Softmax", "Sub", "Mul", "MatMul"], [0, 0, 0, 0]) + qk_nodes = self.model.match_parent_path( + matmul_qkv, ["Softmax", "Sub", "Mul", "MatMul"], [0, 0, 0, 0] + ) if qk_nodes is None: logger.debug("fuse_attention: failed to match qk path") return None @@ -211,9 +251,13 @@ class FusionGptAttentionMegatron(FusionGptAttentionPastBase): logger.debug("fuse_attention failed: softmax_qk axis != 3") return None - attention_mask = self.match_mask(sub_qk, mul_qk, matmul_qk, layernorm_before_attention) + attention_mask = self.match_mask( + sub_qk, mul_qk, matmul_qk, layernorm_before_attention + ) - q_nodes = self.model.match_parent_path(matmul_qk, ["Div", "Transpose", "Reshape", "Split"], [0, 0, 0, 0]) + q_nodes = self.model.match_parent_path( + matmul_qk, ["Div", "Transpose", "Reshape", "Split"], [0, 0, 0, 0] + ) if q_nodes is None: logger.debug("fuse_attention: failed to match q path") return @@ -249,19 +293,25 @@ class FusionGptAttentionMegatron(FusionGptAttentionPastBase): num_heads = value[2] if num_heads != self.num_heads: - logger.info(f"Detected num_heads={num_heads}. Ignore user specified value {self.num_heads}") + logger.info( + f"Detected num_heads={num_heads}. Ignore user specified value {self.num_heads}" + ) self.num_heads = num_heads hidden_size_per_head = value[3] i, value = self.model.get_constant_input(div_k) expected_value = float(np.sqrt(np.sqrt(hidden_size_per_head))) if not is_close(value, expected_value): - logger.debug(f"fuse_attention: div_k value={value} expected={expected_value}") + logger.debug( + f"fuse_attention: div_k value={value} expected={expected_value}" + ) return i, value = self.model.get_constant_input(div_q) if not is_close(value, expected_value): - logger.debug(f"fuse_attention: div_q value={value} expected={expected_value}") + logger.debug( + f"fuse_attention: div_q value={value} expected={expected_value}" + ) return # Match past and present paths diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_gpt_attention_no_past.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_gpt_attention_no_past.py index ca88f144fb2fc0095c03a79bc040e8a369255603..4e538cf5833d096635e461eae34ab35edd20d3b1 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_gpt_attention_no_past.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_gpt_attention_no_past.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
@@ -55,7 +71,7 @@ class FusionGptAttentionNoPast(Fusion): tensor_shape = [dim for dim in tensor_value.dims] break head_dim = math.ceil(div_value * div_value) - hidden_size = tensor_shape[0] + hidden_size = tensor_shape[1] num_heads = hidden_size // head_dim return num_heads, hidden_size @@ -219,20 +235,27 @@ class FusionGptAttentionNoPast(Fusion): if where_qk is None: return + global num_heads, hidden_size if self.where_qk_shared is None: where_qk.input[1] = mask_nodes[0].output[0] div_qk.output[0] = where_qk.output[0] add_qk.input[1 - mask_return_indices[0]] = div_qk.output[0] self.where_qk_shared = where_qk self.nodes_to_remove.extend([softmax_qk, add_qk, div_qk, matmul_qk]) + + num_heads, hidden_size = self.get_num_heads_and_hidden_size( + custom_fc_after_attention, div_qk + ) + self.nodes_to_remove.extend([k_nodes[0]]) + self.nodes_to_remove.extend(v_nodes[:-2]) else: self.nodes_to_remove.extend( [softmax_qk, add_qk, where_qk, div_qk, matmul_qk] - ) + ) + self.nodes_to_remove.extend(q_nodes) + self.nodes_to_remove.extend(k_nodes) + self.nodes_to_remove.extend(v_nodes[:-1]) - num_heads, hidden_size = self.get_num_heads_and_hidden_size( - custom_fc_after_attention, div_qk - ) new_node = self.create_attention_node( num_heads, hidden_size, @@ -247,6 +270,4 @@ class FusionGptAttentionNoPast(Fusion): if reshape_2 is not None: self.nodes_to_remove.extend([reshape_2]) self.nodes_to_remove.extend([transpose_qkv, matmul_qkv]) - self.nodes_to_remove.extend(q_nodes) - self.nodes_to_remove.extend(k_nodes) - self.nodes_to_remove.extend(v_nodes[:-1]) + diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_layernorm.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_layernorm.py index 727a1aa50848f7008ebb752a1aebc765efbc0e61..d19c3aff604ed6f3ae673ffa0c67143b66e36aaf 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_layernorm.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_layernorm.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
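[Reviewer note on the fusion_gpt_attention_no_past.py hunks above] Beyond the node-removal reshuffle (q/k/v nodes are now removed inside the where_qk branches, and num_heads/hidden_size are computed once on the first match and reused via module-level globals), the substantive change is that hidden_size is now read from dimension 1 of the matched weight tensor instead of dimension 0, while head_dim is still recovered from the Div constant (which equals sqrt(head_dim) for scaled dot-product attention). The following is only an illustrative, standalone sketch of that arithmetic, not the repository's helper; the weight layout [*, hidden_size] and the example shape are assumptions.

    import math

    def derive_heads_and_hidden(div_value, weight_shape):
        # div_value: constant of the Div node that scales QK^T, i.e. sqrt(head_dim)
        head_dim = math.ceil(div_value * div_value)   # e.g. 8.0 -> 64
        # assumed layout: weight_shape = [*, hidden_size], hence index 1
        # (index 0 would pick up the other dimension of the FC weight)
        hidden_size = weight_shape[1]
        num_heads = hidden_size // head_dim
        return num_heads, hidden_size

    # hypothetical 12-head, 768-wide model: Div constant 8.0, weight [3072, 768]
    print(derive_heads_and_hidden(8.0, (3072, 768)))  # -> (12, 768)
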
diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_options.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_options.py index e0a1a535b13b391736f91064799b89f422eb600a..c0bb11b3bdd6bcbb994b8ad83501be2d9c1c4505 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_options.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_options.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. @@ -31,6 +47,7 @@ class FusionOptions: self.enable_format_roformer = False self.enable_gpt2_classify = False self.enable_vit = False + self.enable_omdet = False self.attention_mask_format = AttentionMaskFormat.AttentionMask if model_type == "gpt2": @@ -42,6 +59,8 @@ class FusionOptions: self.enable_format_roformer = True elif model_type == "vit": self.enable_vit = True + elif model_type == "omdet": + self.enable_omdet = True def use_raw_attention_mask(self, use_raw_mask=True): if use_raw_mask: diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_qordered_attention.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_qordered_attention.py index b9b502acb41a9a34f31b4ace3c9d01ea218382ec..9afa3edbc37f2ddd7b15c3eb976ee1cd9e72e356 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_qordered_attention.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_qordered_attention.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
@@ -7,10 +23,11 @@ from logging import getLogger from typing import Tuple import numpy as np +from onnx import NodeProto, helper + from .fusion_attention import AttentionMask from .fusion_base import Fusion from .fusion_utils import FusionUtils, NumpyHelper -from onnx import NodeProto, helper from .onnx_model import OnnxModel logger = getLogger(__name__) @@ -48,19 +65,27 @@ class FusionQOrderedAttention(Fusion): constant_node = self.model.match_parent_path(reshape_q, ["Constant"], [1]) if constant_node is None: - return self.num_heads, self.hidden_size # Fall back to user specified value + return ( + self.num_heads, + self.hidden_size, + ) # Fall back to user specified value else: constant_node = constant_node[0] if len(constant_node.attribute) != 1: - return self.num_heads, self.hidden_size # Fall back to user specified value + return ( + self.num_heads, + self.hidden_size, + ) # Fall back to user specified value # This is assuming it is a Tensor attribute (this is a safe assumption) q_shape = constant_node.attribute[0].t q_shape_value = NumpyHelper.to_array(q_shape) if len(q_shape_value) != 4 or (q_shape_value[2] <= 0 or q_shape_value[3] <= 0): - logger.debug(f"q_shape_value={q_shape_value}. Expected value are like [0, 0, num_heads, head_size].") + logger.debug( + f"q_shape_value={q_shape_value}. Expected value are like [0, 0, num_heads, head_size]." + ) return self.num_heads, self.hidden_size # Fall back to user specified value num_heads = q_shape_value[2] @@ -69,7 +94,9 @@ class FusionQOrderedAttention(Fusion): if self.num_heads > 0 and num_heads != self.num_heads: if self.num_heads_warning: - logger.warning(f"--num_heads is {self.num_heads}. Detected value is {num_heads}. Using detected value.") + logger.warning( + f"--num_heads is {self.num_heads}. Detected value is {num_heads}. Using detected value." + ) self.num_heads_warning = False # Do not show the warning more than once if self.hidden_size > 0 and hidden_size != self.hidden_size: @@ -77,7 +104,9 @@ class FusionQOrderedAttention(Fusion): logger.warning( f"--hidden_size is {self.hidden_size}. Detected value is {hidden_size}. Using detected value." 
) - self.hidden_size_warning = False # Do not show the warning more than once + self.hidden_size_warning = ( + False # Do not show the warning more than once + ) return num_heads, hidden_size @@ -101,7 +130,9 @@ class FusionQOrderedAttention(Fusion): ) if dequantize_input is None: - logger.debug("fuse_qordered_attention: failed to match input qdq nodes path") + logger.debug( + "fuse_qordered_attention: failed to match input qdq nodes path" + ) return dequantize_input = dequantize_input[-1] @@ -109,7 +140,15 @@ class FusionQOrderedAttention(Fusion): # QKV nodes qkv_nodes = self.model.match_parent_path( start_node, - ["Add", "MatMul", "Reshape", "Transpose", "DequantizeLinear", "QuantizeLinear", "MatMul"], + [ + "Add", + "MatMul", + "Reshape", + "Transpose", + "DequantizeLinear", + "QuantizeLinear", + "MatMul", + ], [None, None, 0, 0, 0, 0, 0], ) @@ -117,7 +156,15 @@ class FusionQOrderedAttention(Fusion): logger.debug("fuse_qordered_attention: failed to match qkv path") return - (_, projection_matmul, reshape_qkv, transpose_qkv, dequantize_qkv, quantize_qkv, matmul_qkv) = qkv_nodes + ( + _, + projection_matmul, + reshape_qkv, + transpose_qkv, + dequantize_qkv, + quantize_qkv, + matmul_qkv, + ) = qkv_nodes # Make sure the Q/DQ has the proper zero points and constant per-tensor scales if not FusionUtils.check_qdq_node_for_fusion(quantize_qkv, self.model): @@ -145,7 +192,14 @@ class FusionQOrderedAttention(Fusion): # V nodes v_nodes = self.model.match_parent_path( matmul_qkv, - ["Transpose", "Reshape", "DequantizeLinear", "QuantizeLinear", "Add", "MatMul"], + [ + "Transpose", + "Reshape", + "DequantizeLinear", + "QuantizeLinear", + "Add", + "MatMul", + ], [1, 0, 0, 0, 0, None], ) @@ -163,7 +217,9 @@ class FusionQOrderedAttention(Fusion): return # V MatMul weight - dequantize_v_matmul_weight = self.model.match_parent_path(matmul_v, ["DequantizeLinear"], [1]) + dequantize_v_matmul_weight = self.model.match_parent_path( + matmul_v, ["DequantizeLinear"], [1] + ) if dequantize_v_matmul_weight is None: logger.debug("fuse_qordered_attention: failed to match v path") @@ -176,7 +232,9 @@ class FusionQOrderedAttention(Fusion): # Make sure the upstream DequantizeLinear-1 has the proper zero points and scales # Per-channel scales are supported for weights alone - if not FusionUtils.check_qdq_node_for_fusion(dequantize_v_matmul_weight, self.model, False): + if not FusionUtils.check_qdq_node_for_fusion( + dequantize_v_matmul_weight, self.model, False + ): return # QK nodes @@ -226,7 +284,14 @@ class FusionQOrderedAttention(Fusion): # Q nodes q_nodes = self.model.match_parent_path( matmul_qk, - ["Transpose", "Reshape", "DequantizeLinear", "QuantizeLinear", "Add", "MatMul"], + [ + "Transpose", + "Reshape", + "DequantizeLinear", + "QuantizeLinear", + "Add", + "MatMul", + ], [0, 0, 0, 0, 0, None], ) @@ -244,7 +309,9 @@ class FusionQOrderedAttention(Fusion): return # Q MatMul weight - dequantize_q_matmul_weight = self.model.match_parent_path(matmul_q, ["DequantizeLinear"], [1]) + dequantize_q_matmul_weight = self.model.match_parent_path( + matmul_q, ["DequantizeLinear"], [1] + ) if dequantize_q_matmul_weight is None: logger.debug("fuse_qordered_attention: failed to match q path") @@ -257,13 +324,22 @@ class FusionQOrderedAttention(Fusion): # Make sure the upstream DequantizeLinear-1 has the proper zero points and scales # Per-channel scales are supported for weights alone - if not FusionUtils.check_qdq_node_for_fusion(dequantize_q_matmul_weight, self.model, False): + if not FusionUtils.check_qdq_node_for_fusion( 
+ dequantize_q_matmul_weight, self.model, False + ): return # K nodes k_nodes = self.model.match_parent_path( matmul_qk, - ["Transpose", "Reshape", "DequantizeLinear", "QuantizeLinear", "Add", "MatMul"], + [ + "Transpose", + "Reshape", + "DequantizeLinear", + "QuantizeLinear", + "Add", + "MatMul", + ], [1, 0, 0, 0, 0, None], ) @@ -281,7 +357,9 @@ class FusionQOrderedAttention(Fusion): return # K MatMul weight - dequantize_k_matmul_weight = self.model.match_parent_path(matmul_k, ["DequantizeLinear"], [1]) + dequantize_k_matmul_weight = self.model.match_parent_path( + matmul_k, ["DequantizeLinear"], [1] + ) if dequantize_k_matmul_weight is None: logger.debug("fuse_qordered_attention: failed to match k path") @@ -294,7 +372,9 @@ class FusionQOrderedAttention(Fusion): # Make sure the upstream DequantizeLinear-1 has the proper zero points and scales # Per-channel scales are supported for weights alone - if not FusionUtils.check_qdq_node_for_fusion(dequantize_k_matmul_weight, self.model, False): + if not FusionUtils.check_qdq_node_for_fusion( + dequantize_k_matmul_weight, self.model, False + ): return # Mask nodes @@ -320,7 +400,11 @@ class FusionQOrderedAttention(Fusion): vw_out_size = np.prod(vw.shape[1:]) # Form QOrderedAttention node - if matmul_v.input[0] == root_input and matmul_q.input[0] == root_input and matmul_k.input[0] == root_input: + if ( + matmul_v.input[0] == root_input + and matmul_q.input[0] == root_input + and matmul_k.input[0] == root_input + ): mask_index = self.attention_mask.process_mask(mask_nodes[-1].input[0]) # Ascertain `num_heads` and `hidden_size` @@ -372,13 +456,19 @@ class FusionQOrderedAttention(Fusion): # Transpose weight 'B' from order ROW to order COL # This offline transpose is needed only while using the CUDA EP # TODO: Make this fusion logic EP-agnostic ? 
- q_weight_tensor = self.model.get_initializer(dequantize_q_matmul_weight.input[0]) + q_weight_tensor = self.model.get_initializer( + dequantize_q_matmul_weight.input[0] + ) FusionUtils.transpose_2d_int8_tensor(q_weight_tensor) - k_weight_tensor = self.model.get_initializer(dequantize_k_matmul_weight.input[0]) + k_weight_tensor = self.model.get_initializer( + dequantize_k_matmul_weight.input[0] + ) FusionUtils.transpose_2d_int8_tensor(k_weight_tensor) - v_weight_tensor = self.model.get_initializer(dequantize_v_matmul_weight.input[0]) + v_weight_tensor = self.model.get_initializer( + dequantize_v_matmul_weight.input[0] + ) FusionUtils.transpose_2d_int8_tensor(v_weight_tensor) # Name and create Attention node @@ -391,15 +481,25 @@ class FusionQOrderedAttention(Fusion): name=attention_node_name, ) - self.model.replace_node_input(dequantize_qkv, dequantize_qkv.input[0], attention_node.output[0]) - self.model.replace_node_input(projection_matmul, projection_matmul.input[0], dequantize_qkv.output[0]) + self.model.replace_node_input( + dequantize_qkv, dequantize_qkv.input[0], attention_node.output[0] + ) + self.model.replace_node_input( + projection_matmul, projection_matmul.input[0], dequantize_qkv.output[0] + ) - attention_node.attribute.extend([helper.make_attribute("num_heads", num_heads)]) + attention_node.attribute.extend( + [helper.make_attribute("num_heads", num_heads)] + ) attention_node.attribute.extend([helper.make_attribute("order_input", 1)]) attention_node.attribute.extend([helper.make_attribute("order_weight", 0)]) attention_node.attribute.extend([helper.make_attribute("order_output", 1)]) attention_node.attribute.extend( - [helper.make_attribute("qkv_hidden_sizes", [qw_out_size, kw_out_size, vw_out_size])] + [ + helper.make_attribute( + "qkv_hidden_sizes", [qw_out_size, kw_out_size, vw_out_size] + ) + ] ) attention_node.domain = "com.microsoft" @@ -407,13 +507,19 @@ class FusionQOrderedAttention(Fusion): self.nodes_to_add.append(attention_node) self.node_name_to_graph_name[attention_node.name] = self.this_graph_name - self.nodes_to_remove.extend([reshape_qkv, transpose_qkv, quantize_qkv, matmul_qkv]) + self.nodes_to_remove.extend( + [reshape_qkv, transpose_qkv, quantize_qkv, matmul_qkv] + ) self.nodes_to_remove.extend(qk_nodes) self.nodes_to_remove.extend(q_nodes) self.nodes_to_remove.extend(k_nodes) self.nodes_to_remove.extend(v_nodes) self.nodes_to_remove.extend( - [dequantize_q_matmul_weight, dequantize_k_matmul_weight, dequantize_v_matmul_weight] + [ + dequantize_q_matmul_weight, + dequantize_k_matmul_weight, + dequantize_v_matmul_weight, + ] ) # Use prune graph to remove mask nodes since they are shared by all attention nodes. diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_qordered_gelu.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_qordered_gelu.py index 3ce59f784bc9242213c9e0dc699764d8c50e0fb2..ebd165c4bc5da002eb53b2376c1e69facf40dec4 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_qordered_gelu.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_qordered_gelu.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. @@ -6,9 +22,10 @@ from logging import getLogger from typing import Dict +from onnx import helper + from .fusion_base import Fusion from .fusion_utils import FusionUtils -from onnx import helper from .onnx_model import OnnxModel logger = getLogger(__name__) @@ -51,7 +68,9 @@ class FusionQOrderedGelu(Fusion): if len(gelu_children) == 2: downstream_shape_node = gelu_children[1] - if not FusionUtils.check_qdq_node_for_fusion(downstream_quantize_node, self.model): + if not FusionUtils.check_qdq_node_for_fusion( + downstream_quantize_node, self.model + ): return # The first input to Gelu should flow through a DequantizeLinear node @@ -66,12 +85,16 @@ class FusionQOrderedGelu(Fusion): upstream_dequantize_node = first_input_parent_nodes[0] - if not FusionUtils.check_qdq_node_for_fusion(upstream_dequantize_node, self.model): + if not FusionUtils.check_qdq_node_for_fusion( + upstream_dequantize_node, self.model + ): return # Fusion logic subgraph_nodes = [node] # Gelu/FastGelu - subgraph_nodes.extend([downstream_quantize_node, upstream_dequantize_node]) # Relevant Q, DQ nodes + subgraph_nodes.extend( + [downstream_quantize_node, upstream_dequantize_node] + ) # Relevant Q, DQ nodes if not self.model.is_safe_to_fuse_nodes( subgraph_nodes, @@ -94,7 +117,9 @@ class FusionQOrderedGelu(Fusion): downstream_quantize_node.input[1], ], outputs=[downstream_quantize_node.output[0]], - name=self.model.create_node_name("QOrderedGelu", name_prefix="QOrderedGelu"), + name=self.model.create_node_name( + "QOrderedGelu", name_prefix="QOrderedGelu" + ), ) # Arrange the downstream Shape's input to be fed from the @@ -102,7 +127,9 @@ class FusionQOrderedGelu(Fusion): # be deemed safe if downstream_shape_node is not None: self.model.replace_node_input( - downstream_shape_node, downstream_shape_node.input[0], downstream_quantize_node.output[0] + downstream_shape_node, + downstream_shape_node.input[0], + downstream_quantize_node.output[0], ) # TODO: We only support CuBlasLt order ORDER_ROW for now. diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_qordered_layernorm.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_qordered_layernorm.py index 08def4a20f205658df7ca9371e9fb9509103657b..94e38a0f5b549cb217359926172eb4aa510ad68b 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_qordered_layernorm.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_qordered_layernorm.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. @@ -5,9 +21,10 @@ from logging import getLogger from typing import Dict +from onnx import helper + from .fusion_base import Fusion from .fusion_utils import FusionUtils -from onnx import helper from .onnx_model import OnnxModel logger = getLogger(__name__) @@ -36,7 +53,11 @@ class FusionQOrderedLayerNormalization(Fusion): # Should have 2 children - QuantizeLinear + Shape if not ( (len(children) == 1 and children[0].op_type == "QuantizeLinear") - or (len(children) == 2 and children[0].op_type == "QuantizeLinear" and children[1].op_type == "Shape") + or ( + len(children) == 2 + and children[0].op_type == "QuantizeLinear" + and children[1].op_type == "Shape" + ) ): return @@ -46,7 +67,9 @@ class FusionQOrderedLayerNormalization(Fusion): if len(children) == 2: downstream_shape_node = children[1] - if not FusionUtils.check_qdq_node_for_fusion(downstream_quantize_node, self.model): + if not FusionUtils.check_qdq_node_for_fusion( + downstream_quantize_node, self.model + ): return # The first input to LayerNormalization should flow through a DequantizeLinear node @@ -61,19 +84,27 @@ class FusionQOrderedLayerNormalization(Fusion): upstream_dequantize_node = first_input_parent_nodes[0] - if not FusionUtils.check_qdq_node_for_fusion(upstream_dequantize_node, self.model): + if not FusionUtils.check_qdq_node_for_fusion( + upstream_dequantize_node, self.model + ): return # Fusion logic subgraph_nodes = [node] # LayerNormalization - subgraph_nodes.extend([downstream_quantize_node]) # Q node after LayerNormalization + subgraph_nodes.extend( + [downstream_quantize_node] + ) # Q node after LayerNormalization - upstream_dequantize_node_children = self.model.get_children(upstream_dequantize_node, input_name_to_nodes) + upstream_dequantize_node_children = self.model.get_children( + upstream_dequantize_node, input_name_to_nodes + ) # In GPT2, the DQ node will be feeding a residual downstream Add and hence, # we do not want to remove it if len(upstream_dequantize_node_children) == 1: - subgraph_nodes.extend([upstream_dequantize_node]) # DQ node before LayerNormalization + subgraph_nodes.extend( + [upstream_dequantize_node] + ) # DQ node before LayerNormalization if not self.model.is_safe_to_fuse_nodes( subgraph_nodes, @@ -83,7 +114,9 @@ class FusionQOrderedLayerNormalization(Fusion): input_name_to_nodes, output_name_to_node, ): - logger.debug(f"It is not safe to fuse QOrderedLayerNormalization node. Skip") + logger.debug( + f"It is not safe to fuse QOrderedLayerNormalization node. 
Skip" + ) return self.nodes_to_remove.extend(subgraph_nodes) @@ -98,7 +131,9 @@ class FusionQOrderedLayerNormalization(Fusion): downstream_quantize_node.input[1], ], outputs=[downstream_quantize_node.output[0]], - name=self.model.create_node_name("QOrderedLayerNormalization", name_prefix="QOrderedLayerNormalization"), + name=self.model.create_node_name( + "QOrderedLayerNormalization", name_prefix="QOrderedLayerNormalization" + ), ) # Arrange the downstream Shape's input to be fed from the @@ -106,7 +141,9 @@ class FusionQOrderedLayerNormalization(Fusion): # be deemed safe if downstream_shape_node is not None: self.model.replace_node_input( - downstream_shape_node, downstream_shape_node.input[0], downstream_quantize_node.output[0] + downstream_shape_node, + downstream_shape_node.input[0], + downstream_quantize_node.output[0], ) # TODO: We only support CuBlasLt order ORDER_ROW for now. diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_qordered_matmul.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_qordered_matmul.py index de0196c53b3f6e8c38301adae476dcfd6f524aa3..8c8050e1cdfb0061b734b1224aa0006b1c09cdef 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_qordered_matmul.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_qordered_matmul.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
@@ -6,9 +22,10 @@ from logging import getLogger from typing import Dict +from onnx import helper + from .fusion_base import Fusion from .fusion_utils import FusionUtils -from onnx import helper from .onnx_model import OnnxModel logger = getLogger(__name__) @@ -53,9 +70,14 @@ class FusionQOrderedMatMul(Fusion): if bias_add_child.op_type == "Add": residual_add_node = bias_add_child - residual_add_children = self.model.get_children(residual_add_node, input_name_to_nodes) + residual_add_children = self.model.get_children( + residual_add_node, input_name_to_nodes + ) - if len(residual_add_children) != 1 or residual_add_children[0].op_type != "QuantizeLinear": + if ( + len(residual_add_children) != 1 + or residual_add_children[0].op_type != "QuantizeLinear" + ): return downstream_quantize_node = residual_add_children[0] @@ -67,7 +89,9 @@ class FusionQOrderedMatMul(Fusion): return # Make sure the downstream QuantizeLinear has the proper zero points and scales - if not FusionUtils.check_qdq_node_for_fusion(downstream_quantize_node, self.model): + if not FusionUtils.check_qdq_node_for_fusion( + downstream_quantize_node, self.model + ): return # The first input to MatMul should flow through a DequantizeLinear node @@ -84,7 +108,12 @@ class FusionQOrderedMatMul(Fusion): if first_path_id < 0: first_path_id, first_input_parent_nodes, _ = self.model.match_parent_paths( node, - [(["Reshape", "Transpose", "DequantizeLinear", "QuantizeLinear"], [0, 0, 0, 0])], + [ + ( + ["Reshape", "Transpose", "DequantizeLinear", "QuantizeLinear"], + [0, 0, 0, 0], + ) + ], output_name_to_node, ) @@ -107,7 +136,17 @@ class FusionQOrderedMatMul(Fusion): weight_path_id, weight_nodes, _ = self.model.match_parent_paths( node, - [(["DequantizeLinear", "QuantizeLinear", "Transpose", "DequantizeLinear"], [1, 0, 0, 0])], + [ + ( + [ + "DequantizeLinear", + "QuantizeLinear", + "Transpose", + "DequantizeLinear", + ], + [1, 0, 0, 0], + ) + ], output_name_to_node, ) @@ -132,14 +171,20 @@ class FusionQOrderedMatMul(Fusion): # Make sure the upstream DequantizeLinear-1 has the proper zero points and scales # Per-channel scales are supported for weights alone - if not FusionUtils.check_qdq_node_for_fusion(dequantize_node_1, self.model, False): + if not FusionUtils.check_qdq_node_for_fusion( + dequantize_node_1, self.model, False + ): return # Make sure the upstream flow into the Residual Add node flows through a DQ node residual_add_dequantize_node = None if residual_add_node is not None: - residual_path_id, residual_input_parent_nodes, _ = self.model.match_parent_paths( + ( + residual_path_id, + residual_input_parent_nodes, + _, + ) = self.model.match_parent_paths( residual_add_node, [ (["DequantizeLinear"], [1]), @@ -153,8 +198,11 @@ class FusionQOrderedMatMul(Fusion): residual_add_dequantize_node = residual_input_parent_nodes[0] # Make sure the upstream DequantizeLinear to the Residual Add has the proper zero points and scales - if residual_add_dequantize_node is not None and not FusionUtils.check_qdq_node_for_fusion( - residual_add_dequantize_node, self.model + if ( + residual_add_dequantize_node is not None + and not FusionUtils.check_qdq_node_for_fusion( + residual_add_dequantize_node, self.model + ) ): return @@ -168,18 +216,25 @@ class FusionQOrderedMatMul(Fusion): subgraph_nodes.extend([downstream_quantize_node]) # Downstream Q node if not self.model.is_safe_to_fuse_nodes( - subgraph_nodes, downstream_quantize_node.output, input_name_to_nodes, output_name_to_node + subgraph_nodes, + downstream_quantize_node.output, + 
input_name_to_nodes, + output_name_to_node, ): logger.debug(f"It is not safe to fuse QOrderedMatMul node. Skip") return # Deal with the case where-in the Attention subgraph is not fused if transpose_node_0 is not None: - self.model.replace_node_input(transpose_node_0, transpose_node_0.input[0], dequantize_node_0.input[0]) + self.model.replace_node_input( + transpose_node_0, transpose_node_0.input[0], dequantize_node_0.input[0] + ) # Make inputs fused_node_inputs = [ - reshape_node_0.output[0] if reshape_node_0 is not None else dequantize_node_0.input[0], + reshape_node_0.output[0] + if reshape_node_0 is not None + else dequantize_node_0.input[0], dequantize_node_0.input[1], dequantize_node_1.input[0], dequantize_node_1.input[1], @@ -203,7 +258,9 @@ class FusionQOrderedMatMul(Fusion): "QOrderedMatMul", inputs=fused_node_inputs, outputs=[downstream_quantize_node.output[0]], - name=self.model.create_node_name("QOrderedMatMul", name_prefix="QOrderedMatMul"), + name=self.model.create_node_name( + "QOrderedMatMul", name_prefix="QOrderedMatMul" + ), ) fused_node.attribute.extend([helper.make_attribute("order_A", 1)]) diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_reshape.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_reshape.py index d2b46c16cac89d23bbdbea86b0b418bff792fcdc..2a5bf73fdf07f223be18e7bbaf20f9623ebb3fdc 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_reshape.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_reshape.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
@@ -6,8 +22,9 @@ from logging import getLogger import numpy as np -from .fusion_base import Fusion from onnx import TensorProto, helper, numpy_helper + +from .fusion_base import Fusion from .onnx_model import OnnxModel logger = getLogger(__name__) @@ -19,7 +36,7 @@ class FusionReshape(Fusion): self.prune_graph: bool = False def replace_reshape_node(self, shape, reshape_node, concat_node): - shape_value = np.asarray(shape, dtype=np.int64) + shape_value = np.asarray([int(x) if isinstance(x, np.ndarray) else x for x in shape], dtype=np.int64) constant_shape_name = self.model.create_node_name("Constant", "constant_shape") new_node = helper.make_node( "Constant", @@ -44,7 +61,11 @@ class FusionReshape(Fusion): return concat_node = output_name_to_node[reshape_node.input[1]] - if concat_node.op_type != "Concat" or len(concat_node.input) < 3 or len(concat_node.input) > 4: + if ( + concat_node.op_type != "Concat" + or len(concat_node.input) < 3 + or len(concat_node.input) > 4 + ): return path0 = self.model.match_parent_path( @@ -83,7 +104,10 @@ class FusionReshape(Fusion): path2 = [] path3 = [] shape_nodes = [shape_0, shape_1] - if len(concat_node.input) == 3 and self.model.get_initializer(concat_node.input[2]) is None: + if ( + len(concat_node.input) == 3 + and self.model.get_initializer(concat_node.input[2]) is None + ): path2 = self.model.match_parent_path( concat_node, ["Unsqueeze", "Mul", "Gather", "Shape"], @@ -128,7 +152,10 @@ class FusionReshape(Fusion): else: shape.append(concat_value) - if len(concat_node.input) == 4 and self.model.get_initializer(concat_node.input[3]) is None: + if ( + len(concat_node.input) == 4 + and self.model.get_initializer(concat_node.input[3]) is None + ): if -1 in shape: return diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_rms_norm.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_rms_norm.py index c831f15c58907e4069fcbebe7d23078c7b47bf06..b3ec51a5a25af26a36ef9fc0015b80104e4cd67f 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_rms_norm.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_rms_norm.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+# + import logging from typing import Dict diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_roformer_attention.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_roformer_attention.py index a5079c2d38c8fd465e49ca51735c570706c9bd40..1d99595e8e8d9dc1cde4da1c66f266251d0919ca 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_roformer_attention.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_roformer_attention.py @@ -115,9 +115,12 @@ class FusionRoformerCrossAttention(Fusion): attention_node.domain = "com.iluvatar" attention_node.attribute.extend([helper.make_attribute("type_id", 2)]) attention_node.attribute.extend([helper.make_attribute("has_mask", 1)]) + attention_node.attribute.extend([helper.make_attribute("type_mask", 4)]) #3:float mask 4:int32 mask + attention_node.attribute.extend([helper.make_attribute("scale", 1.0 / 8)]) #1 /sqrt(num_heads) + attention_node.attribute.extend([helper.make_attribute("plugin_namespace", "")]) attention_node.attribute.extend([helper.make_attribute("plugin_version", "1")]) - + return attention_node def get_shape(self, edge_name): diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_rope.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_rope.py index 2ca376c39904b298973f403c2989418ec17e460e..dfa14d0e25951f7ce72c719c452ebb56232e14a7 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_rope.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_rope.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_shape.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_shape.py index b47be680f13948c63ea73694d443488cf992daa1..727d4b82d44805f6d52c8e7fd72d94acf846e73e 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_shape.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_shape.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. @@ -6,9 +22,10 @@ from logging import getLogger from typing import Dict, List, Union +from onnx import NodeProto, TensorProto + from .fusion_base import Fusion from .fusion_utils import FusionUtils -from onnx import NodeProto, TensorProto from .onnx_model import OnnxModel logger = getLogger(__name__) @@ -21,7 +38,9 @@ class FusionShape(Fusion): self.shape_infer = None self.shape_infer_done = False - def get_dimensions_from_tensor_proto(self, tensor_proto: TensorProto) -> Union[int, None]: + def get_dimensions_from_tensor_proto( + self, tensor_proto: TensorProto + ) -> Union[int, None]: if tensor_proto.type.tensor_type.HasField("shape"): return len(tensor_proto.type.tensor_type.shape.dim) else: @@ -37,7 +56,9 @@ class FusionShape(Fusion): self.shape_infer_done = True if self.shape_infer is not None: - return self.get_dimensions_from_tensor_proto(self.shape_infer.known_vi_[input_name]) + return self.get_dimensions_from_tensor_proto( + self.shape_infer.known_vi_[input_name] + ) return None @@ -58,7 +79,7 @@ class FusionShape(Fusion): | | Unsqueeze(axes=0) Unsqueeze(axes=0) \ / - Concat + Concat | into (2d_input) --> Shape --> @@ -88,7 +109,9 @@ class FusionShape(Fusion): elif shape.input[0] != root: return - if not FusionUtils.check_node_attribute(unsqueeze, "axis", 0, default_value=0): + if not FusionUtils.check_node_attribute( + unsqueeze, "axis", 0, default_value=0 + ): return if opset_version < 13: @@ -101,7 +124,9 @@ class FusionShape(Fusion): value = self.model.get_constant_value(gather.input[1]) from numpy import array_equal, ndarray - if not (isinstance(value, ndarray) and value.size == 1 and value.item() == i): + if not ( + isinstance(value, ndarray) and value.size == 1 and value.item() == i + ): return if self.model.find_graph_output(concat_node.output[0]) is None: diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_skiplayernorm.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_skiplayernorm.py index 5868964467ee7555ea3b47603402f4034885c590..d0797b26dc6edfabd91f4bd9d07d0c1da383ef8b 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_skiplayernorm.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_skiplayernorm.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_splitQKV.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_splitQKV.py index a74fe9ee0a86a88f271b085ae1b946b97b394e7e..436257c3ce09b25790b132b6f918afebc63d9380 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_splitQKV.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_splitQKV.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_splitQKV_update_KVcache.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_splitQKV_update_KVcache.py new file mode 100644 index 0000000000000000000000000000000000000000..4152eef6e6371dd4da27b5315bf5bd741d0749d1 --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_splitQKV_update_KVcache.py @@ -0,0 +1,128 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- + +from logging import getLogger +from typing import Tuple, Union + +from onnx import NodeProto, TensorProto, helper, numpy_helper + +from .fusion_base import Fusion +from .fusion_utils import NumpyHelper +from .onnx_model import OnnxModel + +logger = getLogger(__name__) + + +class FusionSplitQKVUpdateKVCache(Fusion): + """ + Fuse FusionSplitQKVUpdateKVCache + """ + + def __init__(self, model: OnnxModel, hidden_size: int, num_heads: int): + super().__init__( + model, "SplitQKVUpdateKVCache_IxRT", "CustomQkvCrossToContext_IxRT" + ) + + self.hidden_size = hidden_size + self.num_heads = num_heads + + def create_node( + self, + inputs: list, + outputs: list, + ) -> Union[NodeProto, None]: + """Create an XSoftmax node. + + Args: + data_input (str): data input name + mask_input (str): max input name + output (str): output name + + Returns: + Union[NodeProto, None]: the node created or None if failed. 
+ """ + node_name = self.model.create_node_name("SplitQKVUpdateKVCache_IxRT") + + new_node = helper.make_node( + "SplitQKVUpdateKVCache_IxRT", + inputs=inputs, + outputs=outputs, + name=node_name, + ) + new_node.domain = "com.iluvatar" + new_node.attribute.extend([helper.make_attribute("plugin_namespace", "")]) + new_node.attribute.extend([helper.make_attribute("plugin_version", "1")]) + new_node.attribute.extend([helper.make_attribute("num_head", self.num_heads)]) + new_node.attribute.extend( + [helper.make_attribute("head_dim", self.hidden_size // self.num_heads)] + ) + + return new_node + + def fuse(self, node, input_name_to_nodes, output_name_to_node): + + query_paths = { + "query_path": ( + ["Transpose", "Reshape", "Split"], + [0, 0, None], + ), + } + + key_paths = { + "key_path": ( + ["Concat", "Transpose", "Reshape", "Split"], + [1, None, 0, None], + ), + } + + value_paths = { + "value_path": ( + ["Concat", "Transpose", "Reshape", "Split"], + [2, None, 0, None], + ), + } + + q_nodes, q_path = self.match_parent_path_from_dict(node, query_paths) + + k_nodes, k_path = self.match_parent_path_from_dict(node, key_paths) + + v_nodes, v_path = self.match_parent_path_from_dict(node, value_paths) + + if (q_nodes is not None) and (k_nodes is not None) and (v_nodes is not None): + (q_transpose_node, q_reshape_node, q_split_node) = q_nodes + (k_concat_node, k_transpose_node, k_reshape_node, k_split_node) = k_nodes + + (v_concat_node, v_transpose_node, v_reshape_node, v_split_node) = v_nodes + + inputs = [ + q_split_node.input[0], + k_concat_node.input[0], + v_concat_node.input[0], + ] + + outputs = [ + q_transpose_node.output[0], + k_concat_node.output[0], + v_concat_node.output[0], + ] + + new_node = self.create_node(inputs, outputs) + + self.nodes_to_add.append(new_node) + self.node_name_to_graph_name[new_node.name] = self.this_graph_name + self.nodes_to_remove.append(q_transpose_node) + self.nodes_to_remove.append(q_reshape_node) + self.nodes_to_remove.append(q_split_node) + + self.nodes_to_remove.append(k_concat_node) + self.nodes_to_remove.append(k_transpose_node) + self.nodes_to_remove.append(k_reshape_node) + + self.nodes_to_remove.append(v_concat_node) + self.nodes_to_remove.append(v_transpose_node) + self.nodes_to_remove.append(v_reshape_node) + + else: + return \ No newline at end of file diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_swinl_attention.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_swinl_attention.py index 8edb9a5ada34fdc7ae8a5f8b0fecc0d57b57257b..e446a69a636ed38e6e869a15ba6196d727b6d855 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_swinl_attention.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_swinl_attention.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. @@ -6,16 +22,17 @@ from enum import Enum from logging import getLogger from os import name from sys import path -from typing import Tuple, Union, List +from typing import List, Tuple, Union import numpy as np +import onnx +from onnx import NodeProto, TensorProto, helper, numpy_helper + from .fusion_base import Fusion from .fusion_options import AttentionMaskFormat from .fusion_utils import FusionUtils, NumpyHelper -from onnx import NodeProto, TensorProto, helper, numpy_helper from .onnx_model import OnnxModel from .shape_infer_helper import SymbolicShapeInferenceHelper, get_shape_from_type_proto -import onnx logger = getLogger(__name__) @@ -34,10 +51,14 @@ class FusionSwinLAttention(Fusion): """ def __init__( - self, - model: OnnxModel, + self, + model: OnnxModel, ): - super().__init__(model, "CustomQKVToContextPluginDynamic_IxRT", ["CustomFCPluginDynamic_IxRT"]) + super().__init__( + model, + "CustomQKVToContextPluginDynamic_IxRT", + ["CustomFCPluginDynamic_IxRT"], + ) # Flags to show warning only once self.num_heads_warning = True @@ -61,7 +82,9 @@ class FusionSwinLAttention(Fusion): v_shape_value = NumpyHelper.to_array(v_shape) if len(v_shape_value) != 3 or (v_shape_value[1] <= 0 or v_shape_value[2] <= 0): - logger.debug(f"v_shape_value={v_shape_value}. Expected value are like [0, 0, num_heads, head_size].") + logger.debug( + f"v_shape_value={v_shape_value}. Expected value are like [0, 0, num_heads, head_size]." + ) return self.num_heads, self.hidden_size # Fall back to user specified value num_heads = 1 @@ -74,11 +97,11 @@ class FusionSwinLAttention(Fusion): return num_heads, hidden_size def create_attention_node( - self, - num_heads: int, - hidden_size: int, - inputs: List[str], - output: str, + self, + num_heads: int, + hidden_size: int, + inputs: List[str], + output: str, ) -> Union[NodeProto, None]: """Create an Attention node. 
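
All of these fusion passes emit their fused operator the same way: build a node in the com.iluvatar domain with onnx.helper and attach the plugin configuration as node attributes. A minimal, self-contained sketch of that pattern; the op type, attribute values, and tensor names below are illustrative (they mirror the roformer and Swin-L passes in this patch, not any particular model):

```python
from onnx import helper

def make_plugin_attention_node(inputs, output, num_heads, hidden_size):
    """Assemble a fused attention node in the com.iluvatar plugin domain."""
    node = helper.make_node(
        "CustomQKVToContextPluginDynamic_IxRT",
        inputs=inputs,
        outputs=[output],
        name="Attention_example",
    )
    node.domain = "com.iluvatar"
    # Plugin configuration travels as ONNX attributes on the node.
    node.attribute.extend([helper.make_attribute("type_id", 2)])
    node.attribute.extend([helper.make_attribute("num_heads", num_heads)])
    node.attribute.extend([helper.make_attribute("hidden_size", hidden_size)])
    node.attribute.extend([helper.make_attribute("has_mask", 1)])
    node.attribute.extend([helper.make_attribute("plugin_namespace", "")])
    node.attribute.extend([helper.make_attribute("plugin_version", "1")])
    return node

# Example: one fused node consuming the projected QKV tensor and a mask.
attn = make_plugin_attention_node(["qkv_proj_out", "attention_mask"], "attn_out", 12, 768)
```
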
@@ -94,7 +117,9 @@ class FusionSwinLAttention(Fusion): assert num_heads > 0 if hidden_size > 0 and (hidden_size % num_heads) != 0: - logger.debug(f"input hidden size {hidden_size} is not a multiple of num of heads {num_heads}") + logger.debug( + f"input hidden size {hidden_size} is not a multiple of num of heads {num_heads}" + ) return None attention_node_name = self.model.create_node_name("Attention") @@ -108,7 +133,9 @@ class FusionSwinLAttention(Fusion): attention_node.domain = "com.iluvatar" attention_node.attribute.extend([helper.make_attribute("type_id", 2)]) attention_node.attribute.extend([helper.make_attribute("num_heads", num_heads)]) - attention_node.attribute.extend([helper.make_attribute("hidden_size", hidden_size)]) + attention_node.attribute.extend( + [helper.make_attribute("hidden_size", hidden_size)] + ) attention_node.attribute.extend([helper.make_attribute("has_mask", 1)]) attention_node.attribute.extend([helper.make_attribute("plugin_namespace", "")]) attention_node.attribute.extend([helper.make_attribute("plugin_version", "1")]) @@ -120,8 +147,7 @@ class FusionSwinLAttention(Fusion): self.fuse_pattern2(normalize_node, input_name_to_nodes, output_name_to_node) def fuse_pattern2(self, normalize_node, input_name_to_nodes, output_name_to_node): - """ match Swin-L pattern and fuse them to CustomFC --> Attention --> CustomFC - """ + """match Swin-L pattern and fuse them to CustomFC --> Attention --> CustomFC""" logger.debug("fuse swin-L attention pass") # 1. CustomFCPluginDynamic_IxRT node as start, go up to find a pattern for swin-L pattern start_node = normalize_node @@ -132,49 +158,76 @@ class FusionSwinLAttention(Fusion): if qkv_nodes is None: logger.debug("fuse_attention: failed to match qkv path") return - assert qkv_path == 'path1', 'abnormal qkv path' + assert qkv_path == "path1", "abnormal qkv path" reshape_qkv, transpose_qkv, matmul_qkv = qkv_nodes # 2. MatMul as start, go up to find v path v_paths = { - "path1": (["Transpose", "Reshape", "CustomFCPluginDynamic_IxRT"], [None, 0, 0]) + "path1": ( + ["Transpose", "Reshape", "CustomFCPluginDynamic_IxRT"], + [None, 0, 0], + ) } v_nodes, v_path = self.match_parent_path_from_dict(matmul_qkv, v_paths) if not v_nodes: logger.debug("fuse_attention: failed to match v path") return - assert v_path == 'path1', 'abnormal v path' + assert v_path == "path1", "abnormal v path" # 3. 
MatMul as start, go up to find q,k paths # q path q_paths = { - "path1": (["Softmax", "Add", "Div", "MatMul", "Transpose", "Reshape", "CustomFCPluginDynamic_IxRT"], - [None, 0, 0, 0, 0, 0, 0]), + "path1": ( + [ + "Softmax", + "Add", + "Div", + "MatMul", + "Transpose", + "Reshape", + "CustomFCPluginDynamic_IxRT", + ], + [None, 0, 0, 0, 0, 0, 0], + ), } q_nodes, q_path = self.match_parent_path_from_dict(matmul_qkv, q_paths) if not q_nodes: logger.debug("fuse_attention: failed to match q path") return - assert q_path == 'path1', 'abnormal q paths found' + assert q_path == "path1", "abnormal q paths found" # get Add(bias) input name as fused Attention inputs add_op, div_op = q_nodes[1], q_nodes[2] - relative_position_bias_name = add_op.input[1] if add_op.input[0] == div_op.output[0] else add_op.input[0] + relative_position_bias_name = ( + add_op.input[1] if add_op.input[0] == div_op.output[0] else add_op.input[0] + ) # k path k_paths = { - "path2": (["Softmax", "Add", "Div", "MatMul", "Transpose", "Reshape", "CustomFCPluginDynamic_IxRT"], - [None, 0, 0, 0, 1, 0, 0]) + "path2": ( + [ + "Softmax", + "Add", + "Div", + "MatMul", + "Transpose", + "Reshape", + "CustomFCPluginDynamic_IxRT", + ], + [None, 0, 0, 0, 1, 0, 0], + ) } k_nodes, k_path = self.match_parent_path_from_dict(matmul_qkv, k_paths) if not k_nodes: logger.debug("fuse_attention: failed to match k path") return - assert k_path == 'path2', 'abnormal k paths found' + assert k_path == "path2", "abnormal k paths found" # 4. Fuse 3 CustomFC into one, and fuse attention # Fuse FCs fc_nodes = [q_nodes[-1], k_nodes[-1], v_nodes[-1]] - weight = self.fuse_tensor_in_node_attrs(fc_nodes, "W", q_nodes[-1].name + "_Weight") + weight = self.fuse_tensor_in_node_attrs( + fc_nodes, "W", q_nodes[-1].name + "_Weight" + ) bias = self.fuse_tensor_in_node_attrs(fc_nodes, "B", q_nodes[-1].name + "_Bias") fused_node = helper.make_node( "CustomFCPluginDynamic_IxRT", @@ -183,7 +236,9 @@ class FusionSwinLAttention(Fusion): name=self.model.create_node_name("CustomFC", "MatMul_AddBias_"), ) fused_node.domain = "com.iluvatar" - fused_node.attribute.extend([helper.make_attribute("out_dims", numpy_helper.to_array(bias).shape[0])]) + fused_node.attribute.extend( + [helper.make_attribute("out_dims", numpy_helper.to_array(bias).shape[0])] + ) fused_node.attribute.extend([helper.make_attribute("type_id", 2)]) fused_node.attribute.extend([helper.make_attribute("W", weight)]) fused_node.attribute.extend([helper.make_attribute("B", bias)]) @@ -205,12 +260,13 @@ class FusionSwinLAttention(Fusion): return self.nodes_to_add.append(attention_node) self.node_name_to_graph_name[attention_node.name] = self.this_graph_name - self.nodes_to_remove.extend([*qkv_nodes, *q_nodes[:-2], *k_nodes[:-2], *v_nodes]) + self.nodes_to_remove.extend( + [*qkv_nodes, *q_nodes[:-2], *k_nodes[:-2], *v_nodes] + ) self.prune_graph = True def fuse_pattern1(self, normalize_node, input_name_to_nodes, output_name_to_node): - """ match Swin-L pattern and fuse them to CustomFC --> Attention --> CustomFC - """ + """match Swin-L pattern and fuse them to CustomFC --> Attention --> CustomFC""" logger.debug("fuse swin-L attention pass") # 1. 
CustomFCPluginDynamic_IxRT node as start, go up to find a pattern for swin-L pattern start_node = normalize_node @@ -221,45 +277,74 @@ class FusionSwinLAttention(Fusion): if qkv_nodes is None: logger.debug("fuse_attention: failed to match qkv path") return - assert qkv_path == 'path1', 'abnormal qkv path' + assert qkv_path == "path1", "abnormal qkv path" reshape_qkv, transpose_qkv, matmul_qkv = qkv_nodes # 2. MatMul as start, go up to find v path v_paths = { - "path1": (["Transpose", "Reshape", "Add", "Split", "MatMul"], [None, 0, 0, None, 0]) + "path1": ( + ["Transpose", "Reshape", "Add", "Split", "MatMul"], + [None, 0, 0, None, 0], + ) } v_nodes, v_path = self.match_parent_path_from_dict(matmul_qkv, v_paths) if not v_nodes: logger.debug("fuse_attention: failed to match v path") return - assert v_path == 'path1', 'abnormal v path' + assert v_path == "path1", "abnormal v path" # 3. MatMul as start, go up to find q,k paths # q path q_paths = { - "path1": (["Softmax", "Add", "Div", "MatMul", "Transpose", "Reshape", "Add", "Split", "MatMul"], - [None, 0, 0, 0, 0, 0, 0, None, 0]), + "path1": ( + [ + "Softmax", + "Add", + "Div", + "MatMul", + "Transpose", + "Reshape", + "Add", + "Split", + "MatMul", + ], + [None, 0, 0, 0, 0, 0, 0, None, 0], + ), } q_nodes, q_path = self.match_parent_path_from_dict(matmul_qkv, q_paths) if not q_nodes: logger.debug("fuse_attention: failed to match q path") return - assert q_path == 'path1', 'abnormal q paths found' + assert q_path == "path1", "abnormal q paths found" # get Add(bias) input name as fused Attention inputs add_op, div_op = q_nodes[1], q_nodes[2] - relative_position_bias_name = add_op.input[1] if add_op.input[0] == div_op.output[0] else add_op.input[0] + relative_position_bias_name = ( + add_op.input[1] if add_op.input[0] == div_op.output[0] else add_op.input[0] + ) # k path k_paths = { - "path2": (["Softmax", "Add", "Div", "MatMul", "Transpose", "Reshape", "Add", "Split", "MatMul"], - [None, 0, 0, 0, 1, 0, 0, None, 0]) + "path2": ( + [ + "Softmax", + "Add", + "Div", + "MatMul", + "Transpose", + "Reshape", + "Add", + "Split", + "MatMul", + ], + [None, 0, 0, 0, 1, 0, 0, None, 0], + ) } k_nodes, k_path = self.match_parent_path_from_dict(matmul_qkv, k_paths) if not k_nodes: logger.debug("fuse_attention: failed to match k path") return - assert k_path == 'path2', 'abnormal k paths found' + assert k_path == "path2", "abnormal k paths found" # 4. 
Attention and CustomFC have been found, now transform the found nodes to two plugin nodes # Test 3 paths have the same origin is_same_origin = q_nodes[-1] is k_nodes[-1] is v_nodes[-1] @@ -279,9 +364,11 @@ class FusionSwinLAttention(Fusion): if not weight or not all(biases): print("swin-L: couldn't find weights") return - weight_arr = onnx.numpy_helper.to_array(weight).transpose(1,0) + weight_arr = onnx.numpy_helper.to_array(weight).transpose(1, 0) weight.CopyFrom(numpy_helper.from_array(weight_arr)) - bias_arr = np.concatenate([onnx.numpy_helper.to_array(i) for i in biases], axis=0) + bias_arr = np.concatenate( + [onnx.numpy_helper.to_array(i) for i in biases], axis=0 + ) fused_node = helper.make_node( "CustomFCPluginDynamic_IxRT", @@ -290,10 +377,14 @@ class FusionSwinLAttention(Fusion): name=self.model.create_node_name("CustomFC", "MatMul_AddBias_"), ) fused_node.domain = "com.iluvatar" - fused_node.attribute.extend([helper.make_attribute("out_dims", bias_arr.shape[0])]) + fused_node.attribute.extend( + [helper.make_attribute("out_dims", bias_arr.shape[0])] + ) fused_node.attribute.extend([helper.make_attribute("type_id", 2)]) fused_node.attribute.extend([helper.make_attribute("W", weight)]) - fused_node.attribute.extend([helper.make_attribute("B", numpy_helper.from_array(bias_arr))]) + fused_node.attribute.extend( + [helper.make_attribute("B", numpy_helper.from_array(bias_arr))] + ) fused_node.attribute.extend([helper.make_attribute("plugin_namespace", "")]) fused_node.attribute.extend([helper.make_attribute("plugin_version", "1")]) fused_node.attribute.extend([helper.make_attribute("act_type", -1)]) @@ -305,13 +396,14 @@ class FusionSwinLAttention(Fusion): hidden_size, [fused_node.output[0], relative_position_bias_name], reshape_qkv.output[0], - ) if not attention_node: return self.nodes_to_add.append(attention_node) self.node_name_to_graph_name[attention_node.name] = self.this_graph_name - self.nodes_to_remove.extend([*qkv_nodes, *q_nodes[:-2], *k_nodes[:-2], *v_nodes]) + self.nodes_to_remove.extend( + [*qkv_nodes, *q_nodes[:-2], *k_nodes[:-2], *v_nodes] + ) self.prune_graph = True def fuse_tensor_in_node_attrs(self, fc_nodes, attr_name, tensor_name): diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_t5_attention.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_t5_attention.py index 661e8375973d1dd6706ad95a112ddc177a178d53..bce0ab1713f20a19533e5793c4888607a7619c81 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_t5_attention.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_t5_attention.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. 
# Licensed under the MIT License. @@ -22,7 +38,7 @@ from .shape_infer_helper import SymbolicShapeInferenceHelper, get_shape_from_typ logger = getLogger(__name__) -class FusionT5Attention(Fusion): +class FusionT5EncoderAttention(Fusion): """ Fuse T5Attention subgraph into one Attention node. """ @@ -310,3 +326,170 @@ class FusionT5Attention(Fusion): self.nodes_to_remove.extend(q_nodes) self.nodes_to_remove.extend(k_nodes) self.nodes_to_remove.extend(v_nodes[:-2]) + + +class FusionT5DecoderAttention(Fusion): + """ + Fuse T5Attention subgraph into one Attention node. + """ + + def __init__( + self, + model: OnnxModel, + ): + super().__init__( + model, + "CustomQkvCrossToContext_IxRT", + ["Softmax"], + ) + + # Flags to show warning only once + self.num_heads_warning = True + self.hidden_size_warning = True + + def get_num_heads_and_hidden_size(self, reshape_q: NodeProto) -> Tuple[int, int]: + """Detect num_heads and hidden_size from a reshape node. + + Args: + reshape_q (NodeProto): reshape node for Q + + Returns: + Tuple[int, int]: num_heads and hidden_size + """ + + # we assume that reshape fusion has done, so the shape is a tensor like [0, 0, num_heads, head_size] + q_shape = self.model.get_initializer(reshape_q.input[1]) + if q_shape is None: + logger.debug(f"{reshape_q.input[1]} is not initializer.") + return [0, 0] + + q_shape_value = NumpyHelper.to_array(q_shape) + if len(q_shape_value) != 4 or (q_shape_value[2] <= 0 or q_shape_value[3] <= 0): + logger.debug( + f"q_shape_value={q_shape_value}. Expected value are like [0, 0, num_heads, head_size]." + ) + return [0, 0] + + num_heads = q_shape_value[2] + head_size = q_shape_value[3] + hidden_size = num_heads * head_size + + return num_heads, hidden_size + + def create_decoder_attention_node( + self, inputs: str, outputs: str, type_mask: int, has_mask: int + ) -> Union[NodeProto, None]: + """Create an Attention node. + + Args: + input (str): input name + output (str): output name + + Returns: + Union[NodeProto, None]: the node created or None if failed. 
+ """ + + attention_node_name = self.model.create_node_name("decoder_Attention") + attention_node = helper.make_node( + "CustomQkvCrossToContext_IxRT", + inputs=inputs, + outputs=outputs, + name=attention_node_name, + ) + attention_node.domain = "com.iluvatar" + attention_node.attribute.extend([helper.make_attribute("type_id", 2)]) + attention_node.attribute.extend([helper.make_attribute("scale", 1.0)]) + attention_node.attribute.extend([helper.make_attribute("has_mask", has_mask)]) + attention_node.attribute.extend([helper.make_attribute("plugin_namespace", "")]) + attention_node.attribute.extend([helper.make_attribute("plugin_version", "1")]) + attention_node.attribute.extend([helper.make_attribute("type_mask", type_mask)]) + + return attention_node + + def fuse(self, node, input_name_to_nodes, output_name_to_node): + + """ + path1: + + (query) ---------------->MatMul --> add -->softmax --->MatMul---> + / / / + (key) ---->Transpose --> / / + / / + (mask) ------------------------> / + / + (value)---------------------------------------------> + + + + path2: + + (query) ---------------->MatMul ---------->softmax --->MatMul---> + / / + (key) ---->Transpose --> / + / + / + / + (value)---------------------------------------------> + + """ + + start_node = node + qkv_paths = { + "path1": ( + ["Add", "MatMul", "Transpose"], + [0, 0, 0], + ), # float mask self attention,self attention key pass + "path2": (["MatMul", "Transpose"], [0, 0]), # cross attention qery pass + } + + qkv_nodes, qkv_path = self.match_parent_path_from_dict(start_node, qkv_paths) + if qkv_nodes is None: + logger.debug("fuse_attention: failed to match qkv path") + return + next_nodes = self.model.get_children(node) + if len(next_nodes) == 0: + return + + if next_nodes[0].op_type != "MatMul": + return + + second_matmul_node = next_nodes[0] + attention_inputs = None + attention_outputs = second_matmul_node.output + remove_nodes = [second_matmul_node, node] + if qkv_path == "path1": + (add_node, first_matmul_node, transpose_node) = qkv_nodes + transpose_nodes = self.model.get_parents(first_matmul_node) + q_input = transpose_nodes[0].output[0] + k_input = transpose_nodes[1].input[0] + v_input = second_matmul_node.input[1] + attention_inputs = [q_input, k_input, v_input] + remove_nodes.extend([add_node, first_matmul_node, transpose_nodes[1]]) + + if qkv_path == "path2": + (first_matmul_node, transpose_node) = qkv_nodes + transpose_nodes = self.model.get_parents(first_matmul_node) + q_input = transpose_nodes[0].output[0] + k_input = transpose_nodes[1].input[0] + v_input = second_matmul_node.input[1] + attention_inputs = [q_input, k_input, v_input] + remove_nodes.extend([first_matmul_node, transpose_nodes[1]]) + + has_mask = 0 + type_mask = 4 # int32 mask + + if qkv_path == "path1": + mask_input = add_node.input[0] + score_out = first_matmul_node.output[0] + if add_node.input[0] == score_out: + mask_input = add_node.input[1] + attention_inputs.append(mask_input) + has_mask = 1 + type_mask = 3 # float mask + + atten_node = self.create_decoder_attention_node( + attention_inputs, attention_outputs, type_mask, has_mask + ) + self.nodes_to_add.append(atten_node) + self.node_name_to_graph_name[atten_node.name] = self.this_graph_name + self.nodes_to_remove.extend(remove_nodes) \ No newline at end of file diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_utils.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_utils.py index 
5207f28f0a57f417b1cbd45fdeb88168e2baf50d..4765c8f51dbbf7b1f0da9e7821cc714665d1fbd8 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_utils.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_utils.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. @@ -9,6 +25,7 @@ import numpy from numpy import array_equal, ndarray from onnx import NodeProto, TensorProto, helper, numpy_helper from onnx import onnx_pb as onnx_proto + from .onnx_model import OnnxModel logger = getLogger(__name__) @@ -20,12 +37,17 @@ class FusionUtils: def cast_graph_input_to_int32(self, input_name: str) -> Tuple[bool, str]: graph_input = self.model.find_graph_input(input_name) - if graph_input is not None and graph_input.type.tensor_type.elem_type != TensorProto.INT32: + if ( + graph_input is not None + and graph_input.type.tensor_type.elem_type != TensorProto.INT32 + ): cast_output, cast_node = self.cast_input_to_int32(input_name) logger.debug(f"Casted graph input {input_name} to int32") return True, cast_output - logger.debug(f"Did not cast graph input {input_name} to int32: found {graph_input is not None}") + logger.debug( + f"Did not cast graph input {input_name} to int32: found {graph_input is not None}" + ) return False, input_name def cast_input_to_int32(self, input_name: str): @@ -40,7 +62,9 @@ class FusionUtils: inputs = [parent_node.input[0]] cast_node = helper.make_node("Cast", inputs=inputs, outputs=[cast_output]) - cast_node.attribute.extend([helper.make_attribute("to", int(TensorProto.INT32))]) + cast_node.attribute.extend( + [helper.make_attribute("to", int(TensorProto.INT32))] + ) self.model.add_node(cast_node) return cast_output, cast_node @@ -61,7 +85,9 @@ class FusionUtils: self.model.replace_input_of_all_nodes(output_name, input_name) @staticmethod - def check_node_attribute(node, attribute_name: str, expected_value, default_value=None): + def check_node_attribute( + node, attribute_name: str, expected_value, default_value=None + ): """Verify that a node has expected value for an attribute. 
Args: @@ -79,9 +105,9 @@ class FusionUtils: value = helper.get_attribute_value(attr) if isinstance(expected_value, list): - return (isinstance(value, ndarray) or isinstance(value, list)) and array_equal( - expected_value, value, equal_nan=False - ) + return ( + isinstance(value, ndarray) or isinstance(value, list) + ) and array_equal(expected_value, value, equal_nan=False) else: return value == expected_value @@ -94,13 +120,17 @@ class FusionUtils: tensor (TensorProto): transposed tensor """ if not isinstance(tensor, onnx_proto.TensorProto): - raise ValueError("Expected input type is an ONNX TensorProto but got %s" % type(tensor)) + raise ValueError( + "Expected input type is an ONNX TensorProto but got %s" % type(tensor) + ) if len(tensor.dims) != 2 or tensor.data_type != onnx_proto.TensorProto.INT8: raise ValueError("Only INT8 2-D tensors can be transposed") if tensor.raw_data: - int32_data = numpy.reshape(numpy.frombuffer(tensor.raw_data, dtype="int8"), tensor.dims) + int32_data = numpy.reshape( + numpy.frombuffer(tensor.raw_data, dtype="int8"), tensor.dims + ) int32_transposed_data = numpy.transpose(int32_data, [1, 0]) tensor.raw_data = int32_transposed_data.tobytes() @@ -110,7 +140,9 @@ class FusionUtils: return tensor @staticmethod - def check_qdq_node_for_fusion(node: NodeProto, model: OnnxModel, allow_per_tensor_quantization_only=True): + def check_qdq_node_for_fusion( + node: NodeProto, model: OnnxModel, allow_per_tensor_quantization_only=True + ): """Verify if a provided QuantizeLinear (Q) / DequantizeLinear (DQ) node is a good candidate for fusion. It is a good candidate for fusion if: (1) The Q/DQ node is for per-tensor quantization if allow_per_tensor_quantization_only is `True` @@ -131,7 +163,9 @@ class FusionUtils: return False # Not per-tensor quantization - scale_has_single_element = scale.ndim == 0 or (scale.ndim == 1 and scale.shape[0] == 1) + scale_has_single_element = scale.ndim == 0 or ( + scale.ndim == 1 and scale.shape[0] == 1 + ) if allow_per_tensor_quantization_only and not scale_has_single_element: return False @@ -168,9 +202,9 @@ class FusionUtils: value = self.model.get_constant_value(node.input[input_index]) if isinstance(expected_value, list): - return (isinstance(value, ndarray) or isinstance(value, list)) and array_equal( - expected_value, value, equal_nan=False - ) + return ( + isinstance(value, ndarray) or isinstance(value, list) + ) and array_equal(expected_value, value, equal_nan=False) else: return value == expected_value @@ -216,7 +250,9 @@ class FusionUtils: for node in nodes_to_remove: if bool(set(node.output) & graph_output_names): if not bool(set(node.input) & graph_input_names): - self.model.replace_output_of_all_nodes(node.input[0], node.output[0]) + self.model.replace_output_of_all_nodes( + node.input[0], node.output[0] + ) else: continue else: diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_videobert_attention.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_videobert_attention.py index 1133877bf6717dc7a2336db9e2c7976cf35c1405..d3244b7a609da3d8bfda6f91ed606259093e59c4 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_videobert_attention.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_videobert_attention.py @@ -1,7 +1,24 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- +import math from enum import Enum from logging import getLogger from os import name @@ -9,17 +26,18 @@ from sys import path from typing import Tuple, Union import numpy as np +import onnx +from onnx import NodeProto, TensorProto, helper, numpy_helper + from .fusion_base import Fusion from .fusion_options import AttentionMaskFormat from .fusion_utils import FusionUtils, NumpyHelper -from onnx import NodeProto, TensorProto, helper, numpy_helper from .onnx_model import OnnxModel from .shape_infer_helper import SymbolicShapeInferenceHelper, get_shape_from_type_proto -import onnx -import math logger = getLogger(__name__) + class FusionVideoBertAttention(Fusion): """ Fuse VideoBertAttention subgraph into one Attention node. @@ -29,13 +47,19 @@ class FusionVideoBertAttention(Fusion): self, model: OnnxModel, ): - super().__init__(model, "CustomQKVToContextPluginDynamic_IxRT", ["CustomSkipLayerNormPluginDynamic_IxRT", "LayerNormalization"]) + super().__init__( + model, + "CustomQKVToContextPluginDynamic_IxRT", + ["CustomSkipLayerNormPluginDynamic_IxRT", "LayerNormalization"], + ) # Flags to show warning only once self.num_heads_warning = True self.hidden_size_warning = True - def get_num_heads_and_hidden_size(self, atten_matmul: NodeProto, div: NodeProto) -> Tuple[int, int]: + def get_num_heads_and_hidden_size( + self, atten_matmul: NodeProto, div: NodeProto + ) -> Tuple[int, int]: """Detect num_heads and hidden_size from a reshape node. Args: @@ -48,7 +72,7 @@ class FusionVideoBertAttention(Fusion): # we assume that reshape fusion has done, so the shape is a tensor like [0, 0, num_heads, head_size] atten_matul_initializer = self.model.get_initializer(atten_matmul.input[1]) div_initializer = self.model.get_initializer(div.input[1]) - + # 检查float_data是否为空 if len(div_initializer.float_data) > 0: div_value = div_initializer.float_data[0] @@ -60,13 +84,13 @@ class FusionVideoBertAttention(Fusion): div_value = np.frombuffer(div_initializer.raw_data, dtype=dtype)[0] else: raise ValueError("Data not found in the div_initializer") - + atten_matul_shape_value = NumpyHelper.to_array(atten_matul_initializer).shape - head_dim = math.ceil(div_value*div_value) + head_dim = math.ceil(div_value * div_value) hidden_size = atten_matul_shape_value[0] num_heads = hidden_size // head_dim - return num_heads, hidden_size + return num_heads, hidden_size def create_attention_node( self, @@ -74,7 +98,7 @@ class FusionVideoBertAttention(Fusion): hidden_size: int, input: str, output: str, - matmul_qk_add: NodeProto + matmul_qk_add: NodeProto, ) -> Union[NodeProto, None]: """Create an Attention node. 
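
The num_heads/hidden_size recovery above leans on two facts: the Div node scales the attention scores, so its constant is taken to be sqrt(head_dim) and squaring it recovers head_dim, while the attention output-projection weight supplies hidden_size. A quick sanity check with illustrative numbers (assumed, not read from a specific checkpoint):

```python
import math

div_value = 8.0    # constant of the matched Div node, assumed to be sqrt(head_dim)
hidden_size = 768  # first dim of the attention output-projection MatMul weight

head_dim = math.ceil(div_value * div_value)  # 64
num_heads = hidden_size // head_dim          # 12
print(head_dim, num_heads)
```
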
@@ -90,11 +114,13 @@ class FusionVideoBertAttention(Fusion): assert num_heads > 0 if hidden_size > 0 and (hidden_size % num_heads) != 0: - logger.debug(f"input hidden size {hidden_size} is not a multiple of num of heads {num_heads}") + logger.debug( + f"input hidden size {hidden_size} is not a multiple of num of heads {num_heads}" + ) return None attention_node_name = self.model.create_node_name("Attention") - + qk_bias = None has_mask = 0 has_qk_bias = 0 @@ -106,13 +132,13 @@ class FusionVideoBertAttention(Fusion): qk_bias_arr = qk_bias_arr.squeeze(0) has_neg_inf = np.isinf(qk_bias_arr) & (qk_bias_arr < 0) if np.any(has_neg_inf): - qk_bias_arr = np.where(qk_bias_arr == -np.inf, -100, 0.0).astype(np.float32) + qk_bias_arr = np.where(qk_bias_arr == -np.inf, -100, 0.0).astype( + np.float32 + ) qk_bias.CopyFrom(numpy_helper.from_array(qk_bias_arr, qk_bias.name)) - - attention_inputs = [ - input - ] - + + attention_inputs = [input] + if qk_bias is not None: has_mask = 1 attention_inputs.append(qk_bias.name) @@ -126,12 +152,16 @@ class FusionVideoBertAttention(Fusion): attention_node.domain = "com.iluvatar" attention_node.attribute.extend([helper.make_attribute("type_id", 2)]) attention_node.attribute.extend([helper.make_attribute("num_heads", num_heads)]) - attention_node.attribute.extend([helper.make_attribute("hidden_size", hidden_size)]) + attention_node.attribute.extend( + [helper.make_attribute("hidden_size", hidden_size)] + ) attention_node.attribute.extend([helper.make_attribute("has_mask", has_mask)]) attention_node.attribute.extend([helper.make_attribute("plugin_namespace", "")]) attention_node.attribute.extend([helper.make_attribute("plugin_version", "1")]) - attention_node.attribute.extend([helper.make_attribute("has_qk_bias", has_qk_bias)]) - + attention_node.attribute.extend( + [helper.make_attribute("has_qk_bias", has_qk_bias)] + ) + return attention_node def fuse(self, normalize_node, input_name_to_nodes, output_name_to_node): @@ -145,8 +175,14 @@ class FusionVideoBertAttention(Fusion): # SkipLayerNormalization has two inputs, and one of them is the root input for attention. 
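        # Each entry below maps a label to (op-type chain, parent input indices),
        # walked upward from start_node; a None index tells the matcher to try
        # every parent input. match_parent_path_from_dict returns the first chain
        # that matches together with its label, so "path1"/"path2" only differ in
        # which input of the leading Add the attention branch hangs off.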
qkv_paths = { - "path1" : (["Add", "MatMul", "Reshape", "Transpose", "MatMul"], [0, None, 0, 0, 0]), - "path2" : (["Add", "MatMul", "Reshape", "Transpose", "MatMul"], [1, None, 0, 0, 0]), + "path1": ( + ["Add", "MatMul", "Reshape", "Transpose", "MatMul"], + [0, None, 0, 0, 0], + ), + "path2": ( + ["Add", "MatMul", "Reshape", "Transpose", "MatMul"], + [1, None, 0, 0, 0], + ), } qkv_nodes, qkv_path = self.match_parent_path_from_dict(start_node, qkv_paths) @@ -155,7 +191,7 @@ class FusionVideoBertAttention(Fusion): logger.debug("fuse_attention: failed to match qkv path") return - if qkv_path in ['path1', 'path2']: + if qkv_path in ["path1", "path2"]: (_, atten_matmul, reshape_qkv, transpose_qkv, matmul_qkv) = qkv_nodes other_inputs = [] @@ -171,7 +207,7 @@ class FusionVideoBertAttention(Fusion): root_input = other_inputs[0] """ - Match videobert + Match videobert transpose/Add --> LayerNormalization --> Attention --> Add --> LayerNormalization | | | | @@ -181,39 +217,42 @@ class FusionVideoBertAttention(Fusion): if transpose_before_layernorm is not None: node_children = input_name_to_nodes[transpose_before_layernorm.output[0]] for child in node_children: - if child is not None and child.op_type == 'LayerNormalization': + if child is not None and child.op_type == "LayerNormalization": root_input = child.output[0] add_before_layernorm = self.model.match_parent(start_node, "Add", None) if add_before_layernorm is not None: node_children = input_name_to_nodes[add_before_layernorm.output[0]] for child in node_children: - if child is not None and child.op_type == 'LayerNormalization': + if child is not None and child.op_type == "LayerNormalization": root_input = child.output[0] v_paths = { - "path1" : (["Transpose", "Reshape", "Slice", "Add", "MatMul"], [1, 0, 0, 0, None]) # videobert + "path1": ( + ["Transpose", "Reshape", "Slice", "Add", "MatMul"], + [1, 0, 0, 0, None], + ) # videobert } v_nodes, v_path = self.match_parent_path_from_dict(matmul_qkv, v_paths) - if v_path == 'path1': + if v_path == "path1": (_, _, _, add_in_qkv, matmul_in_qkv) = v_nodes if v_nodes is None: logger.debug("fuse_attention: failed to match v path") return - + qk_paths = { "path1": (["Softmax", "MatMul"], [0, 0]), - "path2": (["Softmax", "Add", "MatMul"], [0, 0, None]) + "path2": (["Softmax", "Add", "MatMul"], [0, 0, None]), } qk_nodes, qk_path = self.match_parent_path_from_dict(matmul_qkv, qk_paths) - + if qk_nodes is None: logger.debug("fuse_attention: failed to match qk path") return - + matmul_qk_add = None if qk_path == "path1": (_, matmul_qk) = qk_nodes @@ -221,45 +260,51 @@ class FusionVideoBertAttention(Fusion): (_, matmul_qk_add, matmul_qk) = qk_nodes q_paths = { - "path1" : (["Transpose", "Reshape", "Slice"], [0, 0, 0]), - "path2" : (["Div", "Transpose", "Reshape", "Slice"], [0, 0, 0, 0]) + "path1": (["Transpose", "Reshape", "Slice"], [0, 0, 0]), + "path2": (["Div", "Transpose", "Reshape", "Slice"], [0, 0, 0, 0]), } q_nodes, q_path = self.match_parent_path_from_dict(matmul_qk, q_paths) if q_nodes is None: logger.debug("fuse_attention: failed to match q path") return - - if q_path == 'path1': + + if q_path == "path1": (_, _, slice_q) = q_nodes else: (div, _, _, slice_q) = q_nodes k_paths = { - "path1" : (["Transpose", "Reshape", "Slice"], [1, 0, 0]), - "path2" : (["Div", "Transpose", "Reshape", "Slice"], [1, 0, 0, 0]) + "path1": (["Transpose", "Reshape", "Slice"], [1, 0, 0]), + "path2": (["Div", "Transpose", "Reshape", "Slice"], [1, 0, 0, 0]), } k_nodes, k_path = self.match_parent_path_from_dict(matmul_qk, 
k_paths) if k_nodes is None: logger.debug("fuse_attention: failed to match k path") return - - if k_path == 'path1': + + if k_path == "path1": (_, _, slice_k) = k_nodes else: (div, _, _, slice_k) = k_nodes - - if matmul_in_qkv.input[0] == root_input and slice_q.input[0] == add_in_qkv.output[0] and slice_k.input[0] == add_in_qkv.output[0]: + + if ( + matmul_in_qkv.input[0] == root_input + and slice_q.input[0] == add_in_qkv.output[0] + and slice_k.input[0] == add_in_qkv.output[0] + ): attention_last_node = reshape_qkv - num_heads, hidden_size = self.get_num_heads_and_hidden_size(atten_matmul, div) - + num_heads, hidden_size = self.get_num_heads_and_hidden_size( + atten_matmul, div + ) + new_node = self.create_attention_node( num_heads, hidden_size, add_in_qkv.output[0], attention_last_node.output[0], - matmul_qk_add + matmul_qk_add, ) if new_node is None: return @@ -267,34 +312,41 @@ class FusionVideoBertAttention(Fusion): self.nodes_to_add.append(new_node) self.node_name_to_graph_name[new_node.name] = self.this_graph_name - self.nodes_to_remove.extend([attention_last_node, transpose_qkv, matmul_qkv]) + self.nodes_to_remove.extend( + [attention_last_node, transpose_qkv, matmul_qkv] + ) self.nodes_to_remove.extend(qk_nodes) self.nodes_to_remove.extend(q_nodes) self.nodes_to_remove.extend(k_nodes) self.nodes_to_remove.extend(v_nodes[:-2]) - + # fuse head and tail transpose if transpose_before_layernorm is not None: - node_children = input_name_to_nodes[transpose_before_layernorm.output[0]] + node_children = input_name_to_nodes[ + transpose_before_layernorm.output[0] + ] for child in node_children: for i, input in enumerate(child.input): if child.input[i] == transpose_before_layernorm.output[0]: child.input[i] = transpose_before_layernorm.input[0] self.nodes_to_remove.extend([transpose_before_layernorm]) - + node = transpose_before_layernorm while True: found = False node_children = input_name_to_nodes[node.output[0]] for child in node_children: - if child is not None and child.op_type in ['SkipLayerNorm', "Add"]: + if child is not None and child.op_type in [ + "SkipLayerNorm", + "Add", + ]: node = child found = True break if not found: break node_children = input_name_to_nodes[node.output[0]] - if len(node_children) == 1 and node_children[0].op_type == 'Transpose': + if len(node_children) == 1 and node_children[0].op_type == "Transpose": transpose_node = node_children[0] transpose_children = input_name_to_nodes[transpose_node.output[0]] for i, input in enumerate(transpose_children[0].input): diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_vit_attention.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_vit_attention.py index e6e16f17a8a7a9679f9dc52d2902297ee3d0e33a..f1a5410b62283e45f4f0a8957eaf7e83be6a6124 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_vit_attention.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_vit_attention.py @@ -1,8 +1,25 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- import math +from typing import Dict from enum import Enum from logging import getLogger from os import name @@ -352,3 +369,101 @@ class FusionVITAttention(Fusion): # Use prune graph to remove mask nodes since they are shared by all attention nodes. # self.nodes_to_remove.extend(mask_nodes) # self.prune_graph = True + + +class FusionTorchvisionVITAttention(Fusion): + """ + Fuse VITAttention subgraph into one Attention node. + """ + + def __init__(self, model: OnnxModel): + super().__init__( + model, "CustomQKVToContextPluginDynamic_IxRT", "CustomFCPluginDynamic_IxRT" + ) + + def fuse(self, node, input_name_to_nodes: Dict, output_name_to_node: Dict): + """ + [Root] --> CustomFCPluginDynamic_IxRT--> CustomQKVToContextPluginDynamic_IxRT --> CustomFCPluginDynamic_IxRT + """ + children = self.model.get_children(node, input_name_to_nodes) + parent = self.model.get_parents(node, output_name_to_node) + + if len(children) != 1: + return + if len(parent) != 1: + return + + fc_first_node = None + for par in parent: + fc_first_node = self.model.find_first_parent_by_type( + par, "CustomFCPluginDynamic_IxRT", output_name_to_node, recursive=True + ) + if fc_first_node is not None: + break + if fc_first_node is None: + return + + start_node = node + + # v path + v_nodes = self.model.match_parent_path( + start_node, + ["Transpose", "MatMul", "Reshape", "Transpose", "Reshape", "Gather", "Squeeze", "Transpose", "Unsqueeze", "Reshape"], + [0, 0, 1, 0, 0, 0, 0, 0, 0, 0], + output_name_to_node, + ) + + # path1, q and k path + q_nodes = self.model.match_parent_path( + start_node, + ["Transpose", "MatMul", "Softmax", "MatMul", "Mul", "Transpose", "Reshape", "Transpose", "Reshape", "Gather", "Squeeze", "Transpose", "Unsqueeze", "Reshape"], + [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0], + output_name_to_node, + ) + + k_nodes = self.model.match_parent_path( + start_node, + ["Transpose", "MatMul", "Softmax", "MatMul", "Mul", "Reshape", "Transpose", "Reshape", "Gather", "Squeeze", "Transpose", "Unsqueeze", "Reshape"], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + output_name_to_node, + ) + + if v_nodes is None: + return + + if v_nodes and q_nodes and k_nodes: + subgraph_nodes = [] + subgraph_nodes.extend(q_nodes) + subgraph_nodes.extend(k_nodes) + subgraph_nodes.extend(v_nodes) + + subgraph_nodes_unique = [] + for item in subgraph_nodes: + if item not in subgraph_nodes_unique: + subgraph_nodes_unique.append(item) + + hidden_size = start_node.attribute[0].i + _, mul_val = self.model.get_constant_input(k_nodes[4]) + num_heads = hidden_size // (math.floor(1.0 / (mul_val * mul_val)) * math.floor(1.0 / (mul_val * mul_val))) + + attention_node = helper.make_node( + "CustomQKVToContextPluginDynamic_IxRT", + inputs=[fc_first_node.output[0]], + outputs=[start_node.input[0]], + name=self.model.create_node_name( + "TorchvisionVitAttention", 
name_prefix="TorchvisionVitAttention" + ), + ) + attention_node.domain = "com.iluvatar" + attention_node.attribute.extend([helper.make_attribute("type_id", 2)]) + attention_node.attribute.extend([helper.make_attribute("num_heads", num_heads)]) + attention_node.attribute.extend([helper.make_attribute("hidden_size", hidden_size)]) + attention_node.attribute.extend([helper.make_attribute("has_mask", 0)]) + attention_node.attribute.extend([helper.make_attribute("plugin_namespace", "")]) + attention_node.attribute.extend([helper.make_attribute("plugin_version", "1")]) + attention_node.attribute.extend([helper.make_attribute("has_qk_bias", 0)]) + + self.nodes_to_remove.extend(subgraph_nodes_unique) + + self.nodes_to_add.append(attention_node) + self.node_name_to_graph_name[attention_node.name] = self.this_graph_name \ No newline at end of file diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_xsoftmax.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_xsoftmax.py index 85d9cb2d8de05e0e59cb369c1d336649e4f8b429..df55ba645988ddbffcd157e38db2c73ff34789a2 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_xsoftmax.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_xsoftmax.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_yolov5_decoder.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_yolov5_decoder.py index ba66693c965db49dc4287911fc00e2373a20efbc..f2d07ce96d60c5e8fbfc749d1049bad471525239 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_yolov5_decoder.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/fusion_yolov5_decoder.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/onnx_model.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/onnx_model.py index b176058c9fdc7a5b3dbbc9ef8294d910f689cc31..0b76f660fce62ec0aa19b8c132a6ba51cf6fe319 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/onnx_model.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/onnx_model.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/shape_infer_helper.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/shape_infer_helper.py index 111444028e4ed9aa1d068f93167f8fabaca71b92..a48b53db83fa675713cd9e4ac3b38d2ed554a73b 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/shape_infer_helper.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/shape_infer_helper.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # ------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
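
Looking back at the FusionTorchvisionVITAttention pass above, the doubled floor term in the num_heads arithmetic is easiest to read with concrete numbers. It only works out if the exported graph scales Q and K each by a Mul whose constant is head_dim ** -0.25 (an assumption of that pass, not something verified here): then floor(1 / mul²) is sqrt(head_dim), and its square is head_dim. An illustrative check with ViT-B-like sizes:

```python
import math

hidden_size = 768        # as read by the pass from the trailing CustomFC node
mul_val = 64 ** -0.25    # per-branch scaling constant, assumed head_dim ** -0.25

sqrt_head_dim = math.floor(1.0 / (mul_val * mul_val))  # 8
head_dim = sqrt_head_dim * sqrt_head_dim               # 64
num_heads = hidden_size // head_dim                    # 12
print(num_heads)
```
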
@@ -15,13 +31,24 @@ if os.path.exists(os.path.join(file_path, "../tools/symbolic_shape_infer.py")): else: sys.path.append(os.path.join(file_path, "..")) -from .symbolic_shape_infer import SymbolicShapeInference, get_shape_from_type_proto, sympy +from .symbolic_shape_infer import ( + SymbolicShapeInference, + get_shape_from_type_proto, + sympy, +) logger = logging.getLogger(__name__) class SymbolicShapeInferenceHelper(SymbolicShapeInference): - def __init__(self, model, verbose=0, int_max=2**31 - 1, auto_merge=True, guess_output_rank=False): + def __init__( + self, + model, + verbose=0, + int_max=2**31 - 1, + auto_merge=True, + guess_output_rank=False, + ): super().__init__(int_max, auto_merge, guess_output_rank, verbose) self.model_ = model self.all_shapes_inferred_: bool = False diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/symbolic_shape_infer.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/symbolic_shape_infer.py index e5157f90eedf906e3e6f24dddf03219d3ca570f7..2311ad57fdefa502a9e6d7edf44dc884c843ee51 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/symbolic_shape_infer.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/passes/symbolic_shape_infer.py @@ -1,3 +1,19 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
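
The symbolic_shape_infer helpers patched in the following hunks all read the same representation: each dimension in an ONNX TensorProto type is either a concrete dim_value or a named dim_param, and functions such as get_dim_from_proto and get_shape_from_sympy_shape convert between that and sympy-friendly values. A minimal sketch of the proto side (the value-info name and shape are made up for illustration):

```python
from onnx import TensorProto, helper

# One symbolic ("batch") and one concrete (768) dimension.
vi = helper.make_tensor_value_info("hidden_states", TensorProto.FLOAT, ["batch", 768])

dims = []
for d in vi.type.tensor_type.shape.dim:
    kind = d.WhichOneof("value")  # "dim_param", "dim_value", or None if unknown
    dims.append(getattr(d, kind) if kind is not None else None)

print(dims)  # ['batch', 768]
```
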
@@ -24,7 +40,11 @@ def get_attribute(node, attr_name, default_value=None): def get_dim_from_proto(dim): - return getattr(dim, dim.WhichOneof("value")) if type(dim.WhichOneof("value")) == str else None + return ( + getattr(dim, dim.WhichOneof("value")) + if type(dim.WhichOneof("value")) == str + else None + ) def is_sequence(type_proto): @@ -61,11 +81,16 @@ def make_named_value_info(name): def get_shape_from_sympy_shape(sympy_shape): - return [None if i is None else (int(i) if is_literal(i) else str(i)) for i in sympy_shape] + return [ + None if i is None else (int(i) if is_literal(i) else str(i)) + for i in sympy_shape + ] def is_literal(dim): - return type(dim) in [int, np.int64, np.int32, sympy.Integer] or (hasattr(dim, "is_number") and dim.is_number) + return type(dim) in [int, np.int64, np.int32, sympy.Integer] or ( + hasattr(dim, "is_number") and dim.is_number + ) def handle_negative_axis(axis, rank): @@ -217,7 +242,12 @@ class SymbolicShapeInference: self.prefix_ = prefix def _add_suggested_merge(self, symbols, apply=False): - assert all([(type(s) == str and s in self.symbolic_dims_) or is_literal(s) for s in symbols]) + assert all( + [ + (type(s) == str and s in self.symbolic_dims_) or is_literal(s) + for s in symbols + ] + ) symbols = set(symbols) for k, v in self.suggested_merge_.items(): if k in symbols: @@ -243,7 +273,11 @@ class SymbolicShapeInference: # when nothing to map to, use the shorter one if map_to is None: if self.verbose_ > 0: - logger.warning("Potential unsafe merge between symbolic expressions: ({})".format(",".join(symbols))) + logger.warning( + "Potential unsafe merge between symbolic expressions: ({})".format( + ",".join(symbols) + ) + ) symbols_list = list(symbols) lens = [len(s) for s in symbols_list] map_to = symbols_list[lens.index(min(lens))] @@ -264,7 +298,9 @@ class SymbolicShapeInference: def _apply_suggested_merge(self, graph_input_only=False): if not self.suggested_merge_: return - for i in list(self.out_mp_.graph.input) + ([] if graph_input_only else list(self.out_mp_.graph.value_info)): + for i in list(self.out_mp_.graph.input) + ( + [] if graph_input_only else list(self.out_mp_.graph.value_info) + ): for d in i.type.tensor_type.shape.dim: if d.dim_param in self.suggested_merge_: v = self.suggested_merge_[d.dim_param] @@ -284,7 +320,9 @@ class SymbolicShapeInference: [ ( i.name, - helper.make_tensor_value_info(i.name, i.data_type, list(i.dims)), + helper.make_tensor_value_info( + i.name, i.data_type, list(i.dims) + ), ) for i in self.out_mp_.graph.initializer ] @@ -296,7 +334,9 @@ class SymbolicShapeInference: if self.auto_merge_: unique_dims = list(set(dims)) is_int = [is_literal(d) for d in unique_dims] - assert sum(is_int) <= 1 # if there are more than 1 unique ints, something is wrong + assert ( + sum(is_int) <= 1 + ) # if there are more than 1 unique ints, something is wrong if sum(is_int) == 1: int_dim = is_int.index(1) if self.verbose_ > 0: @@ -310,13 +350,19 @@ class SymbolicShapeInference: return unique_dims[int_dim] else: if self.verbose_ > 0: - logger.debug("dim {} has been mergd with dim {}".format(unique_dims[1:], unique_dims[0])) + logger.debug( + "dim {} has been mergd with dim {}".format( + unique_dims[1:], unique_dims[0] + ) + ) return dims[0] else: return None if all([d == dims[0] for d in dims]): return dims[0] - merged = [self.suggested_merge_[d] if d in self.suggested_merge_ else d for d in dims] + merged = [ + self.suggested_merge_[d] if d in self.suggested_merge_ else d for d in dims + ] if all([d == merged[0] for d in 
merged]): assert merged[0] in self.symbolic_dims_ return merged[0] @@ -345,7 +391,12 @@ class SymbolicShapeInference: if self.auto_merge_: self._add_suggested_merge([dim1, dim2], apply=True) else: - logger.warning("unsupported broadcast between " + str(dim1) + " " + str(dim2)) + logger.warning( + "unsupported broadcast between " + + str(dim1) + + " " + + str(dim2) + ) new_shape = [new_dim] + new_shape return new_shape @@ -378,7 +429,11 @@ class SymbolicShapeInference: def _get_value(self, node, idx): name = node.input[idx] assert name in self.sympy_data_ or name in self.initializers_ - return self.sympy_data_[name] if name in self.sympy_data_ else numpy_helper.to_array(self.initializers_[name]) + return ( + self.sympy_data_[name] + if name in self.sympy_data_ + else numpy_helper.to_array(self.initializers_[name]) + ) def _try_get_value(self, node, idx): if idx >= len(node.input): @@ -395,7 +450,9 @@ class SymbolicShapeInference: if str_dim in self.suggested_merge_: if is_literal(self.suggested_merge_[str_dim]): continue # no need to create dim for literals - new_sympy_shape[i] = self.symbolic_dims_[self.suggested_merge_[str_dim]] + new_sympy_shape[i] = self.symbolic_dims_[ + self.suggested_merge_[str_dim] + ] else: # add new_dim if it's a computational expression if not str(new_dim) in self.symbolic_dims_: @@ -456,24 +513,38 @@ class SymbolicShapeInference: vi.name = o self.known_vi_[o] = vi - def _onnx_infer_subgraph(self, node, subgraph, use_node_input=True, inc_subgraph_id=True): + def _onnx_infer_subgraph( + self, node, subgraph, use_node_input=True, inc_subgraph_id=True + ): if self.verbose_ > 2: logger.debug( - "Inferencing subgraph of node {} with output({}...): {}".format(node.name, node.output[0], node.op_type) + "Inferencing subgraph of node {} with output({}...): {}".format( + node.name, node.output[0], node.op_type + ) ) # node inputs are not passed directly to the subgraph # it's up to the node dispatcher to prepare subgraph input # for example, with Scan/Loop, subgraph input shape would be trimmed from node input shape # besides, inputs in subgraph could shadow implicit inputs - subgraph_inputs = set([i.name for i in list(subgraph.initializer) + list(subgraph.input)]) - subgraph_implicit_input = set([name for name in self.known_vi_.keys() if not name in subgraph_inputs]) + subgraph_inputs = set( + [i.name for i in list(subgraph.initializer) + list(subgraph.input)] + ) + subgraph_implicit_input = set( + [name for name in self.known_vi_.keys() if not name in subgraph_inputs] + ) tmp_graph = helper.make_graph( list(subgraph.node), "tmp", list(subgraph.input) + [self.known_vi_[i] for i in subgraph_implicit_input], [make_named_value_info(i.name) for i in subgraph.output], ) - tmp_graph.initializer.extend([i for i in self.out_mp_.graph.initializer if i.name in subgraph_implicit_input]) + tmp_graph.initializer.extend( + [ + i + for i in self.out_mp_.graph.initializer + if i.name in subgraph_implicit_input + ] + ) tmp_graph.initializer.extend(subgraph.initializer) self.tmp_mp_.graph.CopyFrom(tmp_graph) @@ -491,12 +562,16 @@ class SymbolicShapeInference: symbolic_shape_inference._preprocess(self.tmp_mp_) symbolic_shape_inference.suggested_merge_ = self.suggested_merge_.copy() while symbolic_shape_inference.run_: - all_shapes_inferred = symbolic_shape_inference._infer_impl(self.sympy_data_.copy()) + all_shapes_inferred = symbolic_shape_inference._infer_impl( + self.sympy_data_.copy() + ) symbolic_shape_inference._update_output_from_vi() if use_node_input: # if subgraph uses node input, 
it needs to update to merged dims subgraph.ClearField("input") - subgraph.input.extend(symbolic_shape_inference.out_mp_.graph.input[: len(node.input)]) + subgraph.input.extend( + symbolic_shape_inference.out_mp_.graph.input[: len(node.input)] + ) subgraph.ClearField("output") subgraph.output.extend(symbolic_shape_inference.out_mp_.graph.output) subgraph.ClearField("value_info") @@ -504,9 +579,18 @@ class SymbolicShapeInference: subgraph.ClearField("node") subgraph.node.extend(symbolic_shape_inference.out_mp_.graph.node) # for new symbolic dims from subgraph output, add to main graph symbolic dims - subgraph_shapes = [get_shape_from_value_info(o) for o in symbolic_shape_inference.out_mp_.graph.output] + subgraph_shapes = [ + get_shape_from_value_info(o) + for o in symbolic_shape_inference.out_mp_.graph.output + ] subgraph_new_symbolic_dims = set( - [d for s in subgraph_shapes if s for d in s if type(d) == str and not d in self.symbolic_dims_] + [ + d + for s in subgraph_shapes + if s + for d in s + if type(d) == str and not d in self.symbolic_dims_ + ] ) new_dims = {} for d in subgraph_new_symbolic_dims: @@ -597,7 +681,9 @@ class SymbolicShapeInference: ) def _new_symbolic_shape(self, rank, node, out_idx=0): - return [self._new_symbolic_dim_from_output(node, out_idx, i) for i in range(rank)] + return [ + self._new_symbolic_dim_from_output(node, out_idx, i) for i in range(rank) + ] def _compute_conv_pool_shape(self, node): sympy_shape = self._get_sympy_shape(node, 0) @@ -625,14 +711,18 @@ class SymbolicShapeInference: dilations = get_attribute(node, "dilations", [1] * rank) strides = get_attribute(node, "strides", [1] * rank) - effective_kernel_shape = [(k - 1) * d + 1 for k, d in zip(kernel_shape, dilations)] + effective_kernel_shape = [ + (k - 1) * d + 1 for k, d in zip(kernel_shape, dilations) + ] pads = get_attribute(node, "pads") if pads is None: pads = [0] * (2 * rank) auto_pad = get_attribute(node, "auto_pad", b"NOTSET").decode("utf-8") if auto_pad != "VALID" and auto_pad != "NOTSET": try: - residual = [sympy.Mod(d, s) for d, s in zip(sympy_shape[-rank:], strides)] + residual = [ + sympy.Mod(d, s) for d, s in zip(sympy_shape[-rank:], strides) + ] total_pads = [ max(0, (k - s) if r == 0 else (k - r)) for k, s, r in zip(effective_kernel_shape, strides, residual) @@ -659,7 +749,9 @@ class SymbolicShapeInference: (effective_input_size - effective_kernel_shape[i]) / strides[i] ) else: - strided_kernel_positions = (effective_input_size - effective_kernel_shape[i]) // strides[i] + strided_kernel_positions = ( + effective_input_size - effective_kernel_shape[i] + ) // strides[i] sympy_shape[-rank + i] = strided_kernel_positions + 1 return sympy_shape @@ -688,7 +780,11 @@ class SymbolicShapeInference: else: lhs_reduce_dim = -1 rhs_reduce_dim = -2 - new_shape = self._broadcast_shapes(lhs_shape[:-2], rhs_shape[:-2]) + [lhs_shape[-2]] + [rhs_shape[-1]] + new_shape = ( + self._broadcast_shapes(lhs_shape[:-2], rhs_shape[:-2]) + + [lhs_shape[-2]] + + [rhs_shape[-1]] + ) # merge reduce dim self._check_merged_dims( [lhs_shape[lhs_reduce_dim], rhs_shape[rhs_reduce_dim]], @@ -698,17 +794,23 @@ class SymbolicShapeInference: # infer output_dtype from input type when not specified output_dtype = self.known_vi_[node.input[0]].type.tensor_type.elem_type vi = self.known_vi_[node.output[0]] - vi.CopyFrom(helper.make_tensor_value_info(node.output[0], output_dtype, new_shape)) + vi.CopyFrom( + helper.make_tensor_value_info(node.output[0], output_dtype, new_shape) + ) def _fuse_tensor_type(self, node, out_idx, 
dst_type, src_type): """ update dst_tensor_type to be compatible with src_tensor_type when dimension mismatches """ dst_tensor_type = ( - dst_type.sequence_type.elem_type.tensor_type if is_sequence(dst_type) else dst_type.tensor_type + dst_type.sequence_type.elem_type.tensor_type + if is_sequence(dst_type) + else dst_type.tensor_type ) src_tensor_type = ( - src_type.sequence_type.elem_type.tensor_type if is_sequence(src_type) else src_type.tensor_type + src_type.sequence_type.elem_type.tensor_type + if is_sequence(src_type) + else src_type.tensor_type ) if dst_tensor_type.elem_type != src_tensor_type.elem_type: node_id = node.name if node.name else node.op_type @@ -718,13 +820,17 @@ class SymbolicShapeInference: f"{onnx.onnx_pb.TensorProto.DataType.Name(src_tensor_type.elem_type)}" ) if dst_tensor_type.HasField("shape"): - for di, ds in enumerate(zip(dst_tensor_type.shape.dim, src_tensor_type.shape.dim)): + for di, ds in enumerate( + zip(dst_tensor_type.shape.dim, src_tensor_type.shape.dim) + ): if ds[0] != ds[1]: # create a new symbolic dimension for node/out_idx/mismatch dim id in dst_tensor_type for tensor_type # for sequence_type, clear the dimension new_dim = onnx.TensorShapeProto.Dimension() if not is_sequence(dst_type): - new_dim.dim_param = str(self._new_symbolic_dim_from_output(node, out_idx, di)) + new_dim.dim_param = str( + self._new_symbolic_dim_from_output(node, out_idx, di) + ) dst_tensor_type.shape.dim[di].CopyFrom(new_dim) else: dst_tensor_type.CopyFrom(src_tensor_type) @@ -749,10 +855,18 @@ class SymbolicShapeInference: "Floor": lambda l: sympy.floor(l[0]), "Max": lambda l: l[1] if is_literal(l[0]) and int(l[0]) < -self.int_max_ - else (l[0] if is_literal(l[1]) and int(l[1]) < -self.int_max_ else sympy.Max(l[0], l[1])), + else ( + l[0] + if is_literal(l[1]) and int(l[1]) < -self.int_max_ + else sympy.Max(l[0], l[1]) + ), "Min": lambda l: l[1] if is_literal(l[0]) and int(l[0]) > self.int_max_ - else (l[0] if is_literal(l[1]) and int(l[1]) > self.int_max_ else sympy.Min(l[0], l[1])), + else ( + l[0] + if is_literal(l[1]) and int(l[1]) > self.int_max_ + else sympy.Min(l[0], l[1]) + ), "Mul": lambda l: l[0] * l[1], "Sub": lambda l: l[0] - l[1], "Where": lambda l: l[1] if l[0] else l[2], @@ -771,7 +885,11 @@ class SymbolicShapeInference: else: output_type = onnx.TensorProto.STRING vi = self.known_vi_[node.output[0]] - vi.CopyFrom(helper.make_tensor_value_info(node.output[0], output_type, self._get_shape(node, 0))) + vi.CopyFrom( + helper.make_tensor_value_info( + node.output[0], output_type, self._get_shape(node, 0) + ) + ) def _infer_Compress(self, node): input_shape = self._get_shape(node, 0) @@ -817,7 +935,11 @@ class SymbolicShapeInference: for d in range(len(sympy_shape)): if d == axis: continue - dims = [self._get_shape(node, i_idx)[d] for i_idx in range(len(node.input)) if self._get_shape(node, i_idx)] + dims = [ + self._get_shape(node, i_idx)[d] + for i_idx in range(len(node.input)) + if self._get_shape(node, i_idx) + ] if all([d == dims[0] for d in dims]): continue merged = self._merge_symbols(dims) @@ -837,7 +959,9 @@ class SymbolicShapeInference: def _infer_ConcatFromSequence(self, node): seq_shape = self._get_shape(node, 0) new_axis = 1 if get_attribute(node, "new_axis") else 0 - axis = handle_negative_axis(get_attribute(node, "axis"), len(seq_shape) + new_axis) + axis = handle_negative_axis( + get_attribute(node, "axis"), len(seq_shape) + new_axis + ) concat_dim = str(self._new_symbolic_dim_from_output(node, 0, axis)) new_shape = seq_shape if new_axis: @@ -848,7 
+972,9 @@ class SymbolicShapeInference: vi.CopyFrom( helper.make_tensor_value_info( node.output[0], - self.known_vi_[node.input[0]].type.sequence_type.elem_type.tensor_type.elem_type, + self.known_vi_[ + node.input[0] + ].type.sequence_type.elem_type.tensor_type.elem_type, new_shape, ) ) @@ -865,7 +991,9 @@ class SymbolicShapeInference: sympy_shape = [sympy_shape] self._update_computed_dims(sympy_shape) # update sympy data if output type is int, and shape is known - if vi.type.tensor_type.elem_type == onnx.TensorProto.INT64 and all([is_literal(x) for x in sympy_shape]): + if vi.type.tensor_type.elem_type == onnx.TensorProto.INT64 and all( + [is_literal(x) for x in sympy_shape] + ): self.sympy_data_[node.output[0]] = np.ones( [int(x) for x in sympy_shape], dtype=np.int64 ) * numpy_helper.to_array(get_attribute(node, "value", 0)) @@ -954,7 +1082,9 @@ class SymbolicShapeInference: output_dtype = self.known_vi_[node.input[0]].type.tensor_type.elem_type vi = self.known_vi_[node.output[0]] - vi.CopyFrom(helper.make_tensor_value_info(node.output[0], output_dtype, new_sympy_shape)) + vi.CopyFrom( + helper.make_tensor_value_info(node.output[0], output_dtype, new_sympy_shape) + ) def _infer_Expand(self, node): expand_to_shape = as_list(self._try_get_value(node, 1), keep_none=True) @@ -962,7 +1092,9 @@ class SymbolicShapeInference: # new_shape's dim can come from shape value self._update_computed_dims(expand_to_shape) shape = self._get_shape(node, 0) - new_shape = self._broadcast_shapes(shape, get_shape_from_sympy_shape(expand_to_shape)) + new_shape = self._broadcast_shapes( + shape, get_shape_from_sympy_shape(expand_to_shape) + ) vi = self.known_vi_[node.output[0]] vi.CopyFrom( helper.make_tensor_value_info( @@ -985,7 +1117,11 @@ class SymbolicShapeInference: ) ) # for 1D input, do some sympy compute - if node.input[0] in self.sympy_data_ and len(data_shape) == 1 and 0 == get_attribute(node, "axis", 0): + if ( + node.input[0] in self.sympy_data_ + and len(data_shape) == 1 + and 0 == get_attribute(node, "axis", 0) + ): idx = self._try_get_value(node, 1) if idx is not None: data = self.sympy_data_[node.input[0]] @@ -1040,24 +1176,32 @@ class SymbolicShapeInference: subgraphs[0].CopyFrom(subgraphs[1]) for i_sub, subgraph in enumerate(subgraphs): - subgraph_infer = self._onnx_infer_subgraph(node, subgraph, use_node_input=False) + subgraph_infer = self._onnx_infer_subgraph( + node, subgraph, use_node_input=False + ) for i_out in range(len(node.output)): vi = self.known_vi_[node.output[i_out]] if i_sub == 0: vi.CopyFrom(subgraph.output[i_out]) vi.name = node.output[i_out] else: - self._fuse_tensor_type(node, i_out, vi.type, subgraph.output[i_out].type) + self._fuse_tensor_type( + node, i_out, vi.type, subgraph.output[i_out].type + ) # pass on sympy data from subgraph, if cond is constant if cond is not None and i_sub == (0 if as_scalar(cond) > 0 else 1): if subgraph.output[i_out].name in subgraph_infer.sympy_data_: - self.sympy_data_[vi.name] = subgraph_infer.sympy_data_[subgraph.output[i_out].name] + self.sympy_data_[vi.name] = subgraph_infer.sympy_data_[ + subgraph.output[i_out].name + ] def _infer_Loop(self, node): subgraph = get_attribute(node, "body") assert len(subgraph.input) == len(node.input) - num_loop_carried = len(node.input) - 2 # minus the length and initial loop condition + num_loop_carried = ( + len(node.input) - 2 + ) # minus the length and initial loop condition # when sequence_type is used as loop carried input # needs to run subgraph infer twice if the tensor shape in sequence contains 
None for i, si in enumerate(subgraph.input): @@ -1079,7 +1223,9 @@ class SymbolicShapeInference: # copy shape from output to input # note that loop input is [loop_len, cond, input_0, input_1, ...] # while loop output is [cond, output_0, output_1, ...] - subgraph.input[i_out + 1].type.sequence_type.elem_type.CopyFrom(so.type.sequence_type.elem_type) + subgraph.input[i_out + 1].type.sequence_type.elem_type.CopyFrom( + so.type.sequence_type.elem_type + ) need_second_infer = True else: si = subgraph.input[i_out + 1] @@ -1087,7 +1233,9 @@ class SymbolicShapeInference: for di, dims in enumerate(zip(si_shape, so_shape)): if dims[0] != dims[1]: new_dim = onnx.TensorShapeProto.Dimension() - new_dim.dim_param = str(self._new_symbolic_dim_from_output(node, i_out, di)) + new_dim.dim_param = str( + self._new_symbolic_dim_from_output(node, i_out, di) + ) si.type.tensor_type.shape.dim[di].CopyFrom(new_dim) so.type.tensor_type.shape.dim[di].CopyFrom(new_dim) need_second_infer = True @@ -1105,9 +1253,13 @@ class SymbolicShapeInference: loop_iter_dim = str(self._new_symbolic_dim_from_output(node)) for i in range(len(node.output)): vi = self.known_vi_[node.output[i]] - vi.CopyFrom(subgraph.output[i + 1]) # first subgraph output is condition, not in node output + vi.CopyFrom( + subgraph.output[i + 1] + ) # first subgraph output is condition, not in node output if i >= num_loop_carried: - assert not is_sequence(vi.type) # TODO: handle loop accumulation in sequence_type + assert not is_sequence( + vi.type + ) # TODO: handle loop accumulation in sequence_type subgraph_vi_dim = subgraph.output[i + 1].type.tensor_type.shape.dim vi.type.tensor_type.shape.ClearField("dim") vi_dim = vi.type.tensor_type.shape.dim @@ -1124,14 +1276,22 @@ class SymbolicShapeInference: def _infer_NonMaxSuppression(self, node): selected = str(self._new_symbolic_dim_from_output(node)) vi = self.known_vi_[node.output[0]] - vi.CopyFrom(helper.make_tensor_value_info(node.output[0], onnx.TensorProto.INT64, [selected, 3])) + vi.CopyFrom( + helper.make_tensor_value_info( + node.output[0], onnx.TensorProto.INT64, [selected, 3] + ) + ) def _infer_NonZero(self, node): input_rank = self._get_shape_rank(node, 0) # create a new symbolic dimension for NonZero output nz_len = str(self._new_symbolic_dim_from_output(node, 0, 1)) vi = self.known_vi_[node.output[0]] - vi.CopyFrom(helper.make_tensor_value_info(node.output[0], vi.type.tensor_type.elem_type, [input_rank, nz_len])) + vi.CopyFrom( + helper.make_tensor_value_info( + node.output[0], vi.type.tensor_type.elem_type, [input_rank, nz_len] + ) + ) def _infer_OneHot(self, node): sympy_shape = self._get_sympy_shape(node, 0) @@ -1140,7 +1300,11 @@ class SymbolicShapeInference: axis = handle_negative_axis(axis, len(sympy_shape) + 1) new_shape = get_shape_from_sympy_shape( sympy_shape[:axis] - + [self._new_symbolic_dim_from_output(node) if not is_literal(depth) else depth] + + [ + self._new_symbolic_dim_from_output(node) + if not is_literal(depth) + else depth + ] + sympy_shape[axis:] ) vi = self.known_vi_[node.output[0]] @@ -1164,7 +1328,8 @@ class SymbolicShapeInference: if pads is not None: assert len(pads) == 2 * rank new_sympy_shape = [ - d + pad_up + pad_down for d, pad_up, pad_down in zip(sympy_shape, pads[:rank], pads[rank:]) + d + pad_up + pad_down + for d, pad_up, pad_down in zip(sympy_shape, pads[:rank], pads[rank:]) ] self._update_computed_dims(new_sympy_shape) else: @@ -1174,7 +1339,9 @@ class SymbolicShapeInference: vi = self.known_vi_[node.output[0]] vi.CopyFrom( - 
helper.make_tensor_value_info(node.output[0], output_tp, get_shape_from_sympy_shape(new_sympy_shape)) + helper.make_tensor_value_info( + node.output[0], output_tp, get_shape_from_sympy_shape(new_sympy_shape) + ) ) def _infer_Pool(self, node): @@ -1198,7 +1365,11 @@ class SymbolicShapeInference: new_shape = self._broadcast_shapes(shape0, shape1) t0 = self.known_vi_[node.input[0]] vi = self.known_vi_[node.output[0]] - vi.CopyFrom(helper.make_tensor_value_info(node.output[0], t0.type.tensor_type.elem_type, new_shape)) + vi.CopyFrom( + helper.make_tensor_value_info( + node.output[0], t0.type.tensor_type.elem_type, new_shape + ) + ) def _infer_aten_diagonal(self, node): sympy_shape = self._get_sympy_shape(node, 0) @@ -1240,7 +1411,11 @@ class SymbolicShapeInference: assert rank in [1, 2] num_samples = self._try_get_value(node, 1) di = rank - 1 - last_dim = num_samples if num_samples else str(self._new_symbolic_dim_from_output(node, 0, di)) + last_dim = ( + num_samples + if num_samples + else str(self._new_symbolic_dim_from_output(node, 0, di)) + ) output_shape = sympy_shape[:-1] + [last_dim] vi = self.known_vi_[node.output[0]] vi.CopyFrom( @@ -1254,21 +1429,33 @@ class SymbolicShapeInference: def _infer_aten_pool2d(self, node): sympy_shape = self._get_sympy_shape(node, 0) assert len(sympy_shape) == 4 - sympy_shape[-2:] = [self._new_symbolic_dim_from_output(node, 0, i) for i in [2, 3]] + sympy_shape[-2:] = [ + self._new_symbolic_dim_from_output(node, 0, i) for i in [2, 3] + ] self._update_computed_dims(sympy_shape) for i, o in enumerate(node.output): if not o: continue vi = self.known_vi_[o] - elem_type = onnx.TensorProto.INT64 if i == 1 else self.known_vi_[node.input[0]].type.tensor_type.elem_type - vi.CopyFrom(helper.make_tensor_value_info(o, elem_type, get_shape_from_sympy_shape(sympy_shape))) + elem_type = ( + onnx.TensorProto.INT64 + if i == 1 + else self.known_vi_[node.input[0]].type.tensor_type.elem_type + ) + vi.CopyFrom( + helper.make_tensor_value_info( + o, elem_type, get_shape_from_sympy_shape(sympy_shape) + ) + ) def _infer_aten_minmax(self, node): vi = self.known_vi_[node.output[0]] if len(node.input) == 1: vi.CopyFrom( helper.make_tensor_value_info( - node.output[0], self.known_vi_[node.input[0]].type.tensor_type.elem_type, [] + node.output[0], + self.known_vi_[node.input[0]].type.tensor_type.elem_type, + [], ) ) else: @@ -1278,7 +1465,9 @@ class SymbolicShapeInference: dim = self._try_get_value(node, 1) if dim is None: rank = self._get_shape_rank(node, 0) - output_shape = self._new_symbolic_shape(rank if keepdim else rank - 1, node) + output_shape = self._new_symbolic_shape( + rank if keepdim else rank - 1, node + ) else: shape = self._get_sympy_shape(node, 0) dim = handle_negative_axis(dim, len(shape)) @@ -1290,11 +1479,17 @@ class SymbolicShapeInference: output_shape = get_shape_from_sympy_shape(output_shape) vi.CopyFrom( helper.make_tensor_value_info( - node.output[0], self.known_vi_[node.input[0]].type.tensor_type.elem_type, output_shape + node.output[0], + self.known_vi_[node.input[0]].type.tensor_type.elem_type, + output_shape, ) ) vi1 = self.known_vi_[node.output[1]] - vi1.CopyFrom(helper.make_tensor_value_info(node.output[1], onnx.TensorProto.INT64, output_shape)) + vi1.CopyFrom( + helper.make_tensor_value_info( + node.output[1], onnx.TensorProto.INT64, output_shape + ) + ) def _infer_aten_unfold(self, node): sympy_shape = self._get_sympy_shape(node, 0) @@ -1337,12 +1532,18 @@ class SymbolicShapeInference: del sympy_shape[dim] else: rank = len(sympy_shape) - sympy_shape = 
self._new_symbolic_shape(rank if keepdim else rank - 1, node) + sympy_shape = self._new_symbolic_shape( + rank if keepdim else rank - 1, node + ) self._update_computed_dims(sympy_shape) new_shape = get_shape_from_sympy_shape(sympy_shape) if node.output[0] and new_shape is not None: vi = self.known_vi_[node.output[0]] - vi.CopyFrom(helper.make_tensor_value_info(node.output[0], onnx.TensorProto.INT64, new_shape)) + vi.CopyFrom( + helper.make_tensor_value_info( + node.output[0], onnx.TensorProto.INT64, new_shape + ) + ) def _infer_BatchNormalization(self, node): self._propagate_shape_and_type(node) @@ -1384,7 +1585,11 @@ class SymbolicShapeInference: helper.make_tensor_value_info( node.output[0], self.known_vi_[node.input[0]].type.tensor_type.elem_type, - get_shape_from_sympy_shape(self._new_symbolic_shape(self._get_shape_rank(node, 0), node)), + get_shape_from_sympy_shape( + self._new_symbolic_shape( + self._get_shape_rank(node, 0), node + ) + ), ) ) else: @@ -1425,7 +1630,9 @@ class SymbolicShapeInference: helper.make_tensor_value_info( node.output[0], vi.type.tensor_type.elem_type, - get_shape_from_sympy_shape(self._new_symbolic_shape(shape_rank, node)), + get_shape_from_sympy_shape( + self._new_symbolic_shape(shape_rank, node) + ), ) ) else: @@ -1471,7 +1678,10 @@ class SymbolicShapeInference: if get_opset(self.out_mp_) <= 10: scales = self._try_get_value(node, 1) if scales is not None: - new_sympy_shape = [sympy.simplify(sympy.floor(d * s)) for d, s in zip(input_sympy_shape, scales)] + new_sympy_shape = [ + sympy.simplify(sympy.floor(d * s)) + for d, s in zip(input_sympy_shape, scales) + ] self._update_computed_dims(new_sympy_shape) vi.CopyFrom( helper.make_tensor_value_info( @@ -1489,7 +1699,10 @@ class SymbolicShapeInference: self._update_computed_dims(new_sympy_shape) elif scales is not None: rank = len(scales) - if get_attribute(node, "coordinate_transformation_mode") == "tf_crop_and_resize": + if ( + get_attribute(node, "coordinate_transformation_mode") + == "tf_crop_and_resize" + ): assert len(roi) == 2 * rank roi_start = list(roi)[:rank] roi_end = list(roi)[rank:] @@ -1499,11 +1712,15 @@ class SymbolicShapeInference: scales = list(scales) new_sympy_shape = [ sympy.simplify(sympy.floor(d * (end - start) * scale)) - for d, start, end, scale in zip(input_sympy_shape, roi_start, roi_end, scales) + for d, start, end, scale in zip( + input_sympy_shape, roi_start, roi_end, scales + ) ] self._update_computed_dims(new_sympy_shape) else: - new_sympy_shape = self._new_symbolic_shape(self._get_shape_rank(node, 0), node) + new_sympy_shape = self._new_symbolic_shape( + self._get_shape_rank(node, 0), node + ) vi.CopyFrom( helper.make_tensor_value_info( @@ -1531,19 +1748,31 @@ class SymbolicShapeInference: si.CopyFrom(self.known_vi_[node.input[i]]) if i >= num_scan_states: scan_input_dim = si.type.tensor_type.shape.dim - scan_input_dim.remove(scan_input_dim[scan_input_axes[i - num_scan_states]]) + scan_input_dim.remove( + scan_input_dim[scan_input_axes[i - num_scan_states]] + ) si.name = subgraph_name self._onnx_infer_subgraph(node, subgraph) num_scan_outputs = len(node.output) - num_scan_states - scan_output_axes = get_attribute(node, "scan_output_axes", [0] * num_scan_outputs) - scan_input_dim = get_shape_from_type_proto(self.known_vi_[node.input[-1]].type)[scan_input_axes[-1]] + scan_output_axes = get_attribute( + node, "scan_output_axes", [0] * num_scan_outputs + ) + scan_input_dim = get_shape_from_type_proto(self.known_vi_[node.input[-1]].type)[ + scan_input_axes[-1] + ] for i, o in 
enumerate(node.output): vi = self.known_vi_[o] if i >= num_scan_states: shape = get_shape_from_type_proto(subgraph.output[i].type) - new_dim = handle_negative_axis(scan_output_axes[i - num_scan_states], len(shape) + 1) + new_dim = handle_negative_axis( + scan_output_axes[i - num_scan_states], len(shape) + 1 + ) shape = shape[:new_dim] + [scan_input_dim] + shape[new_dim:] - vi.CopyFrom(helper.make_tensor_value_info(o, subgraph.output[i].type.tensor_type.elem_type, shape)) + vi.CopyFrom( + helper.make_tensor_value_info( + o, subgraph.output[i].type.tensor_type.elem_type, shape + ) + ) else: vi.CopyFrom(subgraph.output[i]) vi.name = o @@ -1676,7 +1905,9 @@ class SymbolicShapeInference: e = new_sympy_shape[i] except Exception: logger.warning( - "Unable to determine if {} <= {}, treat as equal".format(e, new_sympy_shape[i]) + "Unable to determine if {} <= {}, treat as equal".format( + e, new_sympy_shape[i] + ) ) e = new_sympy_shape[i] @@ -1684,7 +1915,9 @@ class SymbolicShapeInference: if is_literal(new_sympy_shape[i]) and is_literal(s): s = max(0, min(s, new_sympy_shape[i])) - new_sympy_shape[i] = sympy.simplify((e - s + t + (-1 if t > 0 else 1)) // t) + new_sympy_shape[i] = sympy.simplify( + (e - s + t + (-1 if t > 0 else 1)) // t + ) self._update_computed_dims(new_sympy_shape) @@ -1709,7 +1942,9 @@ class SymbolicShapeInference: if type(input_sympy_data) == list or ( type(input_sympy_data) == np.array and len(input_sympy_data.shape) == 1 ): - self.sympy_data_[node.output[0]] = input_sympy_data[starts[0] : ends[0] : steps[0]] + self.sympy_data_[node.output[0]] = input_sympy_data[ + starts[0] : ends[0] : steps[0] + ] def _infer_SoftmaxCrossEntropyLoss(self, node): vi = self.known_vi_[node.output[0]] @@ -1724,7 +1959,9 @@ class SymbolicShapeInference: def _infer_Split_Common(self, node, make_value_info_func): input_sympy_shape = self._get_sympy_shape(node, 0) - axis = handle_negative_axis(get_attribute(node, "axis", 0), len(input_sympy_shape)) + axis = handle_negative_axis( + get_attribute(node, "axis", 0), len(input_sympy_shape) + ) split = get_attribute(node, "split") if not split: num_outputs = len(node.output) @@ -1739,7 +1976,11 @@ class SymbolicShapeInference: make_value_info_func( node.output[i_o], self.known_vi_[node.input[0]].type.tensor_type.elem_type, - get_shape_from_sympy_shape(input_sympy_shape[:axis] + [split[i_o]] + input_sympy_shape[axis + 1 :]), + get_shape_from_sympy_shape( + input_sympy_shape[:axis] + + [split[i_o]] + + input_sympy_shape[axis + 1 :] + ), ) ) self.known_vi_[vi.name] = vi @@ -1808,7 +2049,9 @@ class SymbolicShapeInference: new_sympy_shape.append(new_dim) self._update_computed_dims(new_sympy_shape) else: - new_sympy_shape = self._new_symbolic_shape(self._get_shape_rank(node, 0), node) + new_sympy_shape = self._new_symbolic_shape( + self._get_shape_rank(node, 0), node + ) vi = self.known_vi_[node.output[0]] vi.CopyFrom( helper.make_tensor_value_info( @@ -1845,7 +2088,11 @@ class SymbolicShapeInference: for i_o in range(len(node.output)): vi = self.known_vi_[node.output[i_o]] - vi.CopyFrom(helper.make_tensor_value_info(node.output[i_o], vi.type.tensor_type.elem_type, new_shape)) + vi.CopyFrom( + helper.make_tensor_value_info( + node.output[i_o], vi.type.tensor_type.elem_type, new_shape + ) + ) def _infer_Transpose(self, node): if node.input[0] in self.sympy_data_: @@ -1853,7 +2100,11 @@ class SymbolicShapeInference: perm = get_attribute(node, "perm", reversed(list(range(len(data_shape))))) input_data = self.sympy_data_[node.input[0]] 
self.sympy_data_[node.output[0]] = ( - np.transpose(np.array(input_data).reshape(*data_shape), axes=tuple(perm)).flatten().tolist() + np.transpose( + np.array(input_data).reshape(*data_shape), axes=tuple(perm) + ) + .flatten() + .tolist() ) def _infer_Unsqueeze(self, node): @@ -1901,7 +2152,9 @@ class SymbolicShapeInference: assert map_key_type is not None new_vi = onnx.ValueInfoProto() new_vi.name = node.output[0] - new_vi.type.sequence_type.elem_type.map_type.value_type.tensor_type.elem_type = onnx.TensorProto.FLOAT + new_vi.type.sequence_type.elem_type.map_type.value_type.tensor_type.elem_type = ( + onnx.TensorProto.FLOAT + ) new_vi.type.sequence_type.elem_type.map_type.key_type = map_key_type vi = self.known_vi_[node.output[0]] vi.CopyFrom(new_vi) @@ -1936,7 +2189,9 @@ class SymbolicShapeInference: else: past_shape[3] = f"{past_shape[3]}+{input_shape[1]}" vi = self.known_vi_[node.output[1]] - vi.CopyFrom(helper.make_tensor_value_info(vi.name, output_dtype, past_shape)) + vi.CopyFrom( + helper.make_tensor_value_info(vi.name, output_dtype, past_shape) + ) def _infer_BiasGelu(self, node): self._propagate_shape_and_type(node) @@ -1961,17 +2216,29 @@ class SymbolicShapeInference: word_embedding_dtype = self.known_vi_[node.input[2]].type.tensor_type.elem_type vi = self.known_vi_[node.output[0]] - vi.CopyFrom(helper.make_tensor_value_info(node.output[0], word_embedding_dtype, output_shape)) + vi.CopyFrom( + helper.make_tensor_value_info( + node.output[0], word_embedding_dtype, output_shape + ) + ) mask_index_shape = [input_ids_shape[0]] vi = self.known_vi_[node.output[1]] - vi.CopyFrom(helper.make_tensor_value_info(node.output[1], onnx.TensorProto.INT32, mask_index_shape)) + vi.CopyFrom( + helper.make_tensor_value_info( + node.output[1], onnx.TensorProto.INT32, mask_index_shape + ) + ) if len(node.output) > 2: # Optional output of add before layer nomalization is done # shape is same as the output vi = self.known_vi_[node.output[2]] - vi.CopyFrom(helper.make_tensor_value_info(node.output[2], word_embedding_dtype, output_shape)) + vi.CopyFrom( + helper.make_tensor_value_info( + node.output[2], word_embedding_dtype, output_shape + ) + ) def _infer_SkipLayerNormalization(self, node): self._propagate_shape_and_type(node) @@ -1985,7 +2252,9 @@ class SymbolicShapeInference: # set the context output seperately. # The first output is autograd's context. vi = self.known_vi_[node.output[0]] - vi.CopyFrom(helper.make_tensor_value_info(node.output[0], onnx.TensorProto.INT64, [])) + vi.CopyFrom( + helper.make_tensor_value_info(node.output[0], onnx.TensorProto.INT64, []) + ) # Outputs after autograd's context are tensors. # We assume their ranks are fixed for different model inputs. 
@@ -1994,14 +2263,22 @@ class SymbolicShapeInference: vi = self.known_vi_[node.output[i + 1]] sympy_shape = self._new_symbolic_shape(output_tensor_ranks[i], node) shape = get_shape_from_sympy_shape(sympy_shape) - value_info = helper.make_tensor_value_info(node.output[i + 1], output_tensor_types[i], shape) + value_info = helper.make_tensor_value_info( + node.output[i + 1], output_tensor_types[i], shape + ) vi.CopyFrom(value_info) def _propagate_shape_and_type(self, node, input_index=0, output_index=0): shape = self._get_shape(node, input_index) - output_dtype = self.known_vi_[node.input[input_index]].type.tensor_type.elem_type + output_dtype = self.known_vi_[ + node.input[input_index] + ].type.tensor_type.elem_type vi = self.known_vi_[node.output[output_index]] - vi.CopyFrom(helper.make_tensor_value_info(node.output[output_index], output_dtype, shape)) + vi.CopyFrom( + helper.make_tensor_value_info( + node.output[output_index], output_dtype, shape + ) + ) def _is_none_dim(self, dim_value): if type(dim_value) != str: @@ -2036,7 +2313,9 @@ class SymbolicShapeInference: for i_dim, dim in enumerate(input_shape): if dim is None: # some models use None for symbolic dim in input, replace it with a string - input_dims[i_dim].dim_param = str(self._new_symbolic_dim(i.name, i_dim)) + input_dims[i_dim].dim_param = str( + self._new_symbolic_dim(i.name, i_dim) + ) self.input_symbols_.update([d for d in input_shape if type(d) == str]) @@ -2057,7 +2336,9 @@ class SymbolicShapeInference: # compute prerequesite for node for topological sort # node with subgraphs may have dependency on implicit inputs, which will affect topological sort - prereq_for_node = {} # map from node to all its inputs, including implicit ones in subgraph + prereq_for_node = ( + {} + ) # map from node to all its inputs, including implicit ones in subgraph def get_prereq(node): names = set(i for i in node.input if i) @@ -2075,7 +2356,13 @@ class SymbolicShapeInference: for n in g.node: g_outputs_and_initializers.update(n.output) for n in g.node: - g_prereq.update([i for i in get_prereq(n) if i not in g_outputs_and_initializers]) + g_prereq.update( + [ + i + for i in get_prereq(n) + if i not in g_outputs_and_initializers + ] + ) names.update(g_prereq) # remove subgraph inputs from g_prereq since those are local-only for i in g.input: @@ -2088,16 +2375,28 @@ class SymbolicShapeInference: # topological sort nodes, note there might be dead nodes so we check if all graph outputs are reached to terminate sorted_nodes = [] - sorted_known_vi = set([i.name for i in list(self.out_mp_.graph.input) + list(self.out_mp_.graph.initializer)]) + sorted_known_vi = set( + [ + i.name + for i in list(self.out_mp_.graph.input) + + list(self.out_mp_.graph.initializer) + ] + ) if any([o.name in sorted_known_vi for o in self.out_mp_.graph.output]): # Loop/Scan will have some graph output in graph inputs, so don't do topological sort sorted_nodes = self.out_mp_.graph.node else: - while not all([o.name in sorted_known_vi for o in self.out_mp_.graph.output]): + while not all( + [o.name in sorted_known_vi for o in self.out_mp_.graph.output] + ): old_sorted_nodes_len = len(sorted_nodes) for node in self.out_mp_.graph.node: if (node.output[0] not in sorted_known_vi) and all( - [i in sorted_known_vi for i in prereq_for_node[node.output[0]] if i] + [ + i in sorted_known_vi + for i in prereq_for_node[node.output[0]] + if i + ] ): sorted_known_vi.update(node.output) sorted_nodes.append(node) @@ -2123,7 +2422,11 @@ class SymbolicShapeInference: for attr in node.attribute: 
# TODO: Is overload_name needed? if attr.name == "operator": - aten_op_name = attr.s.decode("utf-8") if isinstance(attr.s, bytes) else attr.s + aten_op_name = ( + attr.s.decode("utf-8") + if isinstance(attr.s, bytes) + else attr.s + ) if aten_op_name in self.aten_op_dispatcher_: known_aten_op = True self.aten_op_dispatcher_[aten_op_name](node) @@ -2133,7 +2436,9 @@ class SymbolicShapeInference: logger.debug(node.op_type + ": " + node.name) for i, name in enumerate(node.input): logger.debug( - " Input {}: {} {}".format(i, name, "initializer" if name in self.initializers_ else "") + " Input {}: {} {}".format( + i, name, "initializer" if name in self.initializers_ else "" + ) ) # onnx automatically merge dims with value, i.e. Mul(['aaa', 'bbb'], [1000, 1]) -> [1000, 'bbb'] @@ -2152,8 +2457,20 @@ class SymbolicShapeInference: vi = self.known_vi_[node.output[0]] out_rank = len(get_shape_from_type_proto(vi.type)) in_shapes = [self._get_shape(node, i) for i in range(len(node.input))] - for d in range(out_rank - (2 if node.op_type in ["MatMul", "MatMulInteger", "MatMulInteger16"] else 0)): - in_dims = [s[len(s) - out_rank + d] for s in in_shapes if len(s) + d >= out_rank] + for d in range( + out_rank + - ( + 2 + if node.op_type + in ["MatMul", "MatMulInteger", "MatMulInteger16"] + else 0 + ) + ): + in_dims = [ + s[len(s) - out_rank + d] + for s in in_shapes + if len(s) + d >= out_rank + ] if len(in_dims) > 1: self._check_merged_dims(in_dims, allow_broadcast=True) @@ -2166,7 +2483,9 @@ class SymbolicShapeInference: if out_type_kind not in ["tensor_type", "sparse_tensor_type", None]: if self.verbose_ > 2: if out_type_kind == "sequence_type": - seq_cls_type = out_type.sequence_type.elem_type.WhichOneof("value") + seq_cls_type = out_type.sequence_type.elem_type.WhichOneof( + "value" + ) if "tensor_type" == seq_cls_type: logger.debug( " {}: sequence of {} {}".format( @@ -2178,27 +2497,42 @@ class SymbolicShapeInference: ) ) else: - logger.debug(" {}: sequence of {}".format(node.output[i_o], seq_cls_type)) + logger.debug( + " {}: sequence of {}".format( + node.output[i_o], seq_cls_type + ) + ) else: - logger.debug(" {}: {}".format(node.output[i_o], out_type_kind)) + logger.debug( + " {}: {}".format(node.output[i_o], out_type_kind) + ) continue out_shape = get_shape_from_value_info(vi) - out_type_undefined = out_type.tensor_type.elem_type == onnx.TensorProto.UNDEFINED + out_type_undefined = ( + out_type.tensor_type.elem_type == onnx.TensorProto.UNDEFINED + ) if self.verbose_ > 2: logger.debug( " {}: {} {}".format( node.output[i_o], str(out_shape), - onnx.TensorProto.DataType.Name(vi.type.tensor_type.elem_type), + onnx.TensorProto.DataType.Name( + vi.type.tensor_type.elem_type + ), ) ) if node.output[i_o] in self.sympy_data_: - logger.debug(" Sympy Data: " + str(self.sympy_data_[node.output[i_o]])) + logger.debug( + " Sympy Data: " + str(self.sympy_data_[node.output[i_o]]) + ) # onnx >= 1.11.0, use unk__#index instead of None when the shape dim is uncertain if ( - out_shape is not None and (None in out_shape or self._is_shape_contains_none_dim(out_shape)) + out_shape is not None + and ( + None in out_shape or self._is_shape_contains_none_dim(out_shape) + ) ) or out_type_undefined: if self.auto_merge_: if node.op_type in [ @@ -2220,21 +2554,36 @@ class SymbolicShapeInference: "Min", "Max", ]: - shapes = [self._get_shape(node, i) for i in range(len(node.input))] + shapes = [ + self._get_shape(node, i) for i in range(len(node.input)) + ] if node.op_type in [ "MatMul", "MatMulInteger", "MatMulInteger16", ]: - 
if None in out_shape or self._is_shape_contains_none_dim(out_shape): + if ( + None in out_shape + or self._is_shape_contains_none_dim(out_shape) + ): if None in out_shape: idx = out_shape.index(None) else: - idx = out_shape.index(self._is_shape_contains_none_dim(out_shape)) - dim_idx = [len(s) - len(out_shape) + idx for s in shapes] + idx = out_shape.index( + self._is_shape_contains_none_dim(out_shape) + ) + dim_idx = [ + len(s) - len(out_shape) + idx for s in shapes + ] # only support auto merge for MatMul for dim < rank-2 when rank > 2 - assert len(shapes[0]) > 2 and dim_idx[0] < len(shapes[0]) - 2 - assert len(shapes[1]) > 2 and dim_idx[1] < len(shapes[1]) - 2 + assert ( + len(shapes[0]) > 2 + and dim_idx[0] < len(shapes[0]) - 2 + ) + assert ( + len(shapes[1]) > 2 + and dim_idx[1] < len(shapes[1]) - 2 + ) elif node.op_type == "Expand": # auto merge for cases like Expand([min(batch, 1), min(seq, 512)], [batch, seq]) shapes = [ @@ -2246,11 +2595,15 @@ class SymbolicShapeInference: if shapes: for idx in range(len(out_shape)): - if out_shape[idx] is not None and not self._is_none_dim(out_shape[idx]): + if out_shape[idx] is not None and not self._is_none_dim( + out_shape[idx] + ): continue # note that the broadcasting rule aligns from right to left # if a tensor has a lower rank (dim_idx[idx] < 0), it would automatically broadcast and need no merge - dim_idx = [len(s) - len(out_shape) + idx for s in shapes] + dim_idx = [ + len(s) - len(out_shape) + idx for s in shapes + ] if len(dim_idx) > 0: self._add_suggested_merge( [ @@ -2266,12 +2619,22 @@ class SymbolicShapeInference: self.run_ = False # create new dynamic dims for ops not handled by symbolic shape inference - if self.run_ == False and not node.op_type in self.dispatcher_ and not known_aten_op: - is_unknown_op = out_type_undefined and (out_shape is None or len(out_shape) == 0) + if ( + self.run_ == False + and not node.op_type in self.dispatcher_ + and not known_aten_op + ): + is_unknown_op = out_type_undefined and ( + out_shape is None or len(out_shape) == 0 + ) if is_unknown_op: # unknown op to ONNX, maybe from higher opset or other domain # only guess the output rank from input 0 when using guess_output_rank option - out_rank = self._get_shape_rank(node, 0) if self.guess_output_rank_ else -1 + out_rank = ( + self._get_shape_rank(node, 0) + if self.guess_output_rank_ + else -1 + ) else: # valid ONNX op, but not handled by symbolic shape inference, just assign dynamic shape out_rank = len(out_shape) @@ -2280,7 +2643,9 @@ class SymbolicShapeInference: new_shape = self._new_symbolic_shape(out_rank, node, i_o) if out_type_undefined: # guess output data type from input vi if not defined - out_dtype = self.known_vi_[node.input[0]].type.tensor_type.elem_type + out_dtype = self.known_vi_[ + node.input[0] + ].type.tensor_type.elem_type else: # otherwise, use original data type out_dtype = vi.type.tensor_type.elem_type @@ -2312,7 +2677,12 @@ class SymbolicShapeInference: continue # continue the inference after guess, no need to stop as no merge is needed if self.verbose_ > 0 or not self.auto_merge_ or out_type_undefined: - logger.debug("Stopping at incomplete shape inference at " + node.op_type + ": " + node.name) + logger.debug( + "Stopping at incomplete shape inference at " + + node.op_type + + ": " + + node.name + ) logger.debug("node inputs:") for i in node.input: logger.debug(self.known_vi_[i]) @@ -2332,19 +2702,23 @@ class SymbolicShapeInference: output.CopyFrom(self.known_vi_[output.name]) @staticmethod - def infer_shapes(in_mp, 
int_max=2**31 - 1, auto_merge=False, guess_output_rank=False, verbose=0): + def infer_shapes( + in_mp, int_max=2**31 - 1, auto_merge=False, guess_output_rank=False, verbose=0 + ): onnx_opset = get_opset(in_mp) if (not onnx_opset) or onnx_opset < 7: logger.warning("Only support models of onnx opset 7 and above.") return None - symbolic_shape_inference = SymbolicShapeInference(int_max, auto_merge, guess_output_rank, verbose) + symbolic_shape_inference = SymbolicShapeInference( + int_max, auto_merge, guess_output_rank, verbose + ) all_shapes_inferred = False symbolic_shape_inference._preprocess(in_mp) while symbolic_shape_inference.run_: all_shapes_inferred = symbolic_shape_inference._infer_impl() symbolic_shape_inference._update_output_from_vi() if not all_shapes_inferred: - raise Exception("Incomplete symbolic shape inference") + logger.warning("Incomplete symbolic shape inference") return symbolic_shape_inference.out_mp_ diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/requirements.txt b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/requirements.txt index 4228878179aa0c3fa63fd9656087de5a90d5e31c..b80f9f4022328703df32af16182ea930645a6db6 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/requirements.txt +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/optimizer/requirements.txt @@ -1,3 +1,19 @@ -sympy +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+# + +onnxsim packaging -onnxsim \ No newline at end of file +sympy diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/runtime_backend_iluvatar.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/runtime_backend_iluvatar.py index 0834ab6dd3247dbfad47996e8fba22dcd29c4cab..7781481dcaf68997e289f56d65a4f2222e948d7f 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/runtime_backend_iluvatar.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/runtime_backend_iluvatar.py @@ -657,4 +657,4 @@ class RuntimeBackendILUVATAR(runtime_backend.RuntimeBackend): i += 1 return data else: - raise ValueError("Please provide input type") \ No newline at end of file + raise ValueError("Please provide input type") diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py index 2b7a4df767042ab408b6cc1194ffdec1bdac976f..089d9860f573bba7e19f84aa20fb830a8fcc22d8 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py @@ -20,7 +20,6 @@ import subprocess import time from typing import Any, Dict, Tuple -import virtualenv from prompt_toolkit.shortcuts import radiolist_dialog, input_dialog, yes_no_dialog from prompt_toolkit.styles import Style @@ -33,7 +32,6 @@ import argparse from general_perf.core.configs.workload_store import load_workload from general_perf.core.configs.dataset_store import load_dataset from general_perf.core.configs.backend_store import init_compile_backend, init_runtime_backend -from general_perf.tools.build_pdf import build_pdf logging.basicConfig(level=logging.INFO) log = logging.getLogger("PerfEngine") @@ -208,7 +206,7 @@ class PerfEngine: workload['data_percent']) diff_results = AccuracyChecker.calculate_diff() accuracy_report.update(diff_results) - accuracy_report['Diff Dist'] = compile_info['model'] + '-to-' + compile_info['compile_precision'].lower() + ".png" + # accuracy_report['Diff Dist'] = compile_info['model'] + '-to-' + compile_info['compile_precision'].lower() + ".png" if accuracy_report: base_report['Accuracy'] = accuracy_report @@ -242,11 +240,6 @@ class PerfEngine: log.info("Testing Finish. Report is saved in path: [ {}/{} ]". 
format(output_dir[output_dir.rfind('general_perf'):], os.path.basename(output_report_path))) - build_pdf(output_report_path) - log.info("PDF Version is saved in path: [ {}/{}-TO-{}.pdf ]".format( - output_dir[output_dir.rfind('general_perf'):], - base_report['Model'], - output_report_path.split('/')[-1].split('-')[1].upper())) return compile_info["compile_status"] @@ -341,46 +334,7 @@ class PerfEngine: return answer def activate_venv(self, hardware_type: str) -> bool: - if os.path.exists('general_perf/backends/' + hardware_type + - '/requirements.txt'): - log.info("Activating Virtual Env for " + hardware_type) - - venv_dir = os.path.join("general_perf/backends", - hardware_type + "/venv") - activate_file = os.path.join(venv_dir, 'bin', 'activate_this.py') - if not os.path.exists(venv_dir): - log.info("venv not exist, Creating Virtual Env for " + - hardware_type) - if (hardware_type == "HPU"): - virtualenv.create_environment(venv_dir,True) - else: - virtualenv.create_environment(venv_dir) - exec(open(activate_file).read(), {'__file__': activate_file}) - python_path = os.path.join(venv_dir, 'bin', 'python3') - subprocess.call([ - python_path, '-m', 'pip', 'install', '--upgrade', 'pip', '--quiet' - ]) - subprocess.call([ - python_path, '-m', 'pip', 'install', '-r', 'general_perf/backends/' + - hardware_type + '/requirements.txt', '-q' - ]) - else: - exec(open(activate_file).read(), {'__file__': activate_file}) - ''' - just in case install failed in pre-run. - ''' - python_path = os.path.join(venv_dir, 'bin', 'python3') - subprocess.call([ - python_path, '-m', 'pip', 'install', '--upgrade', 'pip', '--quiet' - ]) - subprocess.call([ - python_path, '-m', 'pip', 'install', '-r', 'general_perf/backends/' + - hardware_type + '/requirements.txt', '-q' - ]) - - if not hasattr(sys, 'real_prefix'): - return False - return True + return True def deactivate_venv(self): diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cail2019/data_loader.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cail2019/data_loader.py new file mode 100644 index 0000000000000000000000000000000000000000..709d6f7646ef73140c39ea3de9d4dd0b8aa66609 --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cail2019/data_loader.py @@ -0,0 +1,95 @@ +# Copyright 2023 ByteDance and/or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import logging +import os +import numpy as np +from general_perf.datasets import data_loader +from tqdm import tqdm +import collections + +log = logging.getLogger("CAIL2019") + +maxlen = 1024 + + +class DataLoader(data_loader.Dataset): + def __init__(self, config): + super(DataLoader, self).__init__(config) + + log.info("Initial...") + self.config = config + self.cur_bs = 2 + + batch_token_ids = np.load( + "general_perf/datasets/{}/batch_token_ids.npy".format( + self.config['dataset_name']), + allow_pickle=True) + batch_segment_ids = np.load( + "general_perf/datasets/{}/batch_segment_ids.npy".format( + self.config['dataset_name']), + allow_pickle=True) + labels = np.load("general_perf/datasets/{}/label.npy".format( + self.config['dataset_name']), + allow_pickle=True) + self.feed_dict = collections.defaultdict(list) + self.feed_dict['batch_token_ids'] = batch_token_ids.tolist() + self.feed_dict['batch_segment_ids'] = batch_segment_ids.tolist() + self.feed_dict['label'] = labels.tolist() + + self.items = len(self.feed_dict['label']) + self.batch_num = int(self.items / self.cur_bs) + + for i in range(self.items): + batch_token_id = np.pad( + self.feed_dict['batch_token_ids'][i], + (0, 1024 - len(self.feed_dict['batch_token_ids'][i])), + 'constant').astype(np.float32) + batch_segment_id = np.pad( + self.feed_dict['batch_segment_ids'][i], + (0, 1024 - len(self.feed_dict['batch_segment_ids'][i])), + 'constant').astype(np.float32) + self.feed_dict['batch_token_ids'][i] = batch_token_id.tolist() + self.feed_dict['batch_segment_ids'][i] = batch_segment_id.tolist() + + def name(self): + return self.config['dataset_name'] + + def preprocess(self): + log.info("Preprocessing...") + + self.rebatch(self.cur_bs, skip=False) + + def rebatch(self, new_bs, skip=True): + log.info("Rebatching batch size to: {} ...".format(new_bs)) + + if self.cur_bs == new_bs and skip: + return + + self.cur_bs = new_bs + self.batch_num = int(self.items / self.cur_bs) + self.batched_data = [] + self.labels = [] + for i in tqdm(range(self.batch_num)): + split_data = { + 'input_segment:0': + self.feed_dict["batch_segment_ids"][i * self.cur_bs:(i + 1) * + self.cur_bs], + 'input_token:0': + self.feed_dict["batch_token_ids"][i * self.cur_bs:(i + 1) * + self.cur_bs], + } + self.labels.append( + self.feed_dict["label"][i * self.cur_bs:(i + 1) * self.cur_bs]) + self.batched_data.append(split_data) diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cail2019/pre_process_data.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cail2019/pre_process_data.py new file mode 100644 index 0000000000000000000000000000000000000000..ce353805a686df60343cc68602fd83959ac7a74c --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cail2019/pre_process_data.py @@ -0,0 +1,56 @@ +# Copyright 2023 ByteDance and/or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from tqdm import tqdm +import json +import collections +import numpy as np +from bert4keras.tokenizers import Tokenizer +import jieba +jieba.initialize() + +test_data = [] +with open("test.json", encoding='utf-8') as f: + for l in f: + l = json.loads(l) + assert l['label'] in 'BC' + if l['label'] == 'B': + test_data.append((l['A'], l['B'], l['C'])) + else: + test_data.append((l['A'], l['C'], l['B'])) + +tokenizer = Tokenizer("vocab.txt", + do_lower_case=True, + pre_tokenize=lambda s: jieba.cut(s, HMM=False)) + +feed_dict = collections.defaultdict(list) +maxlen = 1024 +for i in tqdm(range(len(test_data))): + (text1, text2, text3) = test_data[i] + token_ids, segment_ids = tokenizer.encode(text1, text2, maxlen=maxlen) + feed_dict["batch_token_ids"].append(token_ids) + feed_dict["batch_segment_ids"].append(segment_ids) + feed_dict["label"].append([1]) + token_ids, segment_ids = tokenizer.encode(text1, text3, maxlen=maxlen) + feed_dict["batch_token_ids"].append(token_ids) + feed_dict["batch_segment_ids"].append(segment_ids) + feed_dict["label"].append([0]) + +np.save("{}.npy".format('batch_token_ids'), + feed_dict["batch_token_ids"], + allow_pickle=True) +np.save("{}.npy".format('batch_segment_ids'), + feed_dict["batch_segment_ids"], + allow_pickle=True) +np.save("{}.npy".format('label'), feed_dict["label"], allow_pickle=True) diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cail2019/test_accuracy.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cail2019/test_accuracy.py new file mode 100644 index 0000000000000000000000000000000000000000..5fd917f67adb39b6a36d9a6b69ef8528d1cc3dc3 --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cail2019/test_accuracy.py @@ -0,0 +1,45 @@ +# Copyright 2023 ByteDance and/or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import logging +import numpy as np +from general_perf.datasets import test_accuracy +from tqdm import tqdm + +log = logging.getLogger("TestAccuracy") + + +class AccuracyChecker(test_accuracy.AccuracyChecker): + def calculate_acc(self, data_percent): + log.info("Start to calculate accuracy...") + num = int((data_percent / 100) * self.dataloader.get_batch_count() + ) if data_percent else self.dataloader.get_batch_count() + good, total = 0, 0 + diffs = [] + for i in tqdm(range(num)): + test_data, labels = self.dataloader.get_samples(i) + + results = self.runtime_backend.predict(test_data) + results = results[list(results)[0]] + diffs.append(results) + + total += len(results) // 2 + good += (results[::2] > results[1::2]).sum() + + accuracy = round((good / total), 5) + np.save(self.output_dir + "/{}.npy".format(self.dataloader.name()), + diffs) + log.info('Batch size is {}, Accuracy: {}'.format( + self.dataloader.cur_bs, accuracy)) + return {"Top-1": accuracy} diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cifar/data_loader.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cifar/data_loader.py new file mode 100644 index 0000000000000000000000000000000000000000..4bef7f72acc9f75f1c29ee22ee712113a26165d6 --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cifar/data_loader.py @@ -0,0 +1,155 @@ +# Copyright 2023 ByteDance and/or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import collections +import logging + +import numpy as np +import os +import pickle +from tqdm import tqdm +from typing import Any +from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize +from PIL import Image +try: + from torchvision.transforms import InterpolationMode + BICUBIC = InterpolationMode.BICUBIC +except ImportError: + BICUBIC = Image.BICUBIC + +from general_perf.datasets import data_loader + +log = logging.getLogger("CIFAR100") + +INPUT_TYPE = { + "UINT8": np.uint8, + "FLOAT32": np.float32, + "LONG": np.long, + "INT32": np.int32, + "INT64": np.int64 +} + + +class DataLoader(data_loader.Dataset): + def __init__(self, config): + super(DataLoader, self).__init__(config) + log.info("Initial...") + + base_folder = "general_perf/datasets/{}/cifar-100-python".format( + self.config['dataset_name']) + test_list = [ + ['test', 'f0ef6b0ae62326f3e7ffdfab6717acfc'], + ] + meta = { + 'filename': 'meta', + 'key': 'fine_label_names', + 'md5': '7973b15100ade9c7d40fb424638fde48', + } + + self.data: Any = [] + self.targets = [] + + # now load the picked numpy arrays + for file_name, checksum in test_list: + file_path = os.path.join(base_folder, file_name) + with open(file_path, 'rb') as f: + entry = pickle.load(f, encoding='latin1') + self.data.append(entry['data']) + if 'labels' in entry: + self.targets.extend(entry['labels']) + else: + self.targets.extend(entry['fine_labels']) + + self.data = np.vstack(self.data).reshape(-1, 3, 32, 32) + self.data = self.data.transpose((0, 2, 3, 1)) # convert to HWC + + transformer = _transform() + path = os.path.join(base_folder, meta['filename']) + with open(path, 'rb') as infile: + data = pickle.load(infile, encoding='latin1') + self.classes = data[meta['key']] + self.class_to_idx = { + _class: i + for i, _class in enumerate(self.classes) + } + self.test_data = [] + for i in tqdm(range(len(self.data))): + img = self.data[i] + img = Image.fromarray(img) + img = transformer(img).detach().numpy() + self.test_data.append(img) + self.text_input = np.load(os.path.join(base_folder, 'text.npy')) + self.config = config + self.cur_bs = 1 + self.items = len(self.data) + self.batch_num = int(self.items / self.cur_bs) + + def name(self): + return self.config['dataset_name'] + + def preprocess(self): + log.info("Preprocessing...") + + self.rebatch(self.cur_bs, skip=False) + + def rebatch(self, new_bs, skip=True): + log.info("Rebatching batch size to: {} ...".format(new_bs)) + + if self.cur_bs == new_bs and skip: + return + + self.cur_bs = new_bs + self.batch_num = int(self.items / self.cur_bs) + self.batched_data = [] + self.labels = [] + for i in tqdm(range(self.batch_num)): + split_data = { + 'image': self.test_data[i * self.cur_bs:(i + 1) * self.cur_bs], + 'text': self.text_input, + } + self.labels.append(self.targets[i * self.cur_bs:(i + 1) * + self.cur_bs]) + self.batched_data.append(split_data) + + def get_fake_samples(self, batch_size, shape, input_type): + data = {} + if input_type: + i = 0 + for key, val in shape.items(): + if key == "image": + val = [val[0] * batch_size] + val[1:] + data[key] = np.random.random(size=val).astype( + INPUT_TYPE[input_type[i]]) + else: + data[key] = np.random.random(size=val).astype( + INPUT_TYPE[input_type[i]]) + i += 1 + return data + else: + raise ValueError("Please provide input type") + + +def _convert_image_to_rgb(image): + return image.convert("RGB") + + +def _transform(): + return Compose([ + Resize(224, interpolation=BICUBIC), + CenterCrop(224), + _convert_image_to_rgb, + ToTensor(), + 
Normalize((0.48145466, 0.4578275, 0.40821073), + (0.26862954, 0.26130258, 0.27577711)), + ]) diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cifar/test_accuracy.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cifar/test_accuracy.py new file mode 100644 index 0000000000000000000000000000000000000000..cbe6a86087452ff8d65b1f21aa0a6901409fe3f2 --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cifar/test_accuracy.py @@ -0,0 +1,49 @@ +# Copyright 2023 ByteDance and/or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import numpy as np +from general_perf.datasets import test_accuracy +from tqdm import tqdm + +log = logging.getLogger("TestAccuracy") + + +class AccuracyChecker(test_accuracy.AccuracyChecker): + def calculate_acc(self, data_percent): + log.info("Start to calculate accuracy...") + num = int((data_percent / 100) * self.dataloader.get_batch_count() + ) if data_percent else self.dataloader.get_batch_count() + good, total = 0, 0 + diffs = [] + for i in tqdm(range(num)): + test_data, labels = self.dataloader.get_samples(i) + logits_per_image, logits_per_text = self.runtime_backend.predict( + test_data) + diffs.append(logits_per_image) + + for j in range(len(logits_per_image)): + probs = logits_per_image[j] + + if np.argmax(probs) == labels[j]: + good += 1 + total += 1 + + accuracy = round((good / total), 5) + np.save(self.output_dir + "/{}.npy".format(self.dataloader.name()), + diffs, + allow_pickle=True) + log.info('Batch size is {}, Accuracy: {}'.format( + self.dataloader.cur_bs, accuracy)) + return {"Top-1": accuracy} diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_criteo_kaggle/data_loader.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_criteo_kaggle/data_loader.py new file mode 100644 index 0000000000000000000000000000000000000000..a224eaf59f720c8476b9ea8b085491f5e2884d5b --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_criteo_kaggle/data_loader.py @@ -0,0 +1,102 @@ +# Copyright 2023 ByteDance and/or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
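Note on the open_cifar accuracy checker above: it evaluates a CLIP-style zero-shot classifier, where the backend returns image-text similarity logits and a sample counts as correct when the highest-scoring text prompt matches its label. A toy version of that scoring with made-up logits for two images and three class prompts:

```python
import numpy as np

# Hypothetical logits_per_image for a batch of 2 images vs. 3 class prompts.
logits_per_image = np.array([[0.1, 2.3, 0.4],
                             [1.7, 0.2, 0.3]])
labels = [1, 0]

good = sum(int(np.argmax(row) == lab) for row, lab in zip(logits_per_image, labels))
print(good / len(labels))   # 1.0 -> both images matched their label prompt
```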
+ +import logging +import os +from re import T +import numpy as np +from general_perf.datasets import data_loader +from tqdm import tqdm + +log = logging.getLogger("CriteoKaggle") + + +class DataLoader(data_loader.Dataset): + def __init__(self, config): + super(DataLoader, self).__init__(config) + + log.info("Initial...") + self.config = config + self.cur_bs = 1 + if not os.path.exists("general_perf/datasets/{}/numeric.npy".format( + self.config['dataset_name'])): + from general_perf.datasets.open_criteo_kaggle.preprocess_dataset import csv_to_numpy + csv_to_numpy( + "general_perf/datasets/{}/eval.csv".format( + self.config['dataset_name']), + "general_perf/datasets/{}/".format(self.config['dataset_name'])) + + num = np.load("general_perf/datasets/{}/numeric.npy".format( + self.config['dataset_name'])) + cat = np.load("general_perf/datasets/{}/categorical.npy".format( + self.config['dataset_name'])) + label = np.load("general_perf/datasets/{}/label.npy".format( + self.config['dataset_name'])) + self.items = len(num) + self.batch_num = int(self.items / self.cur_bs) + self.feed_dict = {} + for i in tqdm(range(cat.shape[0])): + if i == 0: + self.feed_dict["new_categorical_placeholder:0"] = list( + cat[i].reshape(-1, 2)) + self.feed_dict["new_numeric_placeholder:0"] = list( + num[i].reshape(1, -1)) + self.feed_dict["label"] = list(label[i]) + else: + self.feed_dict["new_categorical_placeholder:0"].extend( + cat[i].reshape(-1, 2)) + self.feed_dict["new_numeric_placeholder:0"].extend( + num[i].reshape(1, -1)) + self.feed_dict["label"].extend(label[i]) + self.feed_dict['new_categorical_placeholder:0'] = np.array( + self.feed_dict['new_categorical_placeholder:0'], dtype=np.int64) + self.feed_dict['new_numeric_placeholder:0'] = np.array( + self.feed_dict['new_numeric_placeholder:0'], dtype=np.float32) + self.feed_dict['label'] = np.array(self.feed_dict['label'], + dtype=np.int64) + + def name(self): + return self.config['dataset_name'] + + def preprocess(self): + log.info("Preprocessing...") + + self.rebatch(self.cur_bs, skip=False) + + def rebatch(self, new_bs, skip=True): + log.info("Rebatching batch size to: {} ...".format(new_bs)) + + if self.cur_bs == new_bs and skip: + return + + self.cur_bs = new_bs + self.batch_num = int(self.items / self.cur_bs) + self.batched_data = [] + self.labels = [] + for i in tqdm(range(self.batch_num)): + split_data = { + 'new_categorical_placeholder:0': + self.feed_dict["new_categorical_placeholder:0"][i * + self.cur_bs * + 26:(i + 1) * + self.cur_bs * + 26, ], + 'new_numeric_placeholder:0': + self.feed_dict["new_numeric_placeholder:0"][ + i * self.cur_bs:(i + 1) * self.cur_bs, ], + } + self.labels.append( + self.feed_dict["label"][i * self.cur_bs:(i + 1) * + self.cur_bs, ]) + self.batched_data.append(split_data) diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_criteo_kaggle/preprocess_dataset.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_criteo_kaggle/preprocess_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..b38adf830c586fc706592801cd8f3f733c663888 --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_criteo_kaggle/preprocess_dataset.py @@ -0,0 +1,174 @@ +# Copyright 2023 ByteDance and/or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import pandas +import argparse +import numpy as np +import tensorflow as tf + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument('--input', + type=str, + default="eval.csv", + help='full path of data file e.g. eval.csv', + dest='evaldatafile_path', + required=True) + + args = parser.parse_args() + return args + + +def version_is_less_than(a, b): + a_parts = a.split('.') + b_parts = b.split('.') + + for i in range(len(a_parts)): + if int(a_parts[i]) < int(b_parts[i]): + print('{} < {}, version_is_less_than() returning False'.format( + a_parts[i], b_parts[i])) + return True + return False + + +def csv_to_numpy(eval_csv_file, output): + print("TensorFlow version {}".format(tf.__version__)) + required_tf_version = '2.0.0' + + if version_is_less_than(tf.__version__, required_tf_version): + tf.compat.v1.enable_eager_execution() + + # args = parse_args() + # eval_csv_file = args.evaldatafile_path + + csv = pandas.read_csv(eval_csv_file, header=None) + if len(csv.columns) == 39: + dataset_type = 'test' + else: + dataset_type = 'eval' + + fill_na_dict = {} + if dataset_type == 'test': + for i in range(0, 13): + fill_na_dict[i] = 0.0 + for i in range(13, 39): + fill_na_dict[i] = "" + else: + for i in range(1, 14): + fill_na_dict[i] = 0.0 + for i in range(14, 40): + fill_na_dict[i] = "" + + csv = csv.fillna(value=fill_na_dict).values + + LABEL_COLUMN = ["clicked"] + CATEGORICAL_COLUMNS1 = ["C" + str(i) + "_embedding" for i in range(1, 27)] + NUMERIC_COLUMNS1 = ["I" + str(i) for i in range(1, 14)] + CATEGORICAL_COLUMNS2 = ["C" + str(i) + "_embedding" for i in range(1, 27)] + NUMERIC_COLUMNS2 = ["I" + str(i) for i in range(1, 14)] + + DATA_COLUMNS = LABEL_COLUMN + NUMERIC_COLUMNS1 + CATEGORICAL_COLUMNS1 + + CATEGORICAL_COLUMNS1.sort() + NUMERIC_COLUMNS1.sort() + + with open(eval_csv_file, 'r') as f: + nums = [line.strip('\n\r').split(',') for line in f.readlines()] + numpy_arr = np.array(nums) + numpy_arr[numpy_arr == ''] = '0' + min_list, max_list, range_list = [], [], [] + + for i in range(len(DATA_COLUMNS)): + if DATA_COLUMNS[i] in NUMERIC_COLUMNS1: + col_min = numpy_arr[:, i].astype(np.float32).min() + col_max = numpy_arr[:, i].astype(np.float32).max() + min_list.append(col_min) + max_list.append(col_max) + range_list.append(col_max - col_min) + + print('min list', min_list) + print('max list', max_list) + print('range list', range_list) + + all_data = [] + no_of_rows = 0 + for row in csv: + no_of_rows = no_of_rows + 1 + unnormalized_vals = np.array(row[1:14]) + normalized_vals = (unnormalized_vals - min_list) / range_list + new_categorical_dict = dict(zip(CATEGORICAL_COLUMNS2, row[14:40])) + + new_categorical_list = [] + for i in CATEGORICAL_COLUMNS1: + if pandas.isnull(new_categorical_dict[i]): + new_categorical_list.append("") + else: + new_categorical_list.append(new_categorical_dict[i]) + + if tf.executing_eagerly(): + hash_values = tf.strings.to_hash_bucket_fast( + new_categorical_list, 1000).numpy() + else: + hash_tensor = 
tf.strings.to_hash_bucket_fast( + new_categorical_list, 1000) + with tf.compat.v1.Session() as sess: + hash_values = hash_tensor.eval() + + new_numerical_dict = dict(zip(NUMERIC_COLUMNS2, normalized_vals)) + + item_data = { + "new_numeric_placeholder": [], + "new_categorical_placeholder": [], + "label": [] + } + + for i in NUMERIC_COLUMNS1: + item_data["new_numeric_placeholder"].extend( + [new_numerical_dict[i]]) + + for i in range(0, 26): + item_data["new_categorical_placeholder"].extend([i]) + item_data["new_categorical_placeholder"].extend([hash_values[i]]) + + item_data["label"].append(row[0]) + + all_data.append(item_data) + + wnd_num = [] + wnd_cate = [] + wnd_lable = [] + + for data in all_data: + wnd_num.append(data["new_numeric_placeholder"]) + wnd_cate.append(data["new_categorical_placeholder"]) + wnd_lable.append(data["label"]) + + np.save(os.path.join(output, "numeric.npy"), np.array(wnd_num)) + np.save(os.path.join(output, "categorical.npy"), np.array(wnd_cate)) + np.save(os.path.join(output, "label.npy"), np.array(wnd_lable)) + + print('Total number of rows ', no_of_rows) + print( + 'Generated output file name : wnd_num.npy, wnd_cate.npy, wnd_label.npy' + ) + + +if __name__ == "__main__": + csv_to_numpy() diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_criteo_kaggle/test_accuracy.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_criteo_kaggle/test_accuracy.py new file mode 100644 index 0000000000000000000000000000000000000000..145e9cb3110e16361f1029aa941ca4dcf3ce08eb --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_criteo_kaggle/test_accuracy.py @@ -0,0 +1,47 @@ +# Copyright 2023 ByteDance and/or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
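Note on the Criteo preprocessor above: each numeric column is min-max normalised, and each of the 26 categorical strings is hashed into 1000 buckets and stored as a (feature_index, bucket) pair, which is why the Criteo data loader reads `categorical.npy` back with `reshape(-1, 2)` and slices 26 rows per sample. A small sketch of that layout for one hypothetical row (the raw strings are made up):

```python
import numpy as np
import tensorflow as tf

# One hypothetical row of 26 raw categorical strings (empty means missing).
raw_categories = ["68fd1e64", "", "05db9164"] + [""] * 23

# Same bucketing as the preprocessor: hash every string into 1000 buckets.
hash_values = tf.strings.to_hash_bucket_fast(raw_categories, 1000).numpy()

row = []
for i in range(26):
    row.extend([i, hash_values[i]])     # (feature_index, bucket) pairs

row = np.array(row, dtype=np.int64)
print(row.reshape(-1, 2).shape)         # (26, 2), matching the loader's slicing
```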
+ +import logging +import numpy as np +from general_perf.datasets import test_accuracy +from tqdm import tqdm + +log = logging.getLogger("TestAccuracy") + + +class AccuracyChecker(test_accuracy.AccuracyChecker): + def calculate_acc(self, data_percent): + log.info("Start to calculate accuracy...") + num = int((data_percent / 100) * self.dataloader.get_batch_count() + ) if data_percent else self.dataloader.get_batch_count() + good, total = 0, 0 + diffs = [] + for i in tqdm(range(num)): + test_data, labels = self.dataloader.get_samples(i) + + results = self.runtime_backend.predict(test_data) + results = results[list(results)[0]] + diffs.append(results) + + for j in range(len(results)): + if np.argmax(results[j].round()) == labels[j].round(): + good += 1 + total += 1 + + accuracy = round((good / total), 5) + np.save(self.output_dir + "/{}.npy".format(self.dataloader.name()), + diffs) + log.info('Batch size is {}, Accuracy: {}'.format( + self.dataloader.cur_bs, accuracy)) + return {"Top-1": accuracy} diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_imagenet/data_loader.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_imagenet/data_loader.py new file mode 100644 index 0000000000000000000000000000000000000000..488ae1abd89532f3a3d9beccada6493eddfc37ab --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_imagenet/data_loader.py @@ -0,0 +1,260 @@ +# Copyright 2023 ByteDance and/or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import logging +import os +from os.path import split +import re +import time + +import cv2 +import numpy as np +import random +from tqdm import tqdm + +from general_perf.datasets import data_loader + +log = logging.getLogger("Imagenet") + +INPUT_TYPE = { + "UINT8": np.uint8, + "FLOAT32": np.float32, +} + + +class DataLoader(data_loader.Dataset): + def __init__(self, config): + super(DataLoader, self).__init__(config) + log.info("Initial...") + + self.config = config + self.cur_bs = 1 + self.image_size = [224, 224, 3] + + if self.config['framework'] == 'Tensorflow': + image_format = "NHWC" + pre_process = pre_process_vgg + else: + image_format = "NCHW" + if 'resnet50' in self.config['model']: + pre_process = pre_process_imagenet_pytorch + else: + pre_process = pre_process_imagenet_vit + + cache_dir = os.getcwd() + \ + "/general_perf/datasets/{}".format(self.config['dataset_name']) + self.input_name = self.config['inputs'] + self.image_list = [] + self.label_list = [] + self.count = None + self.use_cache = 0 + self.cache_dir = os.path.join(cache_dir, "preprocessed", + self.config['model']) + self.data_path = "general_perf/datasets/{}/ILSVRC2012_img_val".format( + self.config['dataset_name']) + self.pre_process = pre_process + self.items = 0 + # input images are in HWC + self.need_transpose = True if image_format == "NCHW" else False + not_found = 0 + os.makedirs(self.cache_dir, exist_ok=True) + + image_list = 'general_perf/datasets/{}/val_map.txt'.format( + self.config['dataset_name']) + + start = time.time() + with open(image_list, 'r') as f: + for s in tqdm(f): + image_name, label = re.split(r"\s+", s.strip()) + src = os.path.join(self.data_path, image_name) + if not os.path.exists(src): + # if the image does not exists ignore it + not_found += 1 + continue + os.makedirs(os.path.dirname( + os.path.join(self.cache_dir, image_name)), + exist_ok=True) + dst = os.path.join(self.cache_dir, image_name) + if not os.path.exists(dst + ".npy"): + img_org = cv2.imread(src) + processed = self.pre_process( + img_org, + need_transpose=self.need_transpose, + dims=self.image_size) + np.save(dst, processed) + + self.image_list.append(image_name) + self.label_list.append(int(label) + 1) + self.items = len(self.image_list) + + # limit the dataset if requested + if self.count and len(self.image_list) >= self.count: + break + + time_taken = time.time() - start + if not self.image_list: + log.error("no images in image list found") + raise ValueError("no images in image list found") + if not_found > 0: + log.info("reduced image list, %d images not found", not_found) + + log.info("loaded {} images, cache={}, took={:.1f}sec".format( + len(self.image_list), self.use_cache, time_taken)) + + self.label_list = np.array(self.label_list) + self.batch_num = int(self.items / self.cur_bs) + self.shuffle_index = [i for i in range(self.items)] + random.seed(7) + random.shuffle(self.shuffle_index) + + def name(self): + return self.config['dataset_name'] + + def preprocess(self): + log.info("Preprocessing...") + + self.rebatch(self.cur_bs, skip=False) + + def rebatch(self, new_bs, skip=True): + log.info("Rebatching batch size to: {} ...".format(new_bs)) + + if self.cur_bs == new_bs and skip: + return + + self.cur_bs = new_bs + self.batch_num = int(self.items / self.cur_bs) + self.batched_data = [] + self.labels = [] + for i in tqdm(range(self.batch_num)): + split_data, labels = [], [] + for j in range(i * self.cur_bs, (i + 1) * self.cur_bs): + output, label = self.get_item(self.shuffle_index[j]) + split_data.append(output) 
+ labels.append(label) + + self.labels.append(labels) + self.batched_data.append({self.input_name: np.array(split_data)}) + + def get_samples(self, sample_id): + if sample_id >= len(self.batched_data) or sample_id < 0: + raise ValueError("Your Input ID: {} is out of range: {}".format( + sample_id, len(self.batched_data))) + return self.batched_data[sample_id], self.labels[sample_id] + + def get_item(self, nr): + """Get image by number in the list.""" + dst = os.path.join(self.cache_dir, self.image_list[nr]) + img = np.load(dst + ".npy") + return img, self.label_list[nr] + + +# +# pre-processing +# +def center_crop(img, out_height, out_width): + height, width, _ = img.shape + left = int((width - out_width) / 2) + right = int((width + out_width) / 2) + top = int((height - out_height) / 2) + bottom = int((height + out_height) / 2) + img = img[top:bottom, left:right] + return img + + +def resize_with_aspectratio(img, + out_height, + out_width, + scale=87.5, + inter_pol=cv2.INTER_LINEAR): + height, width, _ = img.shape + new_height = int(100. * out_height / scale) + new_width = int(100. * out_width / scale) + if height > width: + w = new_width + h = int(new_height * height / width) + else: + h = new_height + w = int(new_width * width / height) + img = cv2.resize(img, (w, h), interpolation=inter_pol) + return img + + +def pre_process_vgg(img, dims=None, need_transpose=False): + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + + output_height, output_width, _ = dims + cv2_interpol = cv2.INTER_AREA + img = resize_with_aspectratio(img, + output_height, + output_width, + inter_pol=cv2_interpol) + img = center_crop(img, output_height, output_width) + img = np.asarray(img, dtype='float32') + + # normalize image + means = np.array([123.68, 116.78, 103.94], dtype=np.float32) + img -= means + + # transpose if needed + if need_transpose: + img = img.transpose([2, 0, 1]) + return img + + +def pre_process_imagenet_pytorch(img, dims=None, need_transpose=False): + from PIL import Image + import torchvision.transforms.functional as F + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + img = Image.fromarray(img) + img = F.resize(img, 256, Image.BILINEAR) + img = F.center_crop(img, 224) + img = F.to_tensor(img) + img = F.normalize(img, + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225], + inplace=False) + if not need_transpose: + img = img.permute(1, 2, 0) # NHWC + img = np.asarray(img, dtype='float32') + return img + +def pre_process_imagenet_vit(img, dims=None, need_transpose=False): + from PIL import Image + import torchvision.transforms.functional as F + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + img = Image.fromarray(img) + img = F.resize(img, 256, Image.BILINEAR) + img = F.center_crop(img, 384) + img = F.to_tensor(img) + img = F.normalize(img, + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225], + inplace=False) + if not need_transpose: + img = img.permute(1, 2, 0) # NHWC + img = np.asarray(img, dtype='float32') + return img + + +def maybe_resize(img, dims): + img = np.array(img, dtype=np.float32) + if len(img.shape) < 3 or img.shape[2] != 3: + # some images might be grayscale + img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB) + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + if dims != None: + im_height, im_width, _ = dims + img = cv2.resize(img, (im_width, im_height), + interpolation=cv2.INTER_LINEAR) + return img diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_imagenet/test_accuracy.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_imagenet/test_accuracy.py new 
file mode 100644 index 0000000000000000000000000000000000000000..6275aaf21210842c055190d0cae4d533e8504e12 --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_imagenet/test_accuracy.py @@ -0,0 +1,66 @@ +# Copyright 2023 ByteDance and/or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import numpy as np +from general_perf.datasets import test_accuracy +from tqdm import tqdm +import torch + +log = logging.getLogger("TestAccuracy") + + +class AccuracyChecker(test_accuracy.AccuracyChecker): + def calculate_acc(self, data_percent): + log.info("Start to calculate accuracy...") + num = int((data_percent / 100) * self.dataloader.get_batch_count() + ) if data_percent else self.dataloader.get_batch_count() + good, total = 0, 0 + diffs = [] + for i in tqdm(range(num)): + test_data, labels = self.dataloader.get_samples(i) + + results = self.runtime_backend.predict(test_data) + if "resnet50-tf-fp16" in self.configs["model"]: + if 'classes' in results: + del results['classes'] + results = self._post_processing(results, self.configs['framework']) + diffs.append(results) + for j in range(len(results)): + if np.argmax(results[j]) == labels[j]: + good += 1 + total += 1 + accuracy = round((good / total), 5) + log.info('Batch size is {}, Accuracy: {}'.format( + self.dataloader.cur_bs, accuracy)) + np.save(self.output_dir + "/{}.npy".format(self.dataloader.name()), + diffs) + return {"Top-1": accuracy} + + def _post_processing(self, inputs, framework): + if framework == "Onnx": + if isinstance(inputs, list): + inputs = list(inputs[0]) + elif isinstance(inputs, dict): + key = list(inputs.keys())[0] + inputs = list(inputs[key]) + else: + if isinstance(inputs, tuple): + inputs = inputs[0].float().cpu().numpy().astype(float) if inputs[0].dtype==torch.bfloat16 else inputs[0].cpu().numpy().astype(float) + else: + inputs = inputs[list(inputs)[0]] + if framework == "Pytorch" or framework == "Onnx": + inputs = np.array( + [np.insert(inputs[i], 0, 0) for i in range(len(inputs))]) + return inputs diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/bert/accuracy_squad.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/bert/accuracy_squad.py new file mode 100644 index 0000000000000000000000000000000000000000..18c97dd41766d8a9eee7c07ac28c471973d51d61 --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/bert/accuracy_squad.py @@ -0,0 +1,322 @@ +# Copyright 2023 ByteDance and/or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import collections +import json +import math +import os +import sys + +sys.path.insert(0, os.path.dirname(__file__)) + +import numpy as np +import six +from bert import tokenization + +# To support feature cache. +import pickle + +max_seq_length = 384 +max_query_length = 64 +doc_stride = 128 + +RawResult = collections.namedtuple("RawResult", + ["unique_id", "start_logits", "end_logits"]) + +dtype_map = { + "int8": np.int8, + "int16": np.int16, + "int32": np.int32, + "int64": np.int64, + "float16": np.float16, + "float32": np.float32, + "float64": np.float64 +} + + +def get_final_text(pred_text, orig_text, do_lower_case): + """Project the tokenized prediction back to the original text.""" + + # When we created the data, we kept track of the alignment between original + # (whitespace tokenized) tokens and our WordPiece tokenized tokens. So + # now `orig_text` contains the span of our original text corresponding to the + # span that we predicted. + # + # However, `orig_text` may contain extra characters that we don't want in + # our prediction. + # + # For example, let's say: + # pred_text = steve smith + # orig_text = Steve Smith's + # + # We don't want to return `orig_text` because it contains the extra "'s". + # + # We don't want to return `pred_text` because it's already been normalized + # (the SQuAD eval script also does punctuation stripping/lower casing but + # our tokenizer does additional normalization like stripping accent + # characters). + # + # What we really want to return is "Steve Smith". + # + # Therefore, we have to apply a semi-complicated alignment heruistic between + # `pred_text` and `orig_text` to get a character-to-charcter alignment. This + # can fail in certain cases in which case we just return `orig_text`. + + def _strip_spaces(text): + ns_chars = [] + ns_to_s_map = collections.OrderedDict() + for (i, c) in enumerate(text): + if c == " ": + continue + ns_to_s_map[len(ns_chars)] = i + ns_chars.append(c) + ns_text = "".join(ns_chars) + return (ns_text, ns_to_s_map) + + # We first tokenize `orig_text`, strip whitespace from the result + # and `pred_text`, and check if they are the same length. If they are + # NOT the same length, the heuristic has failed. If they are the same + # length, we assume the characters are one-to-one aligned. + tokenizer = tokenization.BasicTokenizer(do_lower_case=do_lower_case) + + tok_text = " ".join(tokenizer.tokenize(orig_text)) + start_position = tok_text.find(pred_text) + if start_position == -1: + return orig_text + end_position = start_position + len(pred_text) - 1 + + (orig_ns_text, orig_ns_to_s_map) = _strip_spaces(orig_text) + (tok_ns_text, tok_ns_to_s_map) = _strip_spaces(tok_text) + + if len(orig_ns_text) != len(tok_ns_text): + return orig_text + + # We then project the characters in `pred_text` back to `orig_text` using + # the character-to-character alignment. 
+ tok_s_to_ns_map = {} + for (i, tok_index) in six.iteritems(tok_ns_to_s_map): + tok_s_to_ns_map[tok_index] = i + + orig_start_position = None + if start_position in tok_s_to_ns_map: + ns_start_position = tok_s_to_ns_map[start_position] + if ns_start_position in orig_ns_to_s_map: + orig_start_position = orig_ns_to_s_map[ns_start_position] + + if orig_start_position is None: + return orig_text + + orig_end_position = None + if end_position in tok_s_to_ns_map: + ns_end_position = tok_s_to_ns_map[end_position] + if ns_end_position in orig_ns_to_s_map: + orig_end_position = orig_ns_to_s_map[ns_end_position] + + if orig_end_position is None: + return orig_text + + output_text = orig_text[orig_start_position:(orig_end_position + 1)] + return output_text + + +def _get_best_indexes(logits, n_best_size): + """Get the n-best logits from a list.""" + index_and_score = sorted(enumerate(logits), + key=lambda x: x[1], + reverse=True) + + best_indexes = [] + for i in range(len(index_and_score)): + if i >= n_best_size: + break + best_indexes.append(index_and_score[i][0]) + return best_indexes + + +def _compute_softmax(scores): + """Compute softmax probability over raw logits.""" + if not scores: + return [] + + max_score = None + for score in scores: + if max_score is None or score > max_score: + max_score = score + + exp_scores = [] + total_sum = 0.0 + for score in scores: + x = math.exp(score - max_score) + exp_scores.append(x) + total_sum += x + + probs = [] + for score in exp_scores: + probs.append(score / total_sum) + return probs + + +def write_predictions(all_examples, + all_features, + all_results, + n_best_size, + max_answer_length, + do_lower_case, + output_prediction_file, + max_examples=None): + """Write final predictions to the json file and log-odds of null if needed.""" + print("Writing predictions to: %s" % (output_prediction_file)) + + example_index_to_features = collections.defaultdict(list) + for feature in all_features: + example_index_to_features[feature.example_index].append(feature) + + unique_id_to_result = {} + for result in all_results: + unique_id_to_result[result.unique_id] = result + + _PrelimPrediction = collections.namedtuple( # pylint: disable=invalid-name + "PrelimPrediction", [ + "feature_index", "start_index", "end_index", "start_logit", + "end_logit" + ]) + + all_predictions = collections.OrderedDict() + all_nbest_json = collections.OrderedDict() + scores_diff_json = collections.OrderedDict() + + for (example_index, example) in enumerate(all_examples): + if max_examples and example_index == max_examples: break + + features = example_index_to_features[example_index] + + prelim_predictions = [] + # keep track of the minimum score of null start+end of position 0 + score_null = 1000000 # large and positive + min_null_feature_index = 0 # the paragraph slice with min mull score + null_start_logit = 0 # the start logit at the slice with min null score + null_end_logit = 0 # the end logit at the slice with min null score + for (feature_index, feature) in enumerate(features): + # FIX: During compliance/audit runs, we only generate a small subset of + # all entries from the dataset. As a result, sometimes dict retrieval + # fails because a key is missing. 
+ # result = unique_id_to_result[feature.unique_id] + result = unique_id_to_result.get(feature.unique_id, None) + if result is None: + continue + start_indexes = _get_best_indexes(result.start_logits, n_best_size) + end_indexes = _get_best_indexes(result.end_logits, n_best_size) + # if we could have irrelevant answers, get the min score of irrelevant + for start_index in start_indexes: + for end_index in end_indexes: + # We could hypothetically create invalid predictions, e.g., predict + # that the start of the span is in the question. We throw out all + # invalid predictions. + if start_index >= len(feature.tokens): + continue + if end_index >= len(feature.tokens): + continue + if start_index not in feature.token_to_orig_map: + continue + if end_index not in feature.token_to_orig_map: + continue + if not feature.token_is_max_context.get( + start_index, False): + continue + if end_index < start_index: + continue + length = end_index - start_index + 1 + if length > max_answer_length: + continue + prelim_predictions.append( + _PrelimPrediction( + feature_index=feature_index, + start_index=start_index, + end_index=end_index, + start_logit=result.start_logits[start_index], + end_logit=result.end_logits[end_index])) + + prelim_predictions = sorted(prelim_predictions, + key=lambda x: + (x.start_logit + x.end_logit), + reverse=True) + + _NbestPrediction = collections.namedtuple( # pylint: disable=invalid-name + "NbestPrediction", ["text", "start_logit", "end_logit"]) + + seen_predictions = {} + nbest = [] + for pred in prelim_predictions: + if len(nbest) >= n_best_size: + break + feature = features[pred.feature_index] + tok_tokens = feature.tokens[pred.start_index:(pred.end_index + 1)] + orig_doc_start = feature.token_to_orig_map[pred.start_index] + orig_doc_end = feature.token_to_orig_map[pred.end_index] + orig_tokens = example.doc_tokens[orig_doc_start:(orig_doc_end + 1)] + tok_text = " ".join(tok_tokens) + + # De-tokenize WordPieces that have been split off. + tok_text = tok_text.replace(" ##", "") + tok_text = tok_text.replace("##", "") + + # Clean whitespace + tok_text = tok_text.strip() + tok_text = " ".join(tok_text.split()) + orig_text = " ".join(orig_tokens) + + final_text = get_final_text(tok_text, orig_text, do_lower_case) + if final_text in seen_predictions: + continue + + seen_predictions[final_text] = True + nbest.append( + _NbestPrediction(text=final_text, + start_logit=pred.start_logit, + end_logit=pred.end_logit)) + + # In very rare edge cases we could have no valid predictions. So we + # just create a nonce prediction in this case to avoid failure. 
+ if not nbest: + nbest.append( + _NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0)) + + assert len(nbest) >= 1 + + total_scores = [] + best_non_null_entry = None + for entry in nbest: + total_scores.append(entry.start_logit + entry.end_logit) + if not best_non_null_entry: + if entry.text: + best_non_null_entry = entry + + probs = _compute_softmax(total_scores) + + nbest_json = [] + for (i, entry) in enumerate(nbest): + output = collections.OrderedDict() + output["text"] = entry.text + output["probability"] = probs[i] + output["start_logit"] = entry.start_logit + output["end_logit"] = entry.end_logit + nbest_json.append(output) + + assert len(nbest_json) >= 1 + + all_predictions[example.qas_id] = nbest_json[0]["text"] + + with open(output_prediction_file, "w") as writer: + writer.write(json.dumps(all_predictions, indent=4) + "\n") diff --git a/models/nlp/language_model/bert_base_squad/ixrt/python/evaluate-v1.1.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/bert/evaluate.py similarity index 60% rename from models/nlp/language_model/bert_base_squad/ixrt/python/evaluate-v1.1.py rename to toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/bert/evaluate.py index ba4ee19094e492b5caecef414da90adf2dba8514..177e136dfdb3fb5294c4213e1a22e79345b78723 100644 --- a/models/nlp/language_model/bert_base_squad/ixrt/python/evaluate-v1.1.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/bert/evaluate.py @@ -1,38 +1,17 @@ -#!/usr/bin/env python3 -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 +# Copyright 2023 ByteDance and/or its affiliates. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# - -# Obtained from https://rajpurkar.github.io/SQuAD-explorer/ -""" Official evaluation script for v1.1 of the SQuAD dataset. 
""" from __future__ import print_function from collections import Counter import string @@ -41,6 +20,7 @@ import argparse import json import sys + def normalize_answer(s): """Lower text and remove punctuation, articles and extra whitespace.""" def remove_articles(text): @@ -83,7 +63,8 @@ def metric_max_over_ground_truths(metric_fn, prediction, ground_truths): scores_for_ground_truths.append(score) return max(scores_for_ground_truths) -def evaluate(dataset, predictions, f1_acc): + +def evaluate(dataset, predictions, num): f1 = exact_match = total = 0 for article in dataset: for paragraph in article['paragraphs']: @@ -98,33 +79,24 @@ def evaluate(dataset, predictions, f1_acc): prediction = predictions[qa['id']] exact_match += metric_max_over_ground_truths( exact_match_score, prediction, ground_truths) - f1 += metric_max_over_ground_truths( - f1_score, prediction, ground_truths) - - exact_match = 100.0 * exact_match / total - f1 = 100.0 * f1 / total - if (f1 < f1_acc - 0.5): - print("&&&& FAILED TensorRT BERT Squad Accuracy matches reference.") - else: - print("&&&& PASSED TensorRT BERT Squad Accuracy matches reference.") - return {'exact_match': exact_match, 'f1': f1} - -if __name__ == '__main__': + f1 += metric_max_over_ground_truths(f1_score, prediction, + ground_truths) + total = num + exact_match = round(100.0 * exact_match / total, 5) + f1 = round(100.0 * f1 / total, 5) + + return {'Exact Match': exact_match, 'F1 Score': f1} + + +def check_accuracy(dataset_file, prediction_file, num): expected_version = '1.1' - parser = argparse.ArgumentParser( - description='Evaluation for SQuAD ' + expected_version) - parser.add_argument('dataset_file', help='Dataset file') - parser.add_argument('prediction_file', help='Prediction File') - parser.add_argument('f1_acc', help='Reference Accuracy') - args = parser.parse_args() - with open(args.dataset_file) as dataset_file: + with open(dataset_file) as dataset_file: dataset_json = json.load(dataset_file) if (dataset_json['version'] != expected_version): print('Evaluation expects v-' + expected_version + ', but got dataset with v-' + dataset_json['version'], file=sys.stderr) dataset = dataset_json['data'] - with open(args.prediction_file) as prediction_file: + with open(prediction_file) as prediction_file: predictions = json.load(prediction_file) - f1_acc = float(args.f1_acc) - print(json.dumps(evaluate(dataset, predictions, f1_acc))) + return evaluate(dataset, predictions, num) diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/create_squad_data.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/create_squad_data.py new file mode 100644 index 0000000000000000000000000000000000000000..ff84c61e62c49e05fa17f148a4db02285458b4d1 --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/create_squad_data.py @@ -0,0 +1,427 @@ +# Copyright 2023 ByteDance and/or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import json +import tokenization +import six + + +class SquadExample(object): + """A single training/test example for simple sequence classification. + For examples without an answer, the start and end position are -1. + """ + def __init__(self, + qas_id, + question_text, + doc_tokens, + orig_answer_text=None, + start_position=None, + end_position=None, + is_impossible=False): + self.qas_id = qas_id + self.question_text = question_text + self.doc_tokens = doc_tokens + self.orig_answer_text = orig_answer_text + self.start_position = start_position + self.end_position = end_position + self.is_impossible = is_impossible + + def __str__(self): + return self.__repr__() + + def __repr__(self): + s = "" + s += "qas_id: %s" % (tokenization.printable_text(self.qas_id)) + s += ", question_text: %s" % (tokenization.printable_text( + self.question_text)) + s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens)) + if self.start_position: + s += ", start_position: %d" % (self.start_position) + if self.start_position: + s += ", end_position: %d" % (self.end_position) + if self.start_position: + s += ", is_impossible: %r" % (self.is_impossible) + return s + + +class InputFeatures(object): + """A single set of features of data.""" + def __init__(self, + unique_id, + example_index, + doc_span_index, + tokens, + token_to_orig_map, + token_is_max_context, + input_ids, + input_mask, + segment_ids, + start_position=None, + end_position=None, + is_impossible=None): + self.unique_id = unique_id + self.example_index = example_index + self.doc_span_index = doc_span_index + self.tokens = tokens + self.token_to_orig_map = token_to_orig_map + self.token_is_max_context = token_is_max_context + self.input_ids = input_ids + self.input_mask = input_mask + self.segment_ids = segment_ids + self.start_position = start_position + self.end_position = end_position + self.is_impossible = is_impossible + + +def read_squad_examples(input_file, + is_training, + version_2_with_negative=False): + """Read a SQuAD json file into a list of SquadExample.""" + with open(input_file) as reader: + input_data = json.load(reader)["data"] + + def is_whitespace(c): + if c == " " or c == "\t" or c == "\r" or c == "\n" or ord(c) == 0x202F: + return True + return False + + examples = [] + for entry in input_data: + for paragraph in entry["paragraphs"]: + paragraph_text = paragraph["context"] + doc_tokens = [] + char_to_word_offset = [] + prev_is_whitespace = True + for c in paragraph_text: + if is_whitespace(c): + prev_is_whitespace = True + else: + if prev_is_whitespace: + doc_tokens.append(c) + else: + doc_tokens[-1] += c + prev_is_whitespace = False + char_to_word_offset.append(len(doc_tokens) - 1) + + for qa in paragraph["qas"]: + qas_id = qa["id"] + question_text = qa["question"] + start_position = None + end_position = None + orig_answer_text = None + is_impossible = False + if is_training: + + if version_2_with_negative: + is_impossible = qa["is_impossible"] + if (len(qa["answers"]) != 1) and (not is_impossible): + raise ValueError( + "For training, each question should have exactly 1 answer." 
+ ) + if not is_impossible: + answer = qa["answers"][0] + orig_answer_text = answer["text"] + answer_offset = answer["answer_start"] + answer_length = len(orig_answer_text) + start_position = char_to_word_offset[answer_offset] + end_position = char_to_word_offset[answer_offset + + answer_length - 1] + # Only add answers where the text can be exactly recovered from the + # document. If this CAN'T happen it's likely due to weird Unicode + # stuff so we will just skip the example. + # + # Note that this means for training mode, every example is NOT + # guaranteed to be preserved. + actual_text = " ".join( + doc_tokens[start_position:(end_position + 1)]) + cleaned_answer_text = " ".join( + tokenization.whitespace_tokenize(orig_answer_text)) + if actual_text.find(cleaned_answer_text) == -1: + print("Could not find answer: '%s' vs. '%s'", + actual_text, cleaned_answer_text) + continue + else: + start_position = -1 + end_position = -1 + orig_answer_text = "" + + example = SquadExample(qas_id=qas_id, + question_text=question_text, + doc_tokens=doc_tokens, + orig_answer_text=orig_answer_text, + start_position=start_position, + end_position=end_position, + is_impossible=is_impossible) + examples.append(example) + + return examples + + +def _check_is_max_context(doc_spans, cur_span_index, position): + """Check if this is the 'max context' doc span for the token.""" + + # Because of the sliding window approach taken to scoring documents, a single + # token can appear in multiple documents. E.g. + # Doc: the man went to the store and bought a gallon of milk + # Span A: the man went to the + # Span B: to the store and bought + # Span C: and bought a gallon of + # ... + # + # Now the word 'bought' will have two scores from spans B and C. We only + # want to consider the score with "maximum context", which we define as + # the *minimum* of its left and right context (the *sum* of left and + # right context will always be the same, of course). + # + # In the example the maximum context for 'bought' would be span C since + # it has 1 left context and 3 right context, while span B has 4 left context + # and 0 right context. + best_score = None + best_span_index = None + for (span_index, doc_span) in enumerate(doc_spans): + end = doc_span.start + doc_span.length - 1 + if position < doc_span.start: + continue + if position > end: + continue + num_left_context = position - doc_span.start + num_right_context = end - position + score = min(num_left_context, + num_right_context) + 0.01 * doc_span.length + if best_score is None or score > best_score: + best_score = score + best_span_index = span_index + + return cur_span_index == best_span_index + + +def _improve_answer_span(doc_tokens, input_start, input_end, tokenizer, + orig_answer_text): + """Returns tokenized answer spans that better match the annotated answer.""" + + # The SQuAD annotations are character based. We first project them to + # whitespace-tokenized words. But then after WordPiece tokenization, we can + # often find a "better match". For example: + # + # Question: What year was John Smith born? + # Context: The leader was John Smith (1895-1943). + # Answer: 1895 + # + # The original whitespace-tokenized answer will be "(1895-1943).". However + # after tokenization, our tokens will be "( 1895 - 1943 ) .". So we can match + # the exact answer, 1895. + # + # However, this is not always possible. Consider the following: + # + # Question: What country is the top exporter of electornics? 
+ # Context: The Japanese electronics industry is the lagest in the world. + # Answer: Japan + # + # In this case, the annotator chose "Japan" as a character sub-span of + # the word "Japanese". Since our WordPiece tokenizer does not split + # "Japanese", we just use "Japanese" as the annotation. This is fairly rare + # in SQuAD, but does happen. + tok_answer_text = " ".join(tokenizer.tokenize(orig_answer_text)) + + for new_start in range(input_start, input_end + 1): + for new_end in range(input_end, new_start - 1, -1): + text_span = " ".join(doc_tokens[new_start:(new_end + 1)]) + if text_span == tok_answer_text: + return (new_start, new_end) + + return (input_start, input_end) + + +def convert_examples_to_features(examples, + tokenizer, + max_seq_length, + doc_stride, + max_query_length, + is_training, + output_fn, + verbose_logging=False): + """Loads a data file into a list of `InputBatch`s.""" + + unique_id = 1000000000 + + for (example_index, example) in enumerate(examples): + query_tokens = tokenizer.tokenize(example.question_text) + + if len(query_tokens) > max_query_length: + query_tokens = query_tokens[0:max_query_length] + + tok_to_orig_index = [] + orig_to_tok_index = [] + all_doc_tokens = [] + for (i, token) in enumerate(example.doc_tokens): + orig_to_tok_index.append(len(all_doc_tokens)) + sub_tokens = tokenizer.tokenize(token) + for sub_token in sub_tokens: + tok_to_orig_index.append(i) + all_doc_tokens.append(sub_token) + + tok_start_position = None + tok_end_position = None + if is_training and example.is_impossible: + tok_start_position = -1 + tok_end_position = -1 + if is_training and not example.is_impossible: + tok_start_position = orig_to_tok_index[example.start_position] + if example.end_position < len(example.doc_tokens) - 1: + tok_end_position = orig_to_tok_index[example.end_position + + 1] - 1 + else: + tok_end_position = len(all_doc_tokens) - 1 + (tok_start_position, tok_end_position) = _improve_answer_span( + all_doc_tokens, tok_start_position, tok_end_position, + tokenizer, example.orig_answer_text) + + # The -3 accounts for [CLS], [SEP] and [SEP] + max_tokens_for_doc = max_seq_length - len(query_tokens) - 3 + + # We can have documents that are longer than the maximum sequence length. + # To deal with this we do a sliding window approach, where we take chunks + # of the up to our max length with a stride of `doc_stride`. 
+ _DocSpan = collections.namedtuple( # pylint: disable=invalid-name + "DocSpan", ["start", "length"]) + doc_spans = [] + start_offset = 0 + while start_offset < len(all_doc_tokens): + length = len(all_doc_tokens) - start_offset + if length > max_tokens_for_doc: + length = max_tokens_for_doc + doc_spans.append(_DocSpan(start=start_offset, length=length)) + if start_offset + length == len(all_doc_tokens): + break + start_offset += min(length, doc_stride) + + for (doc_span_index, doc_span) in enumerate(doc_spans): + tokens = [] + token_to_orig_map = {} + token_is_max_context = {} + segment_ids = [] + tokens.append("[CLS]") + segment_ids.append(0) + for token in query_tokens: + tokens.append(token) + segment_ids.append(0) + tokens.append("[SEP]") + segment_ids.append(0) + + for i in range(doc_span.length): + split_token_index = doc_span.start + i + token_to_orig_map[len( + tokens)] = tok_to_orig_index[split_token_index] + + is_max_context = _check_is_max_context(doc_spans, + doc_span_index, + split_token_index) + token_is_max_context[len(tokens)] = is_max_context + tokens.append(all_doc_tokens[split_token_index]) + segment_ids.append(1) + tokens.append("[SEP]") + segment_ids.append(1) + + input_ids = tokenizer.convert_tokens_to_ids(tokens) + + # The mask has 1 for real tokens and 0 for padding tokens. Only real + # tokens are attended to. + input_mask = [1] * len(input_ids) + + # Zero-pad up to the sequence length. + while len(input_ids) < max_seq_length: + input_ids.append(0) + input_mask.append(0) + segment_ids.append(0) + + assert len(input_ids) == max_seq_length + assert len(input_mask) == max_seq_length + assert len(segment_ids) == max_seq_length + + start_position = None + end_position = None + if is_training and not example.is_impossible: + # For training, if our document chunk does not contain an annotation + # we throw it out, since there is nothing to predict. 
+ doc_start = doc_span.start + doc_end = doc_span.start + doc_span.length - 1 + out_of_span = False + if not (tok_start_position >= doc_start + and tok_end_position <= doc_end): + out_of_span = True + if out_of_span: + start_position = 0 + end_position = 0 + else: + doc_offset = len(query_tokens) + 2 + start_position = tok_start_position - doc_start + doc_offset + end_position = tok_end_position - doc_start + doc_offset + + if is_training and example.is_impossible: + start_position = 0 + end_position = 0 + + if verbose_logging and example_index < 20: + print("*** Example ***") + print("unique_id: %s" % (unique_id)) + print("example_index: %s" % (example_index)) + print("doc_span_index: %s" % (doc_span_index)) + print( + "tokens: %s" % + " ".join([tokenization.printable_text(x) for x in tokens])) + print("token_to_orig_map: %s" % " ".join([ + "%d:%d" % (x, y) + for (x, y) in six.iteritems(token_to_orig_map) + ])) + print("token_is_max_context: %s" % " ".join([ + "%d:%s" % (x, y) + for (x, y) in six.iteritems(token_is_max_context) + ])) + print("input_ids: %s" % " ".join([str(x) for x in input_ids])) + print("input_mask: %s" % " ".join([str(x) + for x in input_mask])) + print("segment_ids: %s" % + " ".join([str(x) for x in segment_ids])) + if is_training and example.is_impossible: + print("impossible example") + if is_training and not example.is_impossible: + answer_text = " ".join( + tokens[start_position:(end_position + 1)]) + print("start_position: %d" % (start_position)) + print("end_position: %d" % (end_position)) + print("answer: %s" % + (tokenization.printable_text(answer_text))) + + feature = InputFeatures(unique_id=unique_id, + example_index=example_index, + doc_span_index=doc_span_index, + tokens=tokens, + token_to_orig_map=token_to_orig_map, + token_is_max_context=token_is_max_context, + input_ids=input_ids, + input_mask=input_mask, + segment_ids=segment_ids, + start_position=start_position, + end_position=end_position, + is_impossible=example.is_impossible) + + # Run callback + output_fn(feature) + + unique_id += 1 diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/data_loader.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/data_loader.py new file mode 100644 index 0000000000000000000000000000000000000000..fc9ad6a4d7c0b0f4bd47536d98786cb6fb2551ec --- /dev/null +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/data_loader.py @@ -0,0 +1,199 @@ +# Copyright 2023 ByteDance and/or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +# To support feature cache. 
+import pickle
+from transformers import BertTokenizer, AutoTokenizer
+from general_perf.datasets.open_squad.create_squad_data import read_squad_examples, convert_examples_to_features
+import collections
+from general_perf.datasets import data_loader
+import logging
+from tqdm import tqdm
+import numpy as np
+
+INPUT_TYPE = {
+    "UINT8": np.uint8,
+    "FLOAT32": np.float32,
+    "LONG": np.int64,  # avoid the deprecated np.long alias
+    "INT32": np.int32,
+    "INT64": np.int64
+}
+
+max_seq_length = 384
+max_query_length = 64
+doc_stride = 128
+
+log = logging.getLogger("SQUAD")
+
+
+class DataLoader(data_loader.Dataset):
+    def __init__(self, config):
+        super(DataLoader, self).__init__(config)
+
+        log.info("Initial...")
+        self.config = config
+        model = self.config["model"]
+        total_count_override = None
+        perf_count_override = None
+        eval_features = []
+        # Load features if cached, convert from examples otherwise.
+        input_file = "general_perf/datasets/open_squad/dev-v1.1.json"
+        cache_path = 'general_perf/datasets/open_squad/eval_features_' + self.config[
+            'model'] + '.pickle'
+        if os.path.exists(cache_path):
+            with open(cache_path, 'rb') as cache_file:
+                eval_features = pickle.load(cache_file)
+            eval_examples = read_squad_examples(input_file=input_file,
+                                                is_training=False,
+                                                version_2_with_negative=False)
+        else:
+            log.info("Start to generate data")
+            if "roberta" in self.config['model']:
+                tokenizer = AutoTokenizer.from_pretrained(
+                    "csarron/roberta-base-squad-v1")
+            elif "albert" in self.config['model']:
+                tokenizer = AutoTokenizer.from_pretrained(
+                    "madlag/albert-base-v2-squad")
+            elif "deberta" in self.config['model']:
+                tokenizer = AutoTokenizer.from_pretrained(
+                    "Palak/microsoft_deberta-base_squad")
+            else:
+                tokenizer = BertTokenizer(
+                    "general_perf/datasets/open_squad/vocab.txt")
+            eval_examples = read_squad_examples(input_file=input_file,
+                                                is_training=False,
+                                                version_2_with_negative=False)
+
+            def append_feature(feature):
+                eval_features.append(feature)
+
+            convert_examples_to_features(examples=eval_examples,
+                                         tokenizer=tokenizer,
+                                         max_seq_length=max_seq_length,
+                                         doc_stride=doc_stride,
+                                         max_query_length=max_query_length,
+                                         is_training=False,
+                                         output_fn=append_feature,
+                                         verbose_logging=False)
+
+            with open(cache_path, 'wb') as cache_file:
+                pickle.dump(eval_features, cache_file)
+
+        self.eval_features = eval_features
+        self.eval_examples = eval_examples
+        self.count = total_count_override or len(self.eval_features)
+        self.items = len(self.eval_features)
+        self.perf_count = perf_count_override or self.count
+        self.model = model
+        self.cur_bs = 1
+        self.batch_num = int(self.items / self.cur_bs)
+
+        # save the mask name to help set the results at unmasked positions to zero
+        if "roberta" in self.model or "torch" in self.model:
+            self.mask_name = "attention_mask.1"
+        else:
+            self.mask_name = "input_mask:0"
+
+    def name(self):
+        return self.config['dataset_name']
+
+    def preprocess(self):
+        log.info("Preprocessing...")
+
+        self.rebatch(self.batch_num, skip=False)
+
+    def rebatch(self, new_bs, skip=True):
+        log.info("Rebatching batch size to: {} ...".format(new_bs))
+
+        if self.cur_bs == new_bs and skip:
+            return
+
+        self.cur_bs = new_bs
+        self.batch_num = int(self.items / self.cur_bs)
+        self.batched_data = []
+        for i in tqdm(range(self.batch_num)):
+            features = collections.defaultdict(list)
+            for j in range(i * self.cur_bs, (i + 1) * self.cur_bs):
+                if "torch" in self.model:
+                    features['input_ids.1'].append(
+                        self.eval_features[j].input_ids)
+                    features['attention_mask.1'].append(
+                        self.eval_features[j].input_mask)
+                    if "roberta" in self.model:
+                        features['token_type_ids.1'].append(
+                            np.zeros((384,)))
+                    elif "deberta" in self.model:
+                        features['token_type_ids'].append(
+                            self.eval_features[j].segment_ids)
+                    else:
+                        features['token_type_ids.1'].append(
+                            self.eval_features[j].segment_ids)
+                else:
+                    features['input_ids:0'].append(
+                        self.eval_features[j].input_ids)
+                    features['input_mask:0'].append(
+                        self.eval_features[j].input_mask)
+                    features['segment_ids:0'].append(
+                        self.eval_features[j].segment_ids)
+            self.batched_data.append(features)
+
+    def get_samples(self, sample_id):
+        if sample_id >= len(self.batched_data) or sample_id < 0:
+            raise ValueError("Your Input ID is out of range")
+        return self.batched_data[sample_id], []
+
+    def get_id(self, sample_id):
+        if sample_id >= len(self.batched_data) or sample_id < 0:
+            raise ValueError("Your Input ID is out of range")
+        return [
+            self.eval_features[i].unique_id
+            for i in range(sample_id * self.cur_bs, (sample_id + 1) *
+                           self.cur_bs)
+        ]
+
+    def get_fake_samples(self, batch_size, shape, input_type):
+        data = {}
+
+        avg_seq_len = 192
+        max_seq_len = 384
+
+        if input_type:
+            i = 0
+            for key, val in shape.items():
+                val = [val[0] * batch_size] + val[1:]
+                if i == 0:
+                    # fake input id and mask
+                    input_ids = np.zeros(val).astype(INPUT_TYPE[input_type[i]])
+                    data[key] = input_ids
+                elif i == 1:
+                    # fake input array length
+                    input_len = np.random.randint(low=2 * avg_seq_len -
+                                                  max_seq_len,
+                                                  high=max_seq_len + 1,
+                                                  size=(batch_size),
+                                                  dtype=np.int32)
+
+                    input_mask = np.zeros(val).astype(
+                        INPUT_TYPE[input_type[i]])
+
+                    for b_idx, s_len in enumerate(input_len):
+                        input_mask[b_idx][:s_len] = 1
+                    data[key] = input_mask
+                else:
+                    data[key] = np.zeros(val).astype(INPUT_TYPE[input_type[i]])
+                i += 1
+            return data
+        else:
+            raise ValueError("Please provide input type")
diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/test_accuracy.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/test_accuracy.py
new file mode 100644
index 0000000000000000000000000000000000000000..5edd352cbd9970c7502255654d8824f74f2ee1a6
--- /dev/null
+++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/test_accuracy.py
@@ -0,0 +1,134 @@
+# Copyright 2023 ByteDance and/or its affiliates.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+import os
+import collections
+import numpy as np
+import tensorflow as tf
+import torch
+from tqdm import tqdm
+
+from general_perf.datasets.open_squad.bert.accuracy_squad import write_predictions
+from general_perf.datasets.open_squad.bert.evaluate import check_accuracy
+from general_perf.datasets import test_accuracy
+
+
+RawResult = collections.namedtuple("RawResult",
+                                   ["unique_id", "start_logits", "end_logits"])
+
+log = logging.getLogger("TestAccuracy")
+
+
+class AccuracyChecker(test_accuracy.AccuracyChecker):
+    def calculate_acc(self, data_percent):
+        log.info("Start to calculate accuracy...")
+        results, diffs = [], []
+        num = int((data_percent / 100) * self.dataloader.get_batch_count()
+                  ) if data_percent else self.dataloader.get_batch_count()
+
+        for i in tqdm(range(num)):
+            test_data, _ = self.dataloader.get_samples(i)
+            unique_ids = self.dataloader.get_id(i)
+            result = self.runtime_backend.predict(test_data)
+            start_logits, end_logits = self._post_processing(
+                result, self.configs['framework'])
+
+            # set results at unmasked positions to zero since the vendor's
+            # result may have different values at those meaningless positions
+            def set_unmask_to_zero(res, mask):
+                arr = np.array(res)
+                arr[mask == 0] = 0.0
+                return list(arr)
+
+            masks = np.array(test_data[self.dataloader.mask_name])
+            for j, mask in enumerate(masks):
+                # apply each sample's own mask to its own logits
+                start_logits[j] = set_unmask_to_zero(start_logits[j], mask)
+                end_logits[j] = set_unmask_to_zero(end_logits[j], mask)
+
+            for i, u_id in enumerate(unique_ids):
+                results.append(
+                    RawResult(unique_id=u_id,
+                              start_logits=start_logits[i],
+                              end_logits=end_logits[i]))
+
+            diffs.append(start_logits + end_logits)
+
+        np.save(self.output_dir + "/{}.npy".format(self.dataloader.name()),
+                diffs)
+        data_file = os.path.abspath('.') + "/general_perf/datasets/open_squad/dev-v1.1.json"
+        predict_file = self.output_dir[:self.output_dir.
+                                       rindex('/')] + "/predictions.json"
+        write_predictions(self.dataloader.eval_examples,
+                          self.dataloader.eval_features, results, 20, 30, True,
+                          predict_file)
+        result = check_accuracy(data_file, predict_file,
+                                num * self.dataloader.cur_bs)
+        log.info('Batch size is {}, F1: {}, Exact Match:{}'.format(
+            self.dataloader.cur_bs, result['F1 Score'], result['Exact Match']))
+        return result
+
+    def _post_processing(self, inputs, framework):
+        start_results, end_results = [], []
+
+        if framework == "Tensorflow":
+            if 'distill' in self.configs['model']:
+                (start_logits, end_logits) = (inputs["output_0"],
+                                              inputs["output_1"])
+                for i in range(self.dataloader.cur_bs):
+                    start_logit = [float(x) for x in start_logits[i].flat]
+                    end_logit = [float(x) for x in end_logits[i].flat]
+                    start_results.append(start_logit)
+                    end_results.append(end_logit)
+            else:
+                tensor_name = list(inputs)[0]
+                for i in range(len(inputs[tensor_name])):
+                    logits = tf.transpose(np.array([inputs[tensor_name][i]]),
+                                          [2, 0, 1])
+                    unstacked_logits = tf.unstack(logits, axis=0)
+                    if tf.executing_eagerly():
+                        (start_logit,
+                         end_logit) = (unstacked_logits[0].numpy(),
+                                       unstacked_logits[1].numpy())
+                    else:
+                        with tf.compat.v1.Session():
+                            (start_logit,
+                             end_logit) = (unstacked_logits[0].eval(),
+                                           unstacked_logits[1].eval())
+                    start_logit = [float(x) for x in start_logit.flat]
+                    end_logit = [float(x) for x in end_logit.flat]
+                    start_results.append(start_logit)
+                    end_results.append(end_logit)
+        else:
+            if isinstance(inputs, dict):
+                (start_logits, end_logits) = (
+                    inputs["start_logits"],
+                    inputs["end_logits"],
+                )
+            elif isinstance(inputs[0], torch.Tensor):
+                (start_logits, end_logits) = (
+                    inputs[0].float().cpu().detach().numpy() if inputs[0].dtype == torch.bfloat16 else inputs[0].cpu().detach().numpy(),
+                    inputs[1].float().cpu().detach().numpy() if inputs[1].dtype == torch.bfloat16 else inputs[1].cpu().detach().numpy(),
+                )
+            else:
+                (start_logits, end_logits) = (inputs[0], inputs[1])
+
+            for i in range(self.dataloader.cur_bs):
+                start_logit = [float(x) for x in start_logits[i].flat]
+                end_logit = [float(x) for x in end_logits[i].flat]
+                start_results.append(start_logit)
+                end_results.append(end_logit)
+
+        return start_results, end_results
diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/model_zoo/roformer-tf-fp32.json b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/model_zoo/roformer-tf-fp32.json
index 278e9e920a5ce242b7c925510c268c7ba5a68a3a..687fc7a02bddba0d158af8056cd1a525a071ff70 100644
--- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/model_zoo/roformer-tf-fp32.json
+++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/model_zoo/roformer-tf-fp32.json
@@ -5,10 +5,10 @@
     "framework_version": "2.4.0",
     "model_format": "saved_model",
     "model_precision": "FP32",
-    "inputs": "input_segment:0,input_token:0",
+    "inputs": "input_segment0,input_token0",
     "outputs": "Identity:0",
-    "input_shape": {"input_segment:0": [1, 1024], "input_token:0": [1, 1024]},
+    "input_shape": {"input_segment0": [1, 1024], "input_token0": [1, 1024]},
     "input_type": "FLOAT32,FLOAT32",
     "dataset_name": "open_cail2019",
-    "max_batch_size": 64
-}
\ No newline at end of file
+    "max_batch_size": 128
+}
diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/requirements.txt b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/requirements.txt
index e57e2c9c8f12a0f98011960785b28899a54741cf..8293d0bab868a15555bacc0c052e1787d4f6cb51 100644
--- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/requirements.txt
+++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/requirements.txt
@@ -8,5 +8,5 @@ opencv-python
 transformers
 tokenization
 fpdf
-typing-extensions==3.7.4.3
+typing-extensions==4.12.2
 numpy==1.23.0
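Below is a minimal, self-contained sketch (illustrative only, not part of the patch) of the per-sample mask zeroing that the new test_accuracy.py applies before dumping logits and writing predictions. The sample values are made up, and the helper mirrors set_unmask_to_zero above.

import numpy as np

# Fake batch of start logits (batch, seq_len) and the matching attention masks.
start_logits = [[0.3, 1.2, -0.7, 0.9], [2.0, -0.1, 0.4, 0.8]]
input_mask = np.array([[1, 1, 0, 0], [1, 1, 1, 0]])

def set_unmask_to_zero(res, mask):
    arr = np.array(res)
    arr[mask == 0] = 0.0  # zero the logits at padding positions
    return list(arr)

# Each row is zeroed with its own mask, so padded positions cannot add
# vendor-specific noise to the saved diffs or the SQuAD predictions.
for j, mask in enumerate(input_mask):
    start_logits[j] = set_unmask_to_zero(start_logits[j], mask)

print(start_logits)  # padded positions are now 0.0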