diff --git a/models/audio/speech_recognition/conformer/igie/README.md b/models/audio/speech_recognition/conformer/igie/README.md
index 9141b3c8bab3498c3ef8d9638de8f945a9089b70..ae96f9d4b9433e57973f2f7d6d1b5f1e206ef9aa 100644
--- a/models/audio/speech_recognition/conformer/igie/README.md
+++ b/models/audio/speech_recognition/conformer/igie/README.md
@@ -24,6 +24,12 @@ Dataset: to download the Aishell dataset.
### Install Dependencies
```bash
+# Install sox
+## CentOS
+yum install sox sox-devel -y
+## Ubuntu
+apt install sox libsox-fmt-all -y
+
pip3 install -r requirements.txt
cd ctc_decoder/swig && bash setup.sh
cd ../../
diff --git a/models/audio/speech_recognition/conformer/igie/ci/prepare.sh b/models/audio/speech_recognition/conformer/igie/ci/prepare.sh
index 49f448a67b02f4ee0dd8b313948e9b8d710c2fce..8290acf0fb594993f0439c539f7abfff54a34a15 100644
--- a/models/audio/speech_recognition/conformer/igie/ci/prepare.sh
+++ b/models/audio/speech_recognition/conformer/igie/ci/prepare.sh
@@ -16,6 +16,15 @@
set -x
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+ apt install sox libsox-fmt-all -y
+elif [[ ${ID} == "centos" ]]; then
+ yum install sox sox-devel -y
+else
+    echo "Unsupported OS"
+fi
+
pip3 install -r requirements.txt
cd ctc_decoder/swig && bash setup.sh
cd ../../
@@ -39,4 +48,4 @@ onnxsim encoder_bs24_seq384_static.onnx encoder_bs24_seq384_static_opt.onnx
python3 alter_onnx.py --batch_size 24 --path encoder_bs24_seq384_static_opt.onnx
# Need to unzip aishell to the current directory. For details, refer to data.list
-tar -zxvf aishell.tar.gz
+# tar -zxvf aishell.tar.gz
diff --git a/models/audio/speech_recognition/conformer/igie/requirements.txt b/models/audio/speech_recognition/conformer/igie/requirements.txt
index 2f7cd1f24262857100607eb19f6ccc14b7e98a31..8820eb754dec653c319dc0c86d53049346c7f7b6 100644
--- a/models/audio/speech_recognition/conformer/igie/requirements.txt
+++ b/models/audio/speech_recognition/conformer/igie/requirements.txt
@@ -1,4 +1,4 @@
tqdm
onnx
typeguard==2.13.3
-onnxsim
+onnxsim
\ No newline at end of file
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/utils/init_model.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/utils/init_model.py
index 4a008183ee25cd88b2fa25d93bdc3f9e3a55d31a..377e110b36cc140a55edc9dcc1b20dc5f91387a2 100644
--- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/utils/init_model.py
+++ b/models/audio/speech_recognition/conformer/igie/wenet/wenet/utils/init_model.py
@@ -29,7 +29,7 @@ from wenet.utils.cmvn import load_cmvn
def init_model(configs):
if configs['cmvn_file'] is not None:
- mean, istd = load_cmvn(configs['cmvn_file'], configs['is_json_cmvn'])
+ mean, istd = load_cmvn(configs['cmvn_file'], configs['cmvn_conf']['is_json_cmvn'])
global_cmvn = GlobalCMVN(
torch.from_numpy(mean).float(),
torch.from_numpy(istd).float())
diff --git a/models/audio/speech_recognition/transformer_asr/ixrt/ci/prepare.sh b/models/audio/speech_recognition/transformer_asr/ixrt/ci/prepare.sh
index 5a1f966836c58193331ab4d43411a5622c04ad79..3b9bb751e92924655b447fdc785899465a846b61 100644
--- a/models/audio/speech_recognition/transformer_asr/ixrt/ci/prepare.sh
+++ b/models/audio/speech_recognition/transformer_asr/ixrt/ci/prepare.sh
@@ -32,6 +32,8 @@ cp -r /root/data/checkpoints/8886 results/transformer/
mkdir -p results/transformer/8886/save
mkdir -p /home/data/speechbrain/aishell/csv_data
ln -s /root/data/datasets/AISHELL/data_aishell /home/data/speechbrain/aishell/
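+# stage and unpack the RIRS noise corpus next to the AISHELL data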
+cp /root/data/datasets/rirs_noises.zip /home/data/speechbrain/aishell/
+unzip -o /home/data/speechbrain/aishell/rirs_noises.zip -d /home/data/speechbrain/aishell/
cp results/transformer/8886/*.csv /home/data/speechbrain/aishell/csv_data
bash build.sh
diff --git a/models/cv/classification/densenet121/ixrt/README.md b/models/cv/classification/densenet121/ixrt/README.md
index 58cce7a327e3d3b00c4b2ccf289e23124050d02a..a5dbc7c7f19a4121e1d769ec50a9b7e2c308489b 100644
--- a/models/cv/classification/densenet121/ixrt/README.md
+++ b/models/cv/classification/densenet121/ixrt/README.md
@@ -33,8 +33,9 @@ pip3 install -r ../../ixrt_common/requirements.txt
### Model Conversion
```bash
+# Download the model weights to /root/.cache/torch/hub/checkpoints/densenet121-a639ec97.pth
mkdir checkpoints
-python3 ../../ixrt_common/export.py --model-name densenet121 --weight densenet121-a639ec97.pth --output checkpoints/densenet121.onnx
+python3 export.py --output checkpoints/densenet121.onnx
```
## Model Inference
diff --git a/models/cv/classification/densenet121/ixrt/ci/prepare.sh b/models/cv/classification/densenet121/ixrt/ci/prepare.sh
index 3ac521c0ed745dd4b98f475b9af614bff3137105..8d542a84ee0e27037281fc85a3a9799d159476e8 100644
--- a/models/cv/classification/densenet121/ixrt/ci/prepare.sh
+++ b/models/cv/classification/densenet121/ixrt/ci/prepare.sh
@@ -27,4 +27,4 @@ fi
pip install -r ../../ixrt_common/requirements.txt
mkdir checkpoints
-python3 ../../ixrt_common/export.py --model-name densenet121 --weight densenet121-a639ec97.pth --output checkpoints/densenet121.onnx
\ No newline at end of file
+python3 export.py --output checkpoints/densenet121.onnx
\ No newline at end of file
diff --git a/models/cv/classification/densenet121/ixrt/export.py b/models/cv/classification/densenet121/ixrt/export.py
new file mode 100644
index 0000000000000000000000000000000000000000..ff86753a004cd6611c7c7104e0061904bc3d2184
--- /dev/null
+++ b/models/cv/classification/densenet121/ixrt/export.py
@@ -0,0 +1,43 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import torch
+import torchvision.models as models
+import argparse
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+    parser.add_argument("--output", type=str)
+ args = parser.parse_args()
+ return args
+
+args = parse_args()
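+# pretrained=True loads densenet121-a639ec97.pth from the torch hub cache (/root/.cache/torch/hub/checkpoints)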
+model = models.densenet121(pretrained=True)
+model.cuda()
+model.eval()
+input = torch.randn(1, 3, 224, 224, device='cuda')
+export_onnx_file = args.output
+
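+# export a static batch-1 ONNX graph at opset 11 with constant folding enabled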
+torch.onnx.export(model,
+ input,
+ export_onnx_file,
+ export_params=True,
+ opset_version=11,
+ do_constant_folding=True,
+ input_names = ['input'],
+ output_names = ['output'],)
+print('Model has been converted to ONNX')
diff --git a/models/cv/classification/efficientnet_b1/ixrt/ci/prepare.sh b/models/cv/classification/efficientnet_b1/ixrt/ci/prepare.sh
index c69f7471f6a82156b66ba0cd953c0e25e0d9ec17..7bb940752879bc68163f7a4dc4c31e3494dbec54 100644
--- a/models/cv/classification/efficientnet_b1/ixrt/ci/prepare.sh
+++ b/models/cv/classification/efficientnet_b1/ixrt/ci/prepare.sh
@@ -28,4 +28,4 @@ fi
pip install -r ../../ixrt_common/requirements.txt
mkdir checkpoints
-python3 ../../ixrt_common/export_onnx.py --model-name efficientnet_b1 --output_model checkpoints/efficientnet-b1.onnx
\ No newline at end of file
+python3 ../../ixrt_common/export.py --model-name efficientnet_b1 --weight efficientnet_b1-c27df63c.pth --output checkpoints/efficientnet_b1.onnx
\ No newline at end of file
diff --git a/models/cv/classification/efficientnetv2_rw_t/igie/inference.py b/models/cv/classification/efficientnetv2_rw_t/igie/inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..e33c91fa1de2d25402f0ad3318e15f372d829908
--- /dev/null
+++ b/models/cv/classification/efficientnetv2_rw_t/igie/inference.py
@@ -0,0 +1,183 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import sys
+import argparse
+import tvm
+import torch
+import torchvision
+import numpy as np
+from tvm import relay
+from tqdm import tqdm
+from torchvision import transforms
+from torchvision.transforms.functional import InterpolationMode
+
+from timm.data import create_dataset, create_loader
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--engine",
+ type=str,
+ required=True,
+ help="igie engine path.")
+
+ parser.add_argument("--batchsize",
+ type=int,
+ required=True,
+ help="inference batch size.")
+
+ parser.add_argument("--datasets",
+ type=str,
+ required=True,
+ help="datasets path.")
+
+ parser.add_argument("--input_name",
+ type=str,
+ required=True,
+ help="input name of the model.")
+
+ parser.add_argument("--warmup",
+ type=int,
+ default=3,
+ help="number of warmup before test.")
+
+ parser.add_argument("--num_workers",
+ type=int,
+ default=16,
+ help="number of workers used in pytorch dataloader.")
+
+ parser.add_argument("--acc_target",
+ type=float,
+ default=None,
+ help="Model inference Accuracy target.")
+
+ parser.add_argument("--fps_target",
+ type=float,
+ default=None,
+ help="Model inference FPS target.")
+
+ parser.add_argument("--perf_only",
+ type=bool,
+ default=False,
+ help="Run performance test only")
+
+ args = parser.parse_args()
+
+ return args
+
+def get_dataloader(data_path, batch_size, num_workers):
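+    # timm eval loader: 288x288 bicubic input, crop_pct=1.0, ImageNet mean/std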
+ datasets = create_dataset(root=data_path, name="")
+
+ dataloader = create_loader(
+ datasets,
+ input_size=(3, 288, 288),
+ batch_size=batch_size,
+ interpolation='bicubic',
+ mean=(0.485, 0.456, 0.406),
+ std=(0.229, 0.224, 0.225),
+ crop_pct=1.0,
+ use_prefetcher = False,
+ num_workers = num_workers
+ )
+ return dataloader
+
+def get_topk_accuracy(pred, label):
+ if isinstance(pred, np.ndarray):
+ pred = torch.from_numpy(pred)
+
+ if isinstance(label, np.ndarray):
+ label = torch.from_numpy(label)
+
+ top1_acc = 0
+ top5_acc = 0
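+    # a top-1 hit also counts toward top-5; otherwise check membership in the top-5 indices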
+ for idx in range(len(label)):
+ label_value = label[idx]
+ if label_value == torch.topk(pred[idx].float(), 1).indices.data:
+ top1_acc += 1
+ top5_acc += 1
+
+ elif label_value in torch.topk(pred[idx].float(), 5).indices.data:
+ top5_acc += 1
+
+ return top1_acc, top5_acc
+
+def main():
+ args = parse_args()
+
+ batch_size = args.batchsize
+
+ # create iluvatar target & device
+ target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer")
+ device = tvm.device(target.kind.name, 0)
+
+ # load engine
+ lib = tvm.runtime.load_module(args.engine)
+
+ # create runtime from engine
+ module = tvm.contrib.graph_executor.GraphModule(lib["default"](device))
+
+ # just run perf test
+ if args.perf_only:
+ ftimer = module.module.time_evaluator("run", device, number=100, repeat=1)
+ prof_res = np.array(ftimer().results) * 1000
+ fps = batch_size * 1000 / np.mean(prof_res)
+ print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}")
+ else:
+ # warm up
+ for _ in range(args.warmup):
+ module.run()
+
+ # get dataloader
+ dataloader = get_dataloader(args.datasets, batch_size, args.num_workers)
+
+ top1_acc = 0
+ top5_acc = 0
+ total_num = 0
+
+ for image, label in tqdm(dataloader):
+
+ # pad the last batch
+ pad_batch = len(image) != batch_size
+
+ if pad_batch:
+ origin_size = len(image)
+ image = np.resize(image, (batch_size, *image.shape[1:]))
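+                # np.resize repeats samples to fill the fixed engine batch; the extra predictions are dropped after inference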
+
+ module.set_input(args.input_name, tvm.nd.array(image, device))
+
+ # run inference
+ module.run()
+
+ pred = module.get_output(0).asnumpy()
+
+ if pad_batch:
+ pred = pred[:origin_size]
+
+ # get batch accuracy
+ batch_top1_acc, batch_top5_acc = get_topk_accuracy(pred, label)
+
+ top1_acc += batch_top1_acc
+ top5_acc += batch_top5_acc
+ total_num += batch_size
+
+ result_stat = {}
+ result_stat["acc@1"] = round(top1_acc / total_num * 100.0, 3)
+ result_stat["acc@5"] = round(top5_acc / total_num * 100.0, 3)
+
+ print(f"\n* Top1 acc: {result_stat['acc@1']} %, Top5 acc: {result_stat['acc@5']} %")
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/models/cv/classification/efficientnetv2_rw_t/igie/requirements.txt b/models/cv/classification/efficientnetv2_rw_t/igie/requirements.txt
deleted file mode 100644
index 36677a29ab3a81e04e55e2185513580169404d15..0000000000000000000000000000000000000000
--- a/models/cv/classification/efficientnetv2_rw_t/igie/requirements.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-timm
-onnx
-tqdm
diff --git a/models/cv/classification/efficientnetv2_rw_t/igie/scripts/infer_efficientnetv2_rw_t_fp16_accuracy.sh b/models/cv/classification/efficientnetv2_rw_t/igie/scripts/infer_efficientnetv2_rw_t_fp16_accuracy.sh
index 9d96fcfb93ffd560a2a682f94068fd3322833a93..7e2e5ffbc665d6e70d0dc5ff7bcf0b870d79dd1b 100644
--- a/models/cv/classification/efficientnetv2_rw_t/igie/scripts/infer_efficientnetv2_rw_t_fp16_accuracy.sh
+++ b/models/cv/classification/efficientnetv2_rw_t/igie/scripts/infer_efficientnetv2_rw_t_fp16_accuracy.sh
@@ -28,7 +28,7 @@ python3 ${RUN_DIR}build_engine.py \
# inference
-python3 ${RUN_DIR}inference.py \
+python3 inference.py \
--engine efficientnetv2_rw_t_bs_${batchsize}_fp16.so \
--batchsize ${batchsize} \
--input_name input \
diff --git a/models/cv/classification/efficientnetv2_rw_t/igie/scripts/infer_efficientnetv2_rw_t_fp16_performance.sh b/models/cv/classification/efficientnetv2_rw_t/igie/scripts/infer_efficientnetv2_rw_t_fp16_performance.sh
index a3f9d58520cb98365e5c789fce1f07dba5627249..c08b48407740ee447d6bad514fe0aa76c001aec6 100644
--- a/models/cv/classification/efficientnetv2_rw_t/igie/scripts/infer_efficientnetv2_rw_t_fp16_performance.sh
+++ b/models/cv/classification/efficientnetv2_rw_t/igie/scripts/infer_efficientnetv2_rw_t_fp16_performance.sh
@@ -28,7 +28,7 @@ python3 ${RUN_DIR}build_engine.py \
# inference
-python3 ${RUN_DIR}inference.py \
+python3 inference.py \
--engine efficientnetv2_rw_t_bs_${batchsize}_fp16.so \
--batchsize ${batchsize} \
--input_name input \
diff --git a/models/cv/classification/ixrt_common/build_i8_engine.py b/models/cv/classification/ixrt_common/build_i8_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..6038b33f50cff7a14efcefa6673ae9d2fd19870b
--- /dev/null
+++ b/models/cv/classification/ixrt_common/build_i8_engine.py
@@ -0,0 +1,112 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import argparse
+import json
+import os
+
+import tensorrt
+import tensorrt as trt
+
+TRT_LOGGER = trt.Logger(tensorrt.Logger.VERBOSE)
+
+EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
+
+
+def GiB(val):
+ return val * 1 << 30
+
+
+def json_load(filename):
+ with open(filename) as json_file:
+ data = json.load(json_file)
+ return data
+
+
+def setDynamicRange(network, json_file):
+ """Sets ranges for network layers."""
+ quant_param_json = json_load(json_file)
+ act_quant = quant_param_json["act_quant_info"]
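+    # each tensor listed in the json gets a symmetric dynamic range [-|v|, |v|] from its recorded activation value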
+
+ for i in range(network.num_inputs):
+ input_tensor = network.get_input(i)
+ if act_quant.__contains__(input_tensor.name):
+ print(input_tensor.name)
+ value = act_quant[input_tensor.name]
+ tensor_max = abs(value)
+ tensor_min = -abs(value)
+ input_tensor.dynamic_range = (tensor_min, tensor_max)
+
+ for i in range(network.num_layers):
+ layer = network.get_layer(i)
+
+ for output_index in range(layer.num_outputs):
+ tensor = layer.get_output(output_index)
+
+ if act_quant.__contains__(tensor.name):
+ value = act_quant[tensor.name]
+ tensor_max = abs(value)
+ tensor_min = -abs(value)
+ tensor.dynamic_range = (tensor_min, tensor_max)
+ else:
+ print("\033[1;32m%s\033[0m" % tensor.name)
+
+
+def build_engine(onnx_file, json_file, engine_file):
+ builder = trt.Builder(TRT_LOGGER)
+ network = builder.create_network(EXPLICIT_BATCH)
+
+ config = builder.create_builder_config()
+
+ # If it is a dynamic onnx model , you need to add the following.
+ # profile = builder.create_optimization_profile()
+ # profile.set_shape("input_name", (batch, channels, min_h, min_w), (batch, channels, opt_h, opt_w), (batch, channels, max_h, max_w))
+ # config.add_optimization_profile(profile)
+
+ parser = trt.OnnxParser(network, TRT_LOGGER)
+ # config.max_workspace_size = GiB(1)
+ if not os.path.exists(onnx_file):
+ quit("ONNX file {} not found".format(onnx_file))
+
+ with open(onnx_file, "rb") as model:
+ if not parser.parse(model.read()):
+ print("ERROR: Failed to parse the ONNX file.")
+ for error in range(parser.num_errors):
+ print(parser.get_error(error))
+ return None
+
+ config.set_flag(trt.BuilderFlag.INT8)
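+    # the per-tensor ranges written by setDynamicRange below stand in for a calibration pass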
+
+ setDynamicRange(network, json_file)
+
+ engine = builder.build_engine(network, config)
+
+ with open(engine_file, "wb") as f:
+ f.write(engine.serialize())
+
+
+if __name__ == "__main__":
+ # Add plugins if needed
+ # import ctypes
+ # ctypes.CDLL("libmmdeploy_tensorrt_ops.so")
+ parser = argparse.ArgumentParser(
+ description="Writing qparams to onnx to convert tensorrt engine."
+ )
+ parser.add_argument("--onnx", type=str, default=None)
+ parser.add_argument("--qparam_json", type=str, default=None)
+ parser.add_argument("--engine", type=str, default=None)
+ arg = parser.parse_args()
+
+ build_engine(arg.onnx, arg.qparam_json, arg.engine)
+ print("\033[1;32mgenerate %s\033[0m" % arg.engine)
\ No newline at end of file
diff --git a/models/cv/classification/ixrt_common/config/EFFICIENTNET_B1_CONFIG b/models/cv/classification/ixrt_common/config/EFFICIENTNET_B1_CONFIG
index 0275a569d9c68074365cbe75427920818aec93ca..42e3e648cb2839c2017ec5134a9625f46566be73 100644
--- a/models/cv/classification/ixrt_common/config/EFFICIENTNET_B1_CONFIG
+++ b/models/cv/classification/ixrt_common/config/EFFICIENTNET_B1_CONFIG
@@ -17,8 +17,8 @@
# MODEL_NAME : basename of the generated onnx/engine files
# ORIGINE_MODEL : name of the original onnx file
IMGSIZE=224
-MODEL_NAME=EfficientNet_b1
-ORIGINE_MODEL=efficientnet-b1.onnx
+MODEL_NAME=efficientnet_b1
+ORIGINE_MODEL=efficientnet_b1.onnx
# QUANT CONFIG (only takes effect when PRECISION is int8)
# QUANT_OBSERVER : quantization strategy, one of [hist_percentile, percentile, minmax, entropy, ema]
diff --git a/models/cv/classification/ixrt_common/quant_i8.py b/models/cv/classification/ixrt_common/quant_i8.py
new file mode 100644
index 0000000000000000000000000000000000000000..c728c7a128f7ba5a041160c9452980861c7a9071
--- /dev/null
+++ b/models/cv/classification/ixrt_common/quant_i8.py
@@ -0,0 +1,166 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+"""这是一个高度自动化的 PPQ 量化的入口脚本,将你的模型和数据按要求进行打包:
+
+在自动化 API 中,我们使用 QuantizationSetting 对象传递量化参数。
+
+This file will show you how to quantize your network with PPQ
+ You should prepare your model and calibration dataset as follow:
+
+ ~/working/model.onnx <-- your model
+ ~/working/data/*.npy or ~/working/data/*.bin <-- your dataset
+
+if you are using caffe model:
+ ~/working/model.caffemdoel <-- your model
+ ~/working/model.prototext <-- your model
+
+### MAKE SURE YOUR INPUT LAYOUT IS [N, C, H, W] or [C, H, W] ###
+
+quantized model will be generated at: ~/working/quantized.onnx
+"""
+from ppq import *
+from ppq.api import *
+import os
+from calibration_dataset import getdataloader
+import argparse
+import random
+import numpy as np
+import torch
+
+
+def setseed(seed=42):
+ random.seed(seed)
+ np.random.seed(seed)
+ torch.manual_seed(seed)
+
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--model_name", type=str)
+ parser.add_argument("--model", type=str)
+ parser.add_argument("--dataset_dir", type=str, default="imagenet_val")
+ parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"],
+ default="hist_percentile")
+ parser.add_argument("--disable_quant_names", nargs='*', type=str)
+ parser.add_argument("--save_dir", type=str, help="save path", default=None)
+ parser.add_argument("--bsz", type=int, default=32)
+ parser.add_argument("--step", type=int, default=20)
+ parser.add_argument("--seed", type=int, default=42)
+ parser.add_argument("--imgsz", type=int, default=224)
+ args = parser.parse_args()
+ print("Quant config:", args)
+ print(args.disable_quant_names)
+ return args
+
+
+config = parse_args()
+
+# modify configuration below:
+WORKING_DIRECTORY = 'checkpoints' # choose your working directory
+TARGET_PLATFORM = TargetPlatform.TRT_INT8 # choose your target platform
+MODEL_TYPE = NetworkFramework.ONNX # or NetworkFramework.CAFFE
+INPUT_LAYOUT = 'chw' # input data layout, chw or hwc
+NETWORK_INPUTSHAPE = [32, 3, 224, 224] # input shape of your network
+EXECUTING_DEVICE = 'cuda' # 'cuda' or 'cpu'.
+REQUIRE_ANALYSE = False
+TRAINING_YOUR_NETWORK = False                    # whether to fine-tune the network
+# -------------------------------------------------------------------
+# Load your model file; PPQ will parse the onnx or caffe model file into its own format.
+# If you are using pytorch, tensorflow or another framework, export the model to onnx first
+# with torch.onnx.export. If you run into errors while exporting a torch model, feel free to contact us.
+# -------------------------------------------------------------------
+graph = None
+if MODEL_TYPE == NetworkFramework.ONNX:
+ graph = load_onnx_graph(onnx_import_file=config.model)
+if MODEL_TYPE == NetworkFramework.CAFFE:
+ graph = load_caffe_graph(
+ caffemodel_path=os.path.join(WORKING_DIRECTORY, 'model.caffemodel'),
+ prototxt_path=os.path.join(WORKING_DIRECTORY, 'model.prototxt'))
+assert graph is not None, 'Graph Loading Error, Check your input again.'
+
+# -------------------------------------------------------------------
+# The SETTING object controls PPQ's quantization logic: graph fusion, scheduling, and detailed quantization strategies.
+# If the quantization error of your network is too high, modify the attributes of the SETTING object to apply targeted optimizations.
+# -------------------------------------------------------------------
+QS = QuantizationSettingFactory.default_setting()
+
+# -------------------------------------------------------------------
+# The block below shows how to use a finetuning pass to improve quantization accuracy.
+# PPQ provides more than ten algorithms to help you recover accuracy.
+# Each of them is enabled via QS.xxxx = True.
+# Enable them as needed; turning everything on at once tends to blow up.
+# -------------------------------------------------------------------
+if TRAINING_YOUR_NETWORK:
+    QS.lsq_optimization = True                              # start a retraining pass to reduce quantization error
+    QS.lsq_optimization_setting.steps = 500                 # number of retraining steps; affects training time, 500 steps take roughly a few minutes
+    QS.lsq_optimization_setting.collecting_device = 'cuda'  # where cached data lives; 'cuda' keeps it on the GPU, switch to 'cpu' if GPU memory runs out
+
+
+dataloader = getdataloader(config.dataset_dir, config.step, batch_size=config.bsz, img_sz=config.imgsz)
+# ENABLE_CUDA_KERNEL speeds up quantization by 3x ~ 10x, but it cannot compile without the matching build environment.
+# Either install the build environment, or quantize without the CUDA kernel by removing the `with ENABLE_CUDA_KERNEL():` line.
+with ENABLE_CUDA_KERNEL():
+    print('Quantizing the network; depending on your quantization configuration this will take a while:')
+ quantized = quantize_native_model(
+        setting=QS,                     # the setting object controls the standard quantization logic
+ model=graph,
+ calib_dataloader=dataloader,
+ calib_steps=config.step,
+        input_shape=NETWORK_INPUTSHAPE, # if your network has a single input, pass its shape via this argument
+        inputs=None,
+        # if your network has multiple inputs, pass them here instead: input_shape=None, inputs=[torch.zeros(1,3,224,224), torch.zeros(1,3,224,224)]
+        collate_fn=lambda x: x[0].to(EXECUTING_DEVICE), # collate_fn works like a torch dataloader collate_fn and handles data preprocessing;
+        # you can also reuse the one from your torch dataloader and set this to None
+ platform=TARGET_PLATFORM,
+ device=EXECUTING_DEVICE,
+ do_quantize=True)
+
+ # -------------------------------------------------------------------
+    # If you need to run the quantized network and get its outputs, create an executor.
+    # The executor behaves much like a torch.Module; use it to obtain execution results.
+    # Note that this must be done before the export step.
+ # -------------------------------------------------------------------
+ executor = TorchExecutor(graph=quantized, device=EXECUTING_DEVICE)
+ # output = executor.forward(input)
+
+ # -------------------------------------------------------------------
+    # When computing quantization error, PPQ uses the inverse signal-to-noise ratio, i.e. noise energy / signal energy.
+    # A quantization error of 0.1 means quantization noise carries roughly 10% of the overall signal energy.
+    # Note that graphwise_error_analyse measures the accumulated error:
+    # the last layer of the network usually shows a large accumulated error, contributed by all the layers before it.
+    # Use layerwise_error_analyse to trace the error back to individual layers.
+ # -------------------------------------------------------------------
+    print('Computing network quantization error (SNR); the error of the last layer should stay below 0.1 to preserve accuracy:')
+ reports = graphwise_error_analyse(
+ graph=quantized, running_device=EXECUTING_DEVICE, steps=32,
+ dataloader=dataloader, collate_fn=lambda x: x[0].to(EXECUTING_DEVICE))
+ for op, snr in reports.items():
+        if snr > 0.1: ppq_warning(f'Layer {op} shows significant accumulated quantization error; consider further optimization.')
+
+ if REQUIRE_ANALYSE:
+        print('Computing per-layer quantization error (SNR); the standalone error of each layer should stay below 0.1 to preserve accuracy:')
+ layerwise_error_analyse(graph=quantized, running_device=EXECUTING_DEVICE,
+ interested_outputs=None,
+ dataloader=dataloader, collate_fn=lambda x: x.to(EXECUTING_DEVICE))
+
+ # -------------------------------------------------------------------
+    # Use export_ppq_graph to export the quantized model.
+    # PPQ adjusts the model format according to the export platform you selected.
+ # -------------------------------------------------------------------
+    print('Quantization finished, generating target files:')
+ export_ppq_graph(
+ graph=quantized, platform=TARGET_PLATFORM,
+ graph_save_to=os.path.join(config.save_dir, f"quantized_{config.model_name}.onnx"),
+ config_save_to=os.path.join(config.save_dir, 'quant_cfg.json'))
diff --git a/models/cv/classification/resnetv1d50/ixrt/README.md b/models/cv/classification/resnetv1d50/ixrt/README.md
index 0a5cf2cf44e5bc65ffaae70eaa449ee81dc29e2b..9a8d945de7190080c83437591649145961c7eecb 100644
--- a/models/cv/classification/resnetv1d50/ixrt/README.md
+++ b/models/cv/classification/resnetv1d50/ixrt/README.md
@@ -28,7 +28,7 @@ yum install -y mesa-libGL
apt install -y libgl1-mesa-glx
pip3 install -r ../../ixrt_common/requirements.txt
-pip3 install mmcv==1.5.3 mmcls==0.24.0
+pip3 install mmcv==1.5.3 mmcls==0.24.0 ppq pycuda transformers==4.37.1
```
### Model Conversion
diff --git a/models/cv/classification/resnetv1d50/ixrt/ci/prepare.sh b/models/cv/classification/resnetv1d50/ixrt/ci/prepare.sh
index 22566b28034a67b41a63152251dfbd32206087cb..d5adbfd5beca26708b1a07d262b46ff661c5bb9b 100644
--- a/models/cv/classification/resnetv1d50/ixrt/ci/prepare.sh
+++ b/models/cv/classification/resnetv1d50/ixrt/ci/prepare.sh
@@ -26,7 +26,7 @@ else
fi
pip install -r ../../ixrt_common/requirements.txt
-pip install mmcv==1.5.3 mmcls==0.24.0
+pip install mmcv==1.5.3 mmcls==0.24.0 ppq pycuda transformers==4.37.1
unzip -q /root/data/repos/mmpretrain-0.24.0.zip -d ./
mkdir checkpoints
python3 ../../ixrt_common/export_mmcls.py --cfg mmpretrain/configs/resnet/resnetv1d50_b32x8_imagenet.py --weight resnetv1d50_b32x8_imagenet_20210531-db14775a.pth --output checkpoints/resnet_v1_d50.onnx
\ No newline at end of file
diff --git a/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_fp16_accuracy.sh b/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_fp16_accuracy.sh
index 482924938efb2a72399df02d9704ea8dd34e82e6..b743d7084ae058118c29daaf494769fc293ceb41 100644
--- a/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_fp16_accuracy.sh
+++ b/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_fp16_accuracy.sh
@@ -51,8 +51,6 @@ echo RUN_DIR : ${RUN_DIR}
echo CONFIG_DIR : ${CONFIG_DIR}
echo ====================== Model Info ======================
echo Model Name : ${MODEL_NAME}
-echo Model Input Name : ${MODEL_INPUT_NAME}
-echo Model Output Name : ${MODEL_OUTPUT_NAME}
echo Onnx Path : ${ORIGINE_MODEL}
step=0
@@ -71,34 +69,6 @@ else
echo " "Generate ${SIM_MODEL}
fi
-# Quant Model
-if [ $PRECISION == "int8" ];then
- let step++
- echo;
- echo [STEP ${step}] : Quant Model
- if [[ -z ${QUANT_EXIST_ONNX} ]];then
- QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx
- fi
- if [[ -f ${QUANT_EXIST_ONNX} ]];then
- SIM_MODEL=${QUANT_EXIST_ONNX}
- echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed
- else
- python3 ${RUN_DIR}/quant.py \
- --model ${SIM_MODEL} \
- --model_name ${MODEL_NAME} \
- --dataset_dir ${DATASETS_DIR} \
- --observer ${QUANT_OBSERVER} \
- --disable_quant_names ${DISABLE_QUANT_LIST[@]} \
- --save_dir $CHECKPOINTS_DIR \
- --bsz ${QUANT_BATCHSIZE} \
- --step ${QUANT_STEP} \
- --seed ${QUANT_SEED} \
- --imgsz ${IMGSIZE}
- SIM_MODEL=${QUANT_EXIST_ONNX}
- echo " "Generate ${SIM_MODEL}
- fi
-fi
-
# Change Batchsize
let step++
echo;
@@ -141,4 +111,4 @@ python3 ${RUN_DIR}/inference.py \
--acc_target ${TGT} \
--bsz ${BSZ}; check_status
-exit ${EXIT_STATUS}
\ No newline at end of file
+exit ${EXIT_STATUS}
diff --git a/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_fp16_performance.sh b/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_fp16_performance.sh
index c843057dda987fd834e1fa0580deb2a8cdea17ce..e7a4f1a7276406a0ed7400af4368b5bec2a06e06 100644
--- a/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_fp16_performance.sh
+++ b/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_fp16_performance.sh
@@ -51,8 +51,6 @@ echo RUN_DIR : ${RUN_DIR}
echo CONFIG_DIR : ${CONFIG_DIR}
echo ====================== Model Info ======================
echo Model Name : ${MODEL_NAME}
-echo Model Input Name : ${MODEL_INPUT_NAME}
-echo Model Output Name : ${MODEL_OUTPUT_NAME}
echo Onnx Path : ${ORIGINE_MODEL}
step=0
@@ -71,34 +69,6 @@ else
echo " "Generate ${SIM_MODEL}
fi
-# Quant Model
-if [ $PRECISION == "int8" ];then
- let step++
- echo;
- echo [STEP ${step}] : Quant Model
- if [[ -z ${QUANT_EXIST_ONNX} ]];then
- QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx
- fi
- if [[ -f ${QUANT_EXIST_ONNX} ]];then
- SIM_MODEL=${QUANT_EXIST_ONNX}
- echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed
- else
- python3 ${RUN_DIR}/quant.py \
- --model ${SIM_MODEL} \
- --model_name ${MODEL_NAME} \
- --dataset_dir ${DATASETS_DIR} \
- --observer ${QUANT_OBSERVER} \
- --disable_quant_names ${DISABLE_QUANT_LIST[@]} \
- --save_dir $CHECKPOINTS_DIR \
- --bsz ${QUANT_BATCHSIZE} \
- --step ${QUANT_STEP} \
- --seed ${QUANT_SEED} \
- --imgsz ${IMGSIZE}
- SIM_MODEL=${QUANT_EXIST_ONNX}
- echo " "Generate ${SIM_MODEL}
- fi
-fi
-
# Change Batchsize
let step++
echo;
@@ -141,4 +111,4 @@ python3 ${RUN_DIR}/inference.py \
--fps_target ${TGT} \
--bsz ${BSZ}; check_status
-exit ${EXIT_STATUS}
\ No newline at end of file
+exit ${EXIT_STATUS}
diff --git a/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_int8_accuracy.sh b/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_int8_accuracy.sh
index a66d6a253206c49ad68752793ffb1bd7b7f12958..df1fdc610c2332f33d210e6f417cf44da7fef7bd 100644
--- a/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_int8_accuracy.sh
+++ b/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_int8_accuracy.sh
@@ -13,6 +13,7 @@
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
+set -x
EXIT_STATUS=0
check_status()
{
@@ -28,7 +29,7 @@ WARM_UP=0
LOOP_COUNT=-1
RUN_MODE=ACC
PRECISION=int8
-
+export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
# Update arguments
index=0
options=$@
@@ -43,6 +44,7 @@ do
done
source ${CONFIG_DIR}
+echo ${QUANT_OBSERVER}
ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}
echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
@@ -60,16 +62,15 @@ SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
# Simplify Model
let step++
-echo;
-echo [STEP ${step}] : Simplify Model
-if [ -f ${SIM_MODEL} ];then
- echo " "Simplify Model, ${SIM_MODEL} has been existed
-else
- python3 ${RUN_DIR}/simplify_model.py \
- --origin_model $ORIGINE_MODEL \
- --output_model ${SIM_MODEL}
- echo " "Generate ${SIM_MODEL}
-fi
+echo [STEP ${step}] : Simplify Model
+if [ -f ${SIM_MODEL} ];then
+    echo " "Simplify Model, ${SIM_MODEL} has been existed
+else
+    python3 ${RUN_DIR}/simplify_model.py \
+        --origin_model $ORIGINE_MODEL \
+        --output_model ${SIM_MODEL}
+    echo " "Generate ${SIM_MODEL}
+fi
# Quant Model
if [ $PRECISION == "int8" ];then
@@ -83,7 +84,7 @@ if [ $PRECISION == "int8" ];then
SIM_MODEL=${QUANT_EXIST_ONNX}
echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed
else
- python3 ${RUN_DIR}/quant.py \
+ python3 ${RUN_DIR}/quant_i8.py \
--model ${SIM_MODEL} \
--model_name ${MODEL_NAME} \
--dataset_dir ${DATASETS_DIR} \
@@ -120,15 +121,15 @@ ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
if [ -f $ENGINE_FILE ];then
echo " "Build Engine Skip, $ENGINE_FILE has been existed
else
- python3 ${RUN_DIR}/build_engine.py \
- --precision ${PRECISION} \
- --model ${FINAL_MODEL} \
+ python3 ${RUN_DIR}/build_i8_engine.py \
+ --onnx ${FINAL_MODEL} \
+ --qparam_json ${CHECKPOINTS_DIR}/quant_cfg.json \
--engine ${ENGINE_FILE}
echo " "Generate Engine ${ENGINE_FILE}
fi
# Inference
-let step++
+# let step++
echo;
echo [STEP ${step}] : Inference
python3 ${RUN_DIR}/inference.py \
@@ -141,4 +142,4 @@ python3 ${RUN_DIR}/inference.py \
--acc_target ${TGT} \
--bsz ${BSZ}; check_status
-exit ${EXIT_STATUS}
\ No newline at end of file
+exit ${EXIT_STATUS}
diff --git a/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_int8_performance.sh b/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_int8_performance.sh
index e578762eb996592b509a8eed995b15b227ae8a86..72ca157b222ba853eb530146099c0cc3bfbb68c9 100644
--- a/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_int8_performance.sh
+++ b/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_int8_performance.sh
@@ -28,7 +28,7 @@ WARM_UP=3
LOOP_COUNT=20
RUN_MODE=FPS
PRECISION=int8
-
+export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
# Update arguments
index=0
options=$@
@@ -43,6 +43,7 @@ do
done
source ${CONFIG_DIR}
+echo ${QUANT_OBSERVER}
ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}
echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
@@ -60,7 +61,6 @@ SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
# Simplify Model
let step++
-echo;
echo [STEP ${step}] : Simplify Model
if [ -f ${SIM_MODEL} ];then
echo " "Simplify Model, ${SIM_MODEL} has been existed
@@ -83,7 +83,7 @@ if [ $PRECISION == "int8" ];then
SIM_MODEL=${QUANT_EXIST_ONNX}
echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed
else
- python3 ${RUN_DIR}/quant.py \
+ python3 ${RUN_DIR}/quant_i8.py \
--model ${SIM_MODEL} \
--model_name ${MODEL_NAME} \
--dataset_dir ${DATASETS_DIR} \
@@ -120,15 +120,15 @@ ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
if [ -f $ENGINE_FILE ];then
echo " "Build Engine Skip, $ENGINE_FILE has been existed
else
- python3 ${RUN_DIR}/build_engine.py \
- --precision ${PRECISION} \
- --model ${FINAL_MODEL} \
+ python3 ${RUN_DIR}/build_i8_engine.py \
+ --onnx ${FINAL_MODEL} \
+ --qparam_json ${CHECKPOINTS_DIR}/quant_cfg.json \
--engine ${ENGINE_FILE}
echo " "Generate Engine ${ENGINE_FILE}
fi
# Inference
-let step++
+# let step++
echo;
echo [STEP ${step}] : Inference
python3 ${RUN_DIR}/inference.py \
@@ -138,7 +138,7 @@ python3 ${RUN_DIR}/inference.py \
--warm_up=${WARM_UP} \
--loop_count ${LOOP_COUNT} \
--test_mode ${RUN_MODE} \
- --fps_target ${TGT} \
+ --acc_target ${TGT} \
--bsz ${BSZ}; check_status
exit ${EXIT_STATUS}
\ No newline at end of file
diff --git a/models/cv/classification/resnext50_32x4d/ixrt/ci/prepare.sh b/models/cv/classification/resnext50_32x4d/ixrt/ci/prepare.sh
index cd826b9795c96c6b3156d80022667d8a60ab6715..b9671165ce53a144c0f9b16d1e54f3ba824723ab 100644
--- a/models/cv/classification/resnext50_32x4d/ixrt/ci/prepare.sh
+++ b/models/cv/classification/resnext50_32x4d/ixrt/ci/prepare.sh
@@ -27,4 +27,4 @@ fi
pip install -r ../../ixrt_common/requirements.txt
mkdir checkpoints
-python3 export.py ../../ixrt_common/export.py --model-name resnext50_32x4d --weight resnext50_32x4d-7cdf4587.pth --output checkpoints/resnext50_32x4d.onnx
\ No newline at end of file
+python3 ../../ixrt_common/export.py --model-name resnext50_32x4d --weight resnext50_32x4d-7cdf4587.pth --output checkpoints/resnext50_32x4d.onnx
\ No newline at end of file
diff --git a/models/cv/instance_segmentation/solov1/ixrt/ci/prepare.sh b/models/cv/instance_segmentation/solov1/ixrt/ci/prepare.sh
index 09fa1878415ca72f1fd17b9ca6e19b16926756f9..66c8f9d0525bc855866325817dd7ee87aad8989f 100644
--- a/models/cv/instance_segmentation/solov1/ixrt/ci/prepare.sh
+++ b/models/cv/instance_segmentation/solov1/ixrt/ci/prepare.sh
@@ -44,6 +44,6 @@ cd ..
mkdir -p checkpoints
ln -s /root/data/checkpoints/solo_r50_fpn_3x_coco_20210901_012353-11d224d7.pth ./
-ln -s /root/data/datasets/coco ./
+ln -s /root/data/datasets/coco2017 ./
python3 solo_torch2onnx.py --cfg ./solo_r50_fpn_3x_coco.py --checkpoint ./solo_r50_fpn_3x_coco_20210901_012353-11d224d7.pth --batch_size 1
mv r50_solo_bs1_800x800.onnx ./checkpoints/r50_solo_bs1_800x800.onnx
\ No newline at end of file
diff --git a/models/cv/multi_object_tracking/fastreid/igie/ci/prepare.sh b/models/cv/multi_object_tracking/fastreid/igie/ci/prepare.sh
index 700282231245007abf2aad21967073edd312c77e..5ddfdcb2a001023d573acfbeb67e7bb5b5f9e707 100644
--- a/models/cv/multi_object_tracking/fastreid/igie/ci/prepare.sh
+++ b/models/cv/multi_object_tracking/fastreid/igie/ci/prepare.sh
@@ -18,7 +18,8 @@ set -x
pip3 install -r requirements.txt
-# clone fast-reid first
+# install fast-reid
+git clone https://github.com/JDAI-CV/fast-reid.git --depth=1
cd fast-reid
pip3 install -r docs/requirements.txt
diff --git a/models/cv/object_detection/foveabox/ixrt/ci/prepare.sh b/models/cv/object_detection/foveabox/ixrt/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..507d455e4d7a3b7e169cf7c422331910ce8eaa88
--- /dev/null
+++ b/models/cv/object_detection/foveabox/ixrt/ci/prepare.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+ apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+ yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
+pip3 install -r requirements.txt
+
+python3 export.py --weight fovea_r50_fpn_4x4_1x_coco_20200219-ee4d5303.pth --cfg fovea_r50_fpn_4xb4-1x_coco.py --output foveabox.onnx
+
+onnxsim foveabox.onnx foveabox_opt.onnx
\ No newline at end of file
diff --git a/models/cv/object_detection/fsaf/ixrt/ci/prepare.sh b/models/cv/object_detection/fsaf/ixrt/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..67155f5998e7d58e3116cd34e7dd8320b37f5437
--- /dev/null
+++ b/models/cv/object_detection/fsaf/ixrt/ci/prepare.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+ apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+ yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
+pip3 install -r requirements.txt
+# export onnx model
+python3 export.py --weight fsaf_r50_fpn_1x_coco-94ccc51f.pth --cfg fsaf_r50_fpn_1x_coco.py --output fsaf.onnx
+
+# use onnxsim optimize onnx model
+onnxsim fsaf.onnx fsaf_opt.onnx
\ No newline at end of file
diff --git a/models/cv/object_detection/hrnet/ixrt/ci/prepare.sh b/models/cv/object_detection/hrnet/ixrt/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..cbc14791567345842f24b3082733dee73dd56776
--- /dev/null
+++ b/models/cv/object_detection/hrnet/ixrt/ci/prepare.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+ apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+ yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
+pip3 install -r requirements.txt
+# export onnx model
+python3 export.py --weight fcos_hrnetv2p_w18_gn-head_4x4_1x_coco_20201212_100710-4ad151de.pth --cfg fcos_hrnetv2p-w18-gn-head_4xb4-1x_coco.py --output hrnet.onnx
+
+# Use onnxsim optimize onnx model
+onnxsim hrnet.onnx hrnet_opt.onnx
\ No newline at end of file
diff --git a/models/cv/object_detection/retinaface/igie/inference.py b/models/cv/object_detection/retinaface/igie/inference.py
index 44d29d1b9e1c8075c1212cecbe13ed683ebf1d7f..86782697f78fd90308c571d50e70410dcc245344 100644
--- a/models/cv/object_detection/retinaface/igie/inference.py
+++ b/models/cv/object_detection/retinaface/igie/inference.py
@@ -132,7 +132,7 @@ class FaceDataset(Dataset):
return np.concatenate([i[None] for i in im], axis=0), path, shapes, path_ori
def _load_image(self, i):
- im = cv2.imread(self.img_dir+'/images'+self.imgs_path[i], cv2.IMREAD_COLOR)
+ im = cv2.imread(self.img_dir+'/images/'+self.imgs_path[i], cv2.IMREAD_COLOR)
h0, w0 = im.shape[:2]
r = self.image_size / max(h0, w0)
if r != 1:
diff --git a/models/cv/object_detection/retinaface/ixrt/README.md b/models/cv/object_detection/retinaface/ixrt/README.md
index 67ce9e3cdccf9bc8326ff98572515ebddce4fa9e..2323b20fe2d009e7c9ad217f858084e196a524ec 100644
--- a/models/cv/object_detection/retinaface/ixrt/README.md
+++ b/models/cv/object_detection/retinaface/ixrt/README.md
@@ -47,7 +47,7 @@ python3 torch2onnx.py --model mobilenet0.25_Final.pth --onnx_model mnetv1_retina
```bash
export DATASETS_DIR=/Path/to/widerface/
-export GT_DIR=../igie/ground_truth
+export GT_DIR=../igie/widerface_evaluate/ground_truth
```
### FP16
diff --git a/models/cv/object_detection/retinaface/ixrt/ci/prepare.sh b/models/cv/object_detection/retinaface/ixrt/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..0796d90b857d7d36426bdfd9e38631681b9c0c04
--- /dev/null
+++ b/models/cv/object_detection/retinaface/ixrt/ci/prepare.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+ apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+ yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
+pip3 install -r requirements.txt
+
+python3 setup.py build_ext --inplace
+# export onnx model
+python3 torch2onnx.py --model mobilenet0.25_Final.pth --onnx_model mnetv1_retinaface.onnx
\ No newline at end of file
diff --git a/models/cv/object_detection/retinaface/ixrt/evaluation.py b/models/cv/object_detection/retinaface/ixrt/evaluation.py
index d3c6495aaf782725761a52c9d2442564b482155d..8d1a9e0731532faf547aa96417edc1a6da2fda9b 100644
--- a/models/cv/object_detection/retinaface/ixrt/evaluation.py
+++ b/models/cv/object_detection/retinaface/ixrt/evaluation.py
@@ -289,5 +289,7 @@ def evaluation(pred, gt_path, iou_thresh=0.5):
print("Medium Val AP: {}".format(aps[1]))
print("Hard Val AP: {}".format(aps[2]))
print("=================================================")
+ metricResult = {"metricResult": {"Easy Val AP": aps[0], "Medium Val AP": aps[1], "Hard Val AP": aps[2]}}
+ print(metricResult)
return aps[0]
\ No newline at end of file
diff --git a/models/cv/object_detection/retinaface/ixrt/scripts/infer_retinaface_fp16_accuracy.sh b/models/cv/object_detection/retinaface/ixrt/scripts/infer_retinaface_fp16_accuracy.sh
index 5722980c09728dc0e0fc0bd131bc51c126fa0290..3fca161ad63023affdf118ba3e312ce94455b13b 100644
--- a/models/cv/object_detection/retinaface/ixrt/scripts/infer_retinaface_fp16_accuracy.sh
+++ b/models/cv/object_detection/retinaface/ixrt/scripts/infer_retinaface_fp16_accuracy.sh
@@ -44,6 +44,7 @@ do
done
PROJ_DIR=${PROJ_DIR:-"."}
+GT_DIR=${GT_DIR:-"../igie/widerface_evaluate/ground_truth"}
DATASETS_DIR="${DATASETS_DIR}"
CHECKPOINTS_DIR="${PROJ_DIR}"
RUN_DIR="${PROJ_DIR}"
diff --git a/models/cv/object_detection/retinaface/ixrt/scripts/infer_retinaface_fp16_performance.sh b/models/cv/object_detection/retinaface/ixrt/scripts/infer_retinaface_fp16_performance.sh
index ddbcc65577d11410149adeb1c08cdbb305651ce5..cb486381f32bd88954137ccb6a4c44d963c50382 100644
--- a/models/cv/object_detection/retinaface/ixrt/scripts/infer_retinaface_fp16_performance.sh
+++ b/models/cv/object_detection/retinaface/ixrt/scripts/infer_retinaface_fp16_performance.sh
@@ -44,6 +44,7 @@ do
done
PROJ_DIR=${PROJ_DIR:-"."}
+GT_DIR=${GT_DIR:-"../igie/widerface_evaluate/ground_truth"}
DATASETS_DIR="${DATASETS_DIR}"
CHECKPOINTS_DIR="${PROJ_DIR}"
RUN_DIR="${PROJ_DIR}"
diff --git a/models/cv/object_detection/retinaface/ixrt/wider_face_dataset.py b/models/cv/object_detection/retinaface/ixrt/wider_face_dataset.py
index 8095cb6f05897c19d387230755b32c1ae6ad3352..e383834f688e59746408f3873d0c3794db5e43bd 100644
--- a/models/cv/object_detection/retinaface/ixrt/wider_face_dataset.py
+++ b/models/cv/object_detection/retinaface/ixrt/wider_face_dataset.py
@@ -48,9 +48,9 @@ class WiderFaceDetection(data.Dataset):
def __init__(self, prj_dir, preproc=lt_preproc, input_size=(320, 320)):
self.preproc = preproc
self.input_size = input_size
- self.image_dir = os.path.join(prj_dir, "images")
+ self.image_dir = os.path.join(prj_dir, "val/images")
- testset_list = os.path.join(prj_dir, "wider_val.txt")
+ testset_list = os.path.join(prj_dir, "val/wider_val.txt")
with open(testset_list, 'r') as fr:
self.imgs_path = fr.read().split()
diff --git a/models/cv/object_detection/yolov3/igie/requirements.txt b/models/cv/object_detection/yolov3/igie/requirements.txt
index 171602527bdea43ee2216f9ad4629d83cfd92e38..d58360efcf45ff6176a24cc37580d08ef176381b 100644
--- a/models/cv/object_detection/yolov3/igie/requirements.txt
+++ b/models/cv/object_detection/yolov3/igie/requirements.txt
@@ -1,5 +1,5 @@
tqdm
onnx
onnxsim
-ultralytics
+ultralytics==8.3.97
pycocotools
diff --git a/models/cv/object_detection/yolov5/igie/requirements.txt b/models/cv/object_detection/yolov5/igie/requirements.txt
index 171602527bdea43ee2216f9ad4629d83cfd92e38..d58360efcf45ff6176a24cc37580d08ef176381b 100644
--- a/models/cv/object_detection/yolov5/igie/requirements.txt
+++ b/models/cv/object_detection/yolov5/igie/requirements.txt
@@ -1,5 +1,5 @@
tqdm
onnx
onnxsim
-ultralytics
+ultralytics==8.3.97
pycocotools
diff --git a/models/cv/object_detection/yolov5/ixrt/requirements.txt b/models/cv/object_detection/yolov5/ixrt/requirements.txt
index b0f4374b2b778c81875da50d088fecedd01689c9..10a9fba6a70545eee20ab0db7bb740b1d4807f95 100644
--- a/models/cv/object_detection/yolov5/ixrt/requirements.txt
+++ b/models/cv/object_detection/yolov5/ixrt/requirements.txt
@@ -1,7 +1,7 @@
tqdm
onnx
onnxsim
-ultralytics
+ultralytics==8.3.97
pycocotools
opencv-python==4.6.0.66
pycuda
\ No newline at end of file
diff --git a/models/cv/object_detection/yolov5s/ixrt/requirements.txt b/models/cv/object_detection/yolov5s/ixrt/requirements.txt
index ffb8ce179fef26f79070045778708b03b8111fce..b1a10ab060644ea96d6ad77b36dbc4367a632591 100644
--- a/models/cv/object_detection/yolov5s/ixrt/requirements.txt
+++ b/models/cv/object_detection/yolov5s/ixrt/requirements.txt
@@ -1,6 +1,6 @@
tqdm
onnx
onnxsim
-ultralytics
+ultralytics==8.3.97
pycocotools
pycuda
\ No newline at end of file
diff --git a/models/cv/object_detection/yolov7/ixrt/requirements.txt b/models/cv/object_detection/yolov7/ixrt/requirements.txt
index b0f4374b2b778c81875da50d088fecedd01689c9..10a9fba6a70545eee20ab0db7bb740b1d4807f95 100644
--- a/models/cv/object_detection/yolov7/ixrt/requirements.txt
+++ b/models/cv/object_detection/yolov7/ixrt/requirements.txt
@@ -1,7 +1,7 @@
tqdm
onnx
onnxsim
-ultralytics
+ultralytics==8.3.97
pycocotools
opencv-python==4.6.0.66
pycuda
\ No newline at end of file
diff --git a/models/cv/ocr/kie_layoutxlm/igie/requirements.txt b/models/cv/ocr/kie_layoutxlm/igie/requirements.txt
index ede2fc9df150acb6ef4821e922de2ea645355f7d..2f2f00126b5801c5996ec60782011f59875bf740 100644
--- a/models/cv/ocr/kie_layoutxlm/igie/requirements.txt
+++ b/models/cv/ocr/kie_layoutxlm/igie/requirements.txt
@@ -5,6 +5,6 @@ Polygon3
paddlenlp==2.8.1
lanms-neo==1.0.2
paddleocr==2.6.0
-paddle2onnx
+paddle2onnx==1.3.0
python-bidi
protobuf==3.20.3
\ No newline at end of file
diff --git a/models/multimodal/vision_language_model/aria/vllm/README.md b/models/multimodal/vision_language_model/aria/vllm/README.md
index ae768ce6f06f8558014b34aaee00df7d45855751..7ef43e546b4ecc15f06f2d5dc7d7e8acc0aa404c 100644
--- a/models/multimodal/vision_language_model/aria/vllm/README.md
+++ b/models/multimodal/vision_language_model/aria/vllm/README.md
@@ -37,6 +37,8 @@ In order to run the model smoothly, you need to get the sdk from [resource cente
yum install -y mesa-libGL
## Ubuntu
apt install -y libgl1-mesa-glx
+
+pip install transformers==4.48.0
```
## Model Inference
diff --git a/models/multimodal/vision_language_model/aria/vllm/ci/prepare.sh b/models/multimodal/vision_language_model/aria/vllm/ci/prepare.sh
index 7232aa2996f379a961cf931968a1319fb70ac091..ff5f4e533365daff8abc4c396d8b9206ec3cd6b3 100644
--- a/models/multimodal/vision_language_model/aria/vllm/ci/prepare.sh
+++ b/models/multimodal/vision_language_model/aria/vllm/ci/prepare.sh
@@ -25,3 +25,4 @@ else
fi
cp -r ../../vllm_public_assets/ ./
+pip install transformers==4.48.0
\ No newline at end of file
diff --git a/models/multimodal/vision_language_model/chameleon_7b/vllm/README.md b/models/multimodal/vision_language_model/chameleon_7b/vllm/README.md
index 1ed7c9116c970df30b47800496835aac9a0016c9..7a488b0a320202b0914a55f724627eb98eb482b6 100755
--- a/models/multimodal/vision_language_model/chameleon_7b/vllm/README.md
+++ b/models/multimodal/vision_language_model/chameleon_7b/vllm/README.md
@@ -17,6 +17,8 @@ Chameleon, an AI system that mitigates these limitations by augmenting LLMs with
- Model:
```bash
+cp -r ../../vllm_public_assets/ ./
+
# Download model from the website and make sure the model's path is "data/chameleon-7b"
mkdir data
```
diff --git a/models/multimodal/vision_language_model/chameleon_7b/vllm/ci/prepare.sh b/models/multimodal/vision_language_model/chameleon_7b/vllm/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..7232aa2996f379a961cf931968a1319fb70ac091
--- /dev/null
+++ b/models/multimodal/vision_language_model/chameleon_7b/vllm/ci/prepare.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+ apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+ yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
+
+cp -r ../../vllm_public_assets/ ./
diff --git a/models/multimodal/vision_language_model/chameleon_7b/vllm/utils.py b/models/multimodal/vision_language_model/chameleon_7b/vllm/utils.py
deleted file mode 100644
index 48445ed97d08a8388a90d20e026609b5c1e88a99..0000000000000000000000000000000000000000
--- a/models/multimodal/vision_language_model/chameleon_7b/vllm/utils.py
+++ /dev/null
@@ -1,224 +0,0 @@
-# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
-# All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"); you may
-# not use this file except in compliance with the License. You may obtain
-# a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
-# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
-# License for the specific language governing permissions and limitations
-# under the License.
-import argparse
-import codecs
-import logging
-
-"""
-The following arguments can not be add in args...
-early_stopping: Union[bool, str] = False,
-early_stopping: Controls the stopping condition for beam search. It
- accepts the following values: `True`, where the generation stops as
- soon as there are `best_of` complete candidates; `False`, where an
- heuristic is applied and the generation stops when is it very
- unlikely to find better candidates; `"never"`, where the beam search
- procedure only stops when there cannot be better candidates
- (canonical beam search algorithm).
-stop: Optional[Union[str, List[str]]] = None,
-stop_token_ids: Optional[List[int]] = None,
-logits_processors: Optional[List[LogitsProcessor]] = None,
-logits_processors: List of functions that modify logits based on
- previously generated tokens, and optionally prompt tokens as
- a first argument.
-truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]] = None,
-truncate_prompt_tokens: If set to an integer k, will use only the last k
- tokens from the prompt (i.e., left truncation). Defaults to None
- (i.e., no truncation).
- """
-
-
-def sampling_add_cli_args(args: argparse.ArgumentParser) -> argparse.ArgumentParser:
- args.add_argument(
- "--n",
- type=int,
- default=1,
- help="Number of output sequences to return for the given prompt.",
- )
- args.add_argument(
- "--best-of",
- type=int,
- default=None,
- help="Number of output sequences that are generated from the prompt. "
- "From these `best_of` sequences, the top `n` sequences are returned. "
- "`best_of` must be greater than or equal to `n`. This is treated as "
- "the beam width when `use_beam_search` is True. By default, `best_of`"
- "is set to `n`.",
- )
- args.add_argument(
- "--presence-penalty",
- type=float,
- default=0.0,
- help="Float that penalizes new tokens based on whether they "
- "appear in the generated text so far. Values > 0 encourage the model "
- "to use new tokens, while values < 0 encourage the model to repeat "
- "tokens.",
- )
- args.add_argument(
- "--frequency-penalty",
- type=float,
- default=0.0,
- help="Float that penalizes new tokens based on their "
- " frequency in the generated text so far. Values > 0 encourage the "
- " model to use new tokens, while values < 0 encourage the model to "
- "repeat tokens.",
- )
- args.add_argument(
- "--repetition-penalty",
- type=float,
- default=1.0,
- help="Float that penalizes new tokens based on whether "
- "they appear in the prompt and the generated text so far. Values > 1 "
- "encourage the model to use new tokens, while values < 1 encourage "
- "the model to repeat tokens.",
- )
- args.add_argument(
- "--temperature",
- type=float,
- default=1.0,
- help="Float that controls the randomness of the sampling. Lower "
- "values make the model more deterministic, while higher values make "
- "the model more random. Zero means greedy sampling.",
- )
- args.add_argument(
- "--top-p",
- type=float,
- default=1.0,
- help="Float that controls the cumulative probability of the top tokens "
- "to consider. Must be in (0, 1]. Set to 1 to consider all tokens.",
- )
- args.add_argument(
- "--top-k",
- type=int,
- default=-1,
- help="Integer that controls the number of top tokens to consider. Set "
- "to -1 to consider all tokens.",
- )
- args.add_argument(
- "--min-p",
- type=float,
- default=0.0,
- help="Float that represents the minimum probability for a token to be "
- "considered, relative to the probability of the most likely token. "
- "Must be in [0, 1]. Set to 0 to disable this.",
- )
- args.add_argument(
- "--use-beam-search",
- default=False,
- action="store_true",
- help="Whether to use beam search instead of sampling.",
- )
- args.add_argument(
- "--length-penalty",
- type=float,
- default=1.0,
- help="Float that penalizes sequences based on their length. Used in beam search.",
- )
- args.add_argument(
- "--stop",
- type=str,
- default=None,
- help="List of strings that stop the generation when they are generated. "
- "The returned output will not contain the stop strings.",
- )
- args.add_argument(
- "--stop-token-ids",
- type=int,
- default=None,
- help="List of tokens that stop the generation when they are "
- "generated. The returned output will contain the stop tokens unless "
- "the stop tokens are special tokens.",
- )
- args.add_argument(
- "--include-stop-str-in-output",
- default=False,
- action="store_true",
- help="Whether to include the stop strings in output text. Defaults to False.",
- )
- args.add_argument(
- "--ignore-eos",
- default=False,
- action="store_true",
- help="Whether to ignore the EOS token and continue generating tokens after the EOS token is generated.",
- )
- args.add_argument(
- "--max-tokens",
- type=int,
- default=16,
- help="Maximum number of tokens to generate per output sequence.",
- )
- args.add_argument(
- "--min-tokens",
- type=int,
- default=0,
- help="Minimum number of tokens to generate per output sequence "
- "before EOS or stop_token_ids can be generated",
- )
- args.add_argument(
- "--logprobs",
- type=int,
- default=None,
- help="NNumber of log probabilities to return per output token. "
- "Note that the implementation follows the OpenAI API: The return "
- "result includes the log probabilities on the `logprobs` most likely "
- "tokens, as well the chosen tokens. The API will always return the "
- "log probability of the sampled token, so there may be up to "
- "`logprobs+1` elements in the response.",
- )
- args.add_argument(
- "--prompt-logprobs",
- type=int,
- default=None,
- help="Number of log probabilities to return per prompt token.",
- )
- args.add_argument(
- "--detokenize",
- type=bool,
- default=True,
- help="Whether to detokenize the output. Defaults to True.",
- )
- args.add_argument(
- "--skip-special-tokens",
- default=True,
- action="store_false",
- help="Whether to skip special tokens in the output.",
- )
- args.add_argument(
- "--spaces-between-special-tokens",
- default=True,
- action="store_false",
- help="Whether to add spaces between special tokens in the output. Defaults to True.",
- )
- return args
-
-
-def load_chat_template(tokenizer, chat_template):
- if chat_template is not None:
- try:
- with open(chat_template, "r") as f:
- tokenizer.chat_template = f.read()
- except OSError:
- # If opening a file fails, set chat template to be args to
- # ensure we decode so our escape are interpreted correctly
- tokenizer.chat_template = codecs.decode(chat_template, "unicode_escape")
-
- logging.info(f"Using supplied chat template:\n{tokenizer.chat_template}")
- elif tokenizer.chat_template is not None:
- logging.info(
- f"Using default chat template:\n{tokenizer.chat_template}. This May lead to unsatisfactory results. You can provide a template.jinja file for vllm."
- )
- else:
- logging.warning(
- "No chat template provided. Chat API will not work. This May lead to unsatisfactory results. You can provide a template.jinja file for vllm."
- )
\ No newline at end of file
diff --git a/models/multimodal/vision_language_model/chameleon_7b/vllm/vllm_public_assets/cherry_blossom.jpg b/models/multimodal/vision_language_model/chameleon_7b/vllm/vllm_public_assets/cherry_blossom.jpg
deleted file mode 100644
index 63173db0da7687d7841fe4d85239d8e277d81259..0000000000000000000000000000000000000000
Binary files a/models/multimodal/vision_language_model/chameleon_7b/vllm/vllm_public_assets/cherry_blossom.jpg and /dev/null differ
diff --git a/models/multimodal/vision_language_model/fuyu_8b/vllm/README.md b/models/multimodal/vision_language_model/fuyu_8b/vllm/README.md
index f751f8c4db94a5b7c1e170ead59ec7ad40fcfc9c..d13e0b364e215b3c4479edd6f0ee8072977f1e36 100755
--- a/models/multimodal/vision_language_model/fuyu_8b/vllm/README.md
+++ b/models/multimodal/vision_language_model/fuyu_8b/vllm/README.md
@@ -21,6 +21,8 @@ transformer decoder like an image transformer (albeit with no pooling and causal
- Model:
```bash
+cp -r ../../vllm_public_assets/ ./
+
# Download model from the website and make sure the model's path is "data/fuyu-8b"
mkdir data/
```
diff --git a/models/multimodal/vision_language_model/fuyu_8b/vllm/ci/prepare.sh b/models/multimodal/vision_language_model/fuyu_8b/vllm/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..7232aa2996f379a961cf931968a1319fb70ac091
--- /dev/null
+++ b/models/multimodal/vision_language_model/fuyu_8b/vllm/ci/prepare.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
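+# Install the system libGL runtime (typically needed by OpenCV-based image preprocessing); pick the package manager by distro.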
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+ apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+ yum install -y mesa-libGL
+else
+  echo "Unsupported OS"
+fi
+
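+# Copy the shared vLLM demo assets (sample images such as cherry_blossom.jpg) into the working directory.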
+cp -r ../../vllm_public_assets/ ./
diff --git a/models/multimodal/vision_language_model/fuyu_8b/vllm/utils.py b/models/multimodal/vision_language_model/fuyu_8b/vllm/utils.py
deleted file mode 100644
index 48445ed97d08a8388a90d20e026609b5c1e88a99..0000000000000000000000000000000000000000
--- a/models/multimodal/vision_language_model/fuyu_8b/vllm/utils.py
+++ /dev/null
@@ -1,224 +0,0 @@
-# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
-# All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"); you may
-# not use this file except in compliance with the License. You may obtain
-# a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
-# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
-# License for the specific language governing permissions and limitations
-# under the License.
-import argparse
-import codecs
-import logging
-
-"""
-The following arguments can not be add in args...
-early_stopping: Union[bool, str] = False,
-early_stopping: Controls the stopping condition for beam search. It
- accepts the following values: `True`, where the generation stops as
- soon as there are `best_of` complete candidates; `False`, where an
- heuristic is applied and the generation stops when is it very
- unlikely to find better candidates; `"never"`, where the beam search
- procedure only stops when there cannot be better candidates
- (canonical beam search algorithm).
-stop: Optional[Union[str, List[str]]] = None,
-stop_token_ids: Optional[List[int]] = None,
-logits_processors: Optional[List[LogitsProcessor]] = None,
-logits_processors: List of functions that modify logits based on
- previously generated tokens, and optionally prompt tokens as
- a first argument.
-truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]] = None,
-truncate_prompt_tokens: If set to an integer k, will use only the last k
- tokens from the prompt (i.e., left truncation). Defaults to None
- (i.e., no truncation).
- """
-
-
-def sampling_add_cli_args(args: argparse.ArgumentParser) -> argparse.ArgumentParser:
- args.add_argument(
- "--n",
- type=int,
- default=1,
- help="Number of output sequences to return for the given prompt.",
- )
- args.add_argument(
- "--best-of",
- type=int,
- default=None,
- help="Number of output sequences that are generated from the prompt. "
- "From these `best_of` sequences, the top `n` sequences are returned. "
- "`best_of` must be greater than or equal to `n`. This is treated as "
- "the beam width when `use_beam_search` is True. By default, `best_of`"
- "is set to `n`.",
- )
- args.add_argument(
- "--presence-penalty",
- type=float,
- default=0.0,
- help="Float that penalizes new tokens based on whether they "
- "appear in the generated text so far. Values > 0 encourage the model "
- "to use new tokens, while values < 0 encourage the model to repeat "
- "tokens.",
- )
- args.add_argument(
- "--frequency-penalty",
- type=float,
- default=0.0,
- help="Float that penalizes new tokens based on their "
- " frequency in the generated text so far. Values > 0 encourage the "
- " model to use new tokens, while values < 0 encourage the model to "
- "repeat tokens.",
- )
- args.add_argument(
- "--repetition-penalty",
- type=float,
- default=1.0,
- help="Float that penalizes new tokens based on whether "
- "they appear in the prompt and the generated text so far. Values > 1 "
- "encourage the model to use new tokens, while values < 1 encourage "
- "the model to repeat tokens.",
- )
- args.add_argument(
- "--temperature",
- type=float,
- default=1.0,
- help="Float that controls the randomness of the sampling. Lower "
- "values make the model more deterministic, while higher values make "
- "the model more random. Zero means greedy sampling.",
- )
- args.add_argument(
- "--top-p",
- type=float,
- default=1.0,
- help="Float that controls the cumulative probability of the top tokens "
- "to consider. Must be in (0, 1]. Set to 1 to consider all tokens.",
- )
- args.add_argument(
- "--top-k",
- type=int,
- default=-1,
- help="Integer that controls the number of top tokens to consider. Set "
- "to -1 to consider all tokens.",
- )
- args.add_argument(
- "--min-p",
- type=float,
- default=0.0,
- help="Float that represents the minimum probability for a token to be "
- "considered, relative to the probability of the most likely token. "
- "Must be in [0, 1]. Set to 0 to disable this.",
- )
- args.add_argument(
- "--use-beam-search",
- default=False,
- action="store_true",
- help="Whether to use beam search instead of sampling.",
- )
- args.add_argument(
- "--length-penalty",
- type=float,
- default=1.0,
- help="Float that penalizes sequences based on their length. Used in beam search.",
- )
- args.add_argument(
- "--stop",
- type=str,
- default=None,
- help="List of strings that stop the generation when they are generated. "
- "The returned output will not contain the stop strings.",
- )
- args.add_argument(
- "--stop-token-ids",
- type=int,
- default=None,
- help="List of tokens that stop the generation when they are "
- "generated. The returned output will contain the stop tokens unless "
- "the stop tokens are special tokens.",
- )
- args.add_argument(
- "--include-stop-str-in-output",
- default=False,
- action="store_true",
- help="Whether to include the stop strings in output text. Defaults to False.",
- )
- args.add_argument(
- "--ignore-eos",
- default=False,
- action="store_true",
- help="Whether to ignore the EOS token and continue generating tokens after the EOS token is generated.",
- )
- args.add_argument(
- "--max-tokens",
- type=int,
- default=16,
- help="Maximum number of tokens to generate per output sequence.",
- )
- args.add_argument(
- "--min-tokens",
- type=int,
- default=0,
- help="Minimum number of tokens to generate per output sequence "
- "before EOS or stop_token_ids can be generated",
- )
- args.add_argument(
- "--logprobs",
- type=int,
- default=None,
- help="NNumber of log probabilities to return per output token. "
- "Note that the implementation follows the OpenAI API: The return "
- "result includes the log probabilities on the `logprobs` most likely "
- "tokens, as well the chosen tokens. The API will always return the "
- "log probability of the sampled token, so there may be up to "
- "`logprobs+1` elements in the response.",
- )
- args.add_argument(
- "--prompt-logprobs",
- type=int,
- default=None,
- help="Number of log probabilities to return per prompt token.",
- )
- args.add_argument(
- "--detokenize",
- type=bool,
- default=True,
- help="Whether to detokenize the output. Defaults to True.",
- )
- args.add_argument(
- "--skip-special-tokens",
- default=True,
- action="store_false",
- help="Whether to skip special tokens in the output.",
- )
- args.add_argument(
- "--spaces-between-special-tokens",
- default=True,
- action="store_false",
- help="Whether to add spaces between special tokens in the output. Defaults to True.",
- )
- return args
-
-
-def load_chat_template(tokenizer, chat_template):
- if chat_template is not None:
- try:
- with open(chat_template, "r") as f:
- tokenizer.chat_template = f.read()
- except OSError:
- # If opening a file fails, set chat template to be args to
- # ensure we decode so our escape are interpreted correctly
- tokenizer.chat_template = codecs.decode(chat_template, "unicode_escape")
-
- logging.info(f"Using supplied chat template:\n{tokenizer.chat_template}")
- elif tokenizer.chat_template is not None:
- logging.info(
- f"Using default chat template:\n{tokenizer.chat_template}. This May lead to unsatisfactory results. You can provide a template.jinja file for vllm."
- )
- else:
- logging.warning(
- "No chat template provided. Chat API will not work. This May lead to unsatisfactory results. You can provide a template.jinja file for vllm."
- )
\ No newline at end of file
diff --git a/models/multimodal/vision_language_model/fuyu_8b/vllm/vllm_public_assets/cherry_blossom.jpg b/models/multimodal/vision_language_model/fuyu_8b/vllm/vllm_public_assets/cherry_blossom.jpg
deleted file mode 100644
index 63173db0da7687d7841fe4d85239d8e277d81259..0000000000000000000000000000000000000000
Binary files a/models/multimodal/vision_language_model/fuyu_8b/vllm/vllm_public_assets/cherry_blossom.jpg and /dev/null differ
diff --git a/models/multimodal/vision_language_model/intern_vl/vllm/README.md b/models/multimodal/vision_language_model/intern_vl/vllm/README.md
index 78bb8d1b1297bba864816057c0192193e85f8849..c337a34094d9a2c4666cb2d3126aa3f64dcccc2d 100644
--- a/models/multimodal/vision_language_model/intern_vl/vllm/README.md
+++ b/models/multimodal/vision_language_model/intern_vl/vllm/README.md
@@ -21,6 +21,7 @@ learning.
```bash
cd ${DeepSparkInference}/models/vision-language-understanding/Intern_VL/vllm
+cp -r ../../vllm_public_assets/ ./
mkdir -p data/intern_vl
ln -s /path/to/InternVL2-4B ./data/intern_vl
```
diff --git a/models/multimodal/vision_language_model/intern_vl/vllm/ci/prepare.sh b/models/multimodal/vision_language_model/intern_vl/vllm/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..7232aa2996f379a961cf931968a1319fb70ac091
--- /dev/null
+++ b/models/multimodal/vision_language_model/intern_vl/vllm/ci/prepare.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
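+# Install the system libGL runtime (typically needed by OpenCV-based image preprocessing); pick the package manager by distro.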
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+ apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+ yum install -y mesa-libGL
+else
+  echo "Unsupported OS"
+fi
+
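+# Copy the shared vLLM demo assets (sample images such as cherry_blossom.jpg) into the working directory.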
+cp -r ../../vllm_public_assets/ ./
diff --git a/models/multimodal/vision_language_model/intern_vl/vllm/utils.py b/models/multimodal/vision_language_model/intern_vl/vllm/utils.py
deleted file mode 100644
index c6def85dedc08ef9c3a489ce9dc5b1ff4a5e48b0..0000000000000000000000000000000000000000
--- a/models/multimodal/vision_language_model/intern_vl/vllm/utils.py
+++ /dev/null
@@ -1,173 +0,0 @@
-# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
-# All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"); you may
-# not use this file except in compliance with the License. You may obtain
-# a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
-# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
-# License for the specific language governing permissions and limitations
-# under the License.
-
-import codecs
-import logging
-import argparse
-
-
-def sampling_add_cli_args(args: argparse.ArgumentParser) -> argparse.ArgumentParser:
- args.add_argument(
- '--n',
- type=int,
- default=1,
- help="Number of output sequences to return for the given prompt.")
- args.add_argument(
- '--best-of',
- type=int,
- default=None,
- help="Number of output sequences that are generated from the prompt. "
- "From these `best_of` sequences, the top `n` sequences are returned. "
- "`best_of` must be greater than or equal to `n`. This is treated as "
- "the beam width when `use_beam_search` is True. By default, `best_of`"
- "is set to `n`.")
- args.add_argument(
- '--presence-penalty',
- type=float,
- default=0.0,
- help="Float that penalizes new tokens based on whether they "
- "appear in the generated text so far. Values > 0 encourage the model "
- "to use new tokens, while values < 0 encourage the model to repeat "
- "tokens.")
- args.add_argument(
- '--frequency-penalty',
- type=float,
- default=0.0,
- help="Float that penalizes new tokens based on their "
- " frequency in the generated text so far. Values > 0 encourage the "
- " model to use new tokens, while values < 0 encourage the model to "
- "repeat tokens.")
- args.add_argument(
- '--repetition-penalty',
- type=float,
- default=1.0,
- help="Float that penalizes new tokens based on whether "
- "they appear in the prompt and the generated text so far. Values > 1 "
- "encourage the model to use new tokens, while values < 1 encourage "
- "the model to repeat tokens.")
- args.add_argument(
- '--temperature',
- type=float,
- default=1.0,
- help="Float that controls the randomness of the sampling. Lower "
- "values make the model more deterministic, while higher values make "
- "the model more random. Zero means greedy sampling.")
- args.add_argument(
- '--top-p',
- type=float,
- default=1.0,
- help="Float that controls the cumulative probability of the top tokens "
- "to consider. Must be in (0, 1]. Set to 1 to consider all tokens.")
- args.add_argument(
- '--top-k',
- type=int,
- default=-1,
- help="Integer that controls the number of top tokens to consider. Set "
- "to -1 to consider all tokens.")
- args.add_argument(
- '--min-p',
- type=float,
- default=0.0,
- help="Float that represents the minimum probability for a token to be "
- "considered, relative to the probability of the most likely token. "
- "Must be in [0, 1]. Set to 0 to disable this.")
- args.add_argument(
- '--use-beam-search',
- default=False,
- action="store_true",
- help="Whether to use beam search instead of sampling.")
- args.add_argument(
- '--length-penalty',
- type=float,
- default=1.0,
- help="Float that penalizes sequences based on their length. Used in beam search.")
- args.add_argument(
- '--stop',
- type=str,
- default=None,
- help="List of strings that stop the generation when they are generated. "
- "The returned output will not contain the stop strings.")
- args.add_argument(
- '--stop-token-ids',
- type=int,
- default=None,
- help="List of tokens that stop the generation when they are "
- "generated. The returned output will contain the stop tokens unless "
- "the stop tokens are special tokens.")
- args.add_argument(
- '--include-stop-str-in-output',
- default=False,
- action="store_true",
- help="Whether to include the stop strings in output text. Defaults to False.")
- args.add_argument(
- '--ignore-eos',
- default=False,
- action="store_true",
- help="Whether to ignore the EOS token and continue generating tokens after the EOS token is generated.")
- args.add_argument(
- '--max-tokens',
- type=int,
- default=16,
- help="Maximum number of tokens to generate per output sequence.")
- args.add_argument(
- '--logprobs',
- type=int,
- default=None,
- help="NNumber of log probabilities to return per output token. "
- "Note that the implementation follows the OpenAI API: The return "
- "result includes the log probabilities on the `logprobs` most likely "
- "tokens, as well the chosen tokens. The API will always return the "
- "log probability of the sampled token, so there may be up to "
- "`logprobs+1` elements in the response.")
- args.add_argument(
- '--prompt-logprobs',
- type=int,
- default=None,
- help="Number of log probabilities to return per prompt token.")
- args.add_argument(
- '--skip-special-tokens',
- default=True,
- action="store_false",
- help="Whether to skip special tokens in the output.")
- args.add_argument(
- '--spaces-between-special-tokens',
- default=True,
- action="store_false",
- help="Whether to add spaces between special tokens in the output. Defaults to True.")
- # early_stopping logits_processors seed
- return args
-
-
-def load_chat_template(tokenizer, chat_template):
- if chat_template is not None:
- try:
- with open(chat_template, "r") as f:
- tokenizer.chat_template = f.read()
- except OSError:
- # If opening a file fails, set chat template to be args to
- # ensure we decode so our escape are interpreted correctly
- tokenizer.chat_template = codecs.decode(
- chat_template, "unicode_escape")
-
- logging.info(
- f"Using supplied chat template:\n{tokenizer.chat_template}"
- )
- elif tokenizer.chat_template is not None:
- logging.info(
- f"Using default chat template:\n{tokenizer.chat_template}. This May lead to unsatisfactory results. You can provide a template.jinja file for vllm."
- )
- else:
- logging.warning(
- "No chat template provided. Chat API will not work. This May lead to unsatisfactory results. You can provide a template.jinja file for vllm.")
diff --git a/models/multimodal/vision_language_model/intern_vl/vllm/vllm_public_assets/cherry_blossom.jpg b/models/multimodal/vision_language_model/intern_vl/vllm/vllm_public_assets/cherry_blossom.jpg
deleted file mode 100644
index 63173db0da7687d7841fe4d85239d8e277d81259..0000000000000000000000000000000000000000
Binary files a/models/multimodal/vision_language_model/intern_vl/vllm/vllm_public_assets/cherry_blossom.jpg and /dev/null differ
diff --git a/models/multimodal/vision_language_model/llava/vllm/README.md b/models/multimodal/vision_language_model/llava/vllm/README.md
index 599b66f04af0d6d093fd96be05febbad896292fd..78a2119013b612c6e26f517339cf634fa1677b54 100644
--- a/models/multimodal/vision_language_model/llava/vllm/README.md
+++ b/models/multimodal/vision_language_model/llava/vllm/README.md
@@ -22,6 +22,8 @@ reasoning.
-llava-v1.6-vicuna-7b-hf:
```bash
+cp -r ../../vllm_public_assets/ ./
+
# Download model from the website and make sure the model's path is "data/llava"
mkdir data/
```
diff --git a/models/multimodal/vision_language_model/llava/vllm/ci/prepare.sh b/models/multimodal/vision_language_model/llava/vllm/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..7232aa2996f379a961cf931968a1319fb70ac091
--- /dev/null
+++ b/models/multimodal/vision_language_model/llava/vllm/ci/prepare.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
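+# Install the system libGL runtime (typically needed by OpenCV-based image preprocessing); pick the package manager by distro.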
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+ apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+ yum install -y mesa-libGL
+else
+  echo "Unsupported OS"
+fi
+
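+# Copy the shared vLLM demo assets (sample images such as cherry_blossom.jpg) into the working directory.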
+cp -r ../../vllm_public_assets/ ./
diff --git a/models/multimodal/vision_language_model/llava/vllm/utils.py b/models/multimodal/vision_language_model/llava/vllm/utils.py
deleted file mode 100644
index 11f23209a3175f0200ac6b5c499765101e3c3a0a..0000000000000000000000000000000000000000
--- a/models/multimodal/vision_language_model/llava/vllm/utils.py
+++ /dev/null
@@ -1,225 +0,0 @@
-# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
-# All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"); you may
-# not use this file except in compliance with the License. You may obtain
-# a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
-# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
-# License for the specific language governing permissions and limitations
-# under the License.
-
-import argparse
-import codecs
-import logging
-
-"""
-The following arguments can not be add in args...
-early_stopping: Union[bool, str] = False,
-early_stopping: Controls the stopping condition for beam search. It
- accepts the following values: `True`, where the generation stops as
- soon as there are `best_of` complete candidates; `False`, where an
- heuristic is applied and the generation stops when is it very
- unlikely to find better candidates; `"never"`, where the beam search
- procedure only stops when there cannot be better candidates
- (canonical beam search algorithm).
-stop: Optional[Union[str, List[str]]] = None,
-stop_token_ids: Optional[List[int]] = None,
-logits_processors: Optional[List[LogitsProcessor]] = None,
-logits_processors: List of functions that modify logits based on
- previously generated tokens, and optionally prompt tokens as
- a first argument.
-truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]] = None,
-truncate_prompt_tokens: If set to an integer k, will use only the last k
- tokens from the prompt (i.e., left truncation). Defaults to None
- (i.e., no truncation).
- """
-
-
-def sampling_add_cli_args(args: argparse.ArgumentParser) -> argparse.ArgumentParser:
- args.add_argument(
- "--n",
- type=int,
- default=1,
- help="Number of output sequences to return for the given prompt.",
- )
- args.add_argument(
- "--best-of",
- type=int,
- default=None,
- help="Number of output sequences that are generated from the prompt. "
- "From these `best_of` sequences, the top `n` sequences are returned. "
- "`best_of` must be greater than or equal to `n`. This is treated as "
- "the beam width when `use_beam_search` is True. By default, `best_of`"
- "is set to `n`.",
- )
- args.add_argument(
- "--presence-penalty",
- type=float,
- default=0.0,
- help="Float that penalizes new tokens based on whether they "
- "appear in the generated text so far. Values > 0 encourage the model "
- "to use new tokens, while values < 0 encourage the model to repeat "
- "tokens.",
- )
- args.add_argument(
- "--frequency-penalty",
- type=float,
- default=0.0,
- help="Float that penalizes new tokens based on their "
- " frequency in the generated text so far. Values > 0 encourage the "
- " model to use new tokens, while values < 0 encourage the model to "
- "repeat tokens.",
- )
- args.add_argument(
- "--repetition-penalty",
- type=float,
- default=1.0,
- help="Float that penalizes new tokens based on whether "
- "they appear in the prompt and the generated text so far. Values > 1 "
- "encourage the model to use new tokens, while values < 1 encourage "
- "the model to repeat tokens.",
- )
- args.add_argument(
- "--temperature",
- type=float,
- default=1.0,
- help="Float that controls the randomness of the sampling. Lower "
- "values make the model more deterministic, while higher values make "
- "the model more random. Zero means greedy sampling.",
- )
- args.add_argument(
- "--top-p",
- type=float,
- default=1.0,
- help="Float that controls the cumulative probability of the top tokens "
- "to consider. Must be in (0, 1]. Set to 1 to consider all tokens.",
- )
- args.add_argument(
- "--top-k",
- type=int,
- default=-1,
- help="Integer that controls the number of top tokens to consider. Set "
- "to -1 to consider all tokens.",
- )
- args.add_argument(
- "--min-p",
- type=float,
- default=0.0,
- help="Float that represents the minimum probability for a token to be "
- "considered, relative to the probability of the most likely token. "
- "Must be in [0, 1]. Set to 0 to disable this.",
- )
- args.add_argument(
- "--use-beam-search",
- default=False,
- action="store_true",
- help="Whether to use beam search instead of sampling.",
- )
- args.add_argument(
- "--length-penalty",
- type=float,
- default=1.0,
- help="Float that penalizes sequences based on their length. Used in beam search.",
- )
- args.add_argument(
- "--stop",
- type=str,
- default=None,
- help="List of strings that stop the generation when they are generated. "
- "The returned output will not contain the stop strings.",
- )
- args.add_argument(
- "--stop-token-ids",
- type=int,
- default=None,
- help="List of tokens that stop the generation when they are "
- "generated. The returned output will contain the stop tokens unless "
- "the stop tokens are special tokens.",
- )
- args.add_argument(
- "--include-stop-str-in-output",
- default=False,
- action="store_true",
- help="Whether to include the stop strings in output text. Defaults to False.",
- )
- args.add_argument(
- "--ignore-eos",
- default=False,
- action="store_true",
- help="Whether to ignore the EOS token and continue generating tokens after the EOS token is generated.",
- )
- args.add_argument(
- "--max-tokens",
- type=int,
- default=16,
- help="Maximum number of tokens to generate per output sequence.",
- )
- args.add_argument(
- "--min-tokens",
- type=int,
- default=0,
- help="Minimum number of tokens to generate per output sequence "
- "before EOS or stop_token_ids can be generated",
- )
- args.add_argument(
- "--logprobs",
- type=int,
- default=None,
- help="NNumber of log probabilities to return per output token. "
- "Note that the implementation follows the OpenAI API: The return "
- "result includes the log probabilities on the `logprobs` most likely "
- "tokens, as well the chosen tokens. The API will always return the "
- "log probability of the sampled token, so there may be up to "
- "`logprobs+1` elements in the response.",
- )
- args.add_argument(
- "--prompt-logprobs",
- type=int,
- default=None,
- help="Number of log probabilities to return per prompt token.",
- )
- args.add_argument(
- "--detokenize",
- type=bool,
- default=True,
- help="Whether to detokenize the output. Defaults to True.",
- )
- args.add_argument(
- "--skip-special-tokens",
- default=True,
- action="store_false",
- help="Whether to skip special tokens in the output.",
- )
- args.add_argument(
- "--spaces-between-special-tokens",
- default=True,
- action="store_false",
- help="Whether to add spaces between special tokens in the output. Defaults to True.",
- )
- return args
-
-
-def load_chat_template(tokenizer, chat_template):
- if chat_template is not None:
- try:
- with open(chat_template, "r") as f:
- tokenizer.chat_template = f.read()
- except OSError:
- # If opening a file fails, set chat template to be args to
- # ensure we decode so our escape are interpreted correctly
- tokenizer.chat_template = codecs.decode(chat_template, "unicode_escape")
-
- logging.info(f"Using supplied chat template:\n{tokenizer.chat_template}")
- elif tokenizer.chat_template is not None:
- logging.info(
- f"Using default chat template:\n{tokenizer.chat_template}. This May lead to unsatisfactory results. You can provide a template.jinja file for vllm."
- )
- else:
- logging.warning(
- "No chat template provided. Chat API will not work. This May lead to unsatisfactory results. You can provide a template.jinja file for vllm."
- )
\ No newline at end of file
diff --git a/models/multimodal/vision_language_model/llava/vllm/vllm_public_assets/cherry_blossom.jpg b/models/multimodal/vision_language_model/llava/vllm/vllm_public_assets/cherry_blossom.jpg
deleted file mode 100644
index 63173db0da7687d7841fe4d85239d8e277d81259..0000000000000000000000000000000000000000
Binary files a/models/multimodal/vision_language_model/llava/vllm/vllm_public_assets/cherry_blossom.jpg and /dev/null differ
diff --git a/models/multimodal/vision_language_model/llava_next_video_7b/vllm/ci/prepare.sh b/models/multimodal/vision_language_model/llava_next_video_7b/vllm/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..0fa3df9b4017331b2579cf5e039676248f79fff9
--- /dev/null
+++ b/models/multimodal/vision_language_model/llava_next_video_7b/vllm/ci/prepare.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
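+# Install the system libGL runtime (typically needed by OpenCV-based image preprocessing); pick the package manager by distro.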
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+ apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+ yum install -y mesa-libGL
+else
+  echo "Unsupported OS"
+fi
diff --git a/models/nlp/plm/albert/ixrt/ci/prepare.sh b/models/nlp/plm/albert/ixrt/ci/prepare.sh
index d78865ec0c31e4dbb393d2d89b4d4ac6a2ce391d..68e8aa19da2132447fdfe6ea48f42bc026f48d7c 100644
--- a/models/nlp/plm/albert/ixrt/ci/prepare.sh
+++ b/models/nlp/plm/albert/ixrt/ci/prepare.sh
@@ -35,7 +35,6 @@ pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/requ
# edit madlag/albert-base-v2-squad path
# sed -i "s#madlag#/${MODEL_PATH}/madlag#" ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/data_loader.py
-mv madlag ./ByteMLPerf/byte_infer_perf/general_perf/
# copy open_squad data
cp /root/data/datasets/open_squad/* ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/
diff --git a/models/nlp/plm/bert_base_ner/igie/ci/prepare.sh b/models/nlp/plm/bert_base_ner/igie/ci/prepare.sh
index 36c5cea9aa366c78077003c3271f4fd402021dd0..3ebc27f17f276362647a9716fcc7aad4e9d77e32 100644
--- a/models/nlp/plm/bert_base_ner/igie/ci/prepare.sh
+++ b/models/nlp/plm/bert_base_ner/igie/ci/prepare.sh
@@ -16,6 +16,7 @@
set -x
+pip3 uninstall numpy -y
pip3 install -r requirements.txt
# Get pytorch weights
diff --git a/models/nlp/plm/roformer/ixrt/ci/prepare.sh b/models/nlp/plm/roformer/ixrt/ci/prepare.sh
index c3cc4f3d2e12028623cbd00969ac39960db5b490..ea80462db022331cb8b9c20f12a15e9ef8b0bdd6 100644
--- a/models/nlp/plm/roformer/ixrt/ci/prepare.sh
+++ b/models/nlp/plm/roformer/ixrt/ci/prepare.sh
@@ -28,7 +28,8 @@ python3 export_onnx.py --model_path ./data/open_roformer --output_path ./data/op
# Simplify onnx model
onnxsim ./data/open_roformer/roformer-frozen_org.onnx ./data/open_roformer/roformer-frozen.onnx
-python3 deploy.py --model_path ./data/open_roformer/roformer-frozen.onnx --output_path ./data/open_roformer/roformer.onnx
+python3 deploy.py --model_path ./data/open_roformer/roformer-frozen.onnx --output_path ./data/open_roformer/roformer-frozen.onnx
+cp ./data/open_roformer/roformer-frozen.onnx ./data/open_roformer/roformer.onnx
# link ByteMLPerf and install requirements
ln -s ../../../../../toolbox/ByteMLPerf ./
@@ -39,7 +40,7 @@ sed -i '102s/build_engine/# build_engine/' ./ByteMLPerf/byte_infer_perf/general_
# Move open_roformer
mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/
-mv ./data/open_roformer ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/
+cp -r ./data/open_roformer ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/
# Setup open_cail2019 dataset
cp /root/data/datasets/open_cail2019/* ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cail2019
diff --git a/models/others/recommendation/wide_and_deep/ixrt/README.md b/models/others/recommendation/wide_and_deep/ixrt/README.md
index 8e9dd17d650123ead68c985290075b5c912be8ac..22796241f671d6bd7ff4280666270ea572dd8efb 100644
--- a/models/others/recommendation/wide_and_deep/ixrt/README.md
+++ b/models/others/recommendation/wide_and_deep/ixrt/README.md
@@ -56,7 +56,7 @@ export PROJ_PATH=./
#### FP16
```bash
-bash scripts/infer_widedeep_fp16_performance.sh
+bash scripts/infer_wide_and_deep_fp16_performance.sh
```
### Accuracy
diff --git a/models/others/recommendation/wide_and_deep/ixrt/ci/prepare.sh b/models/others/recommendation/wide_and_deep/ixrt/ci/prepare.sh
index 2e65a751f891ad3089d1bb5e27c50a032fdaaf81..4a351d878726c5b7c7a20bc3a4ac1dd7eb021db3 100644
--- a/models/others/recommendation/wide_and_deep/ixrt/ci/prepare.sh
+++ b/models/others/recommendation/wide_and_deep/ixrt/ci/prepare.sh
@@ -40,7 +40,7 @@ mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_criteo_kaggle/
cp /root/data/datasets/eval.csv ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_criteo_kaggle/
-wget http://files.deepspark.org.cn:880/deepspark/widedeep_dynamicshape_new.onnx
+cp /root/data/checkpoints/widedeep_dynamicshape_new.onnx ./
cp open_wide_deep_saved_model/* ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/regular/open_wide_deep_saved_model/
mv widedeep_dynamicshape_new.onnx ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/regular/open_wide_deep_saved_model/widedeep_dynamicshape.onnx
diff --git a/models/others/recommendation/wide_and_deep/ixrt/scripts/infer_widedeep_fp16_performance.sh b/models/others/recommendation/wide_and_deep/ixrt/scripts/infer_wide_and_deep_fp16_performance.sh
similarity index 100%
rename from models/others/recommendation/wide_and_deep/ixrt/scripts/infer_widedeep_fp16_performance.sh
rename to models/others/recommendation/wide_and_deep/ixrt/scripts/infer_wide_and_deep_fp16_performance.sh
diff --git a/tests/model_info.json b/tests/model_info.json
index 7df4b9ccfe08e291ce1012cf932ebcd2d1f88937..62ef3eba0dd6fa069eef2dc39c148fe88b15b13e 100644
--- a/tests/model_info.json
+++ b/tests/model_info.json
@@ -22,8 +22,8 @@
"github_repo": "",
"github_branch": "",
"github_path": "",
- "datasets": "https://www.openslr.org/33/aishell.tar.gz",
- "download_url": "http://mobvoi-speech-public.ufile.ucloud.cn/public/wenet/aishell/20211025_conformer_exp.tar.gz",
+ "datasets": "https://www.openslr.org/33/aishell",
+ "download_url": "http://mobvoi-speech-public.ufile.ucloud.cn/public/wenet/aishell/20211025_conformer_exp",
"need_third_part": true,
"precisions": [
"fp16"
@@ -187,7 +187,9 @@
"datasets": "https://www.image-net.org/download.php",
"download_url": "https://huggingface.co/openai/clip-vit-base-patch32",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -279,7 +281,7 @@
"github_branch": "",
"github_path": "",
"datasets": "https://www.image-net.org/download.php",
- "download_url": "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth",
+ "download_url": "https://download.pytorch.org/models/convnext_base-6075fbad.pth",
"need_third_part": "",
"precisions": [
"fp16"
@@ -1018,7 +1020,7 @@
"github_branch": "",
"github_path": "",
"datasets": "https://www.image-net.org/download.php",
- "download_url": "https://download.pytorch.org/models/efficientnet_b1_rwightman-bac287d4.pth",
+ "download_url": "https://download.pytorch.org/models/efficientnet_b1-c27df63c.pth",
"need_third_part": false,
"precisions": [
"fp16",
@@ -2416,8 +2418,7 @@
"download_url": "https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d50_b32x8_imagenet_20210531-db14775a.pth",
"need_third_part": false,
"precisions": [
- "fp16",
- "int8"
+ "fp16"
],
"type": "inference",
"hasDemo": false,
@@ -3155,10 +3156,12 @@
"github_repo": "",
"github_branch": "",
"github_path": "",
- "datasets": "",
- "download_url": "",
+ "datasets": "local/tmp",
+ "download_url": "https://drive.google.com/open?id=1R77HmFADxe87GmoLwzfgMu_HY0IhcyBz",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -3185,10 +3188,12 @@
"github_repo": "",
"github_branch": "",
"github_path": "",
- "datasets": "",
- "download_url": "",
+ "datasets": "local/coco",
+ "download_url": "http://files.deepspark.org.cn:880/deepspark/wts/maskrcnn.wts",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -3215,10 +3220,12 @@
"github_repo": "",
"github_branch": "",
"github_path": "",
- "datasets": "",
- "download_url": "",
+ "datasets": "local/coco2017",
+ "download_url": "https://download.openmmlab.com/mmdetection/v2.0/solo/solo_r50_fpn_3x_coco/solo_r50_fpn_3x_coco_20210901_012353-11d224d7.pth",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -3245,10 +3252,12 @@
"github_repo": "",
"github_branch": "",
"github_path": "",
- "datasets": "",
- "download_url": "",
+ "datasets": "local/market1501",
+ "download_url": "https://local/ckpt.t7",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -3275,10 +3284,12 @@
"github_repo": "",
"github_branch": "",
"github_path": "",
- "datasets": "",
- "download_url": "",
+ "datasets": "local/VehicleID",
+ "download_url": "https://github.com/JDAI-CV/fast-reid/releases/download/v0.1.1/vehicleid_bot_R50-ibn.pth",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -3305,10 +3316,12 @@
"github_repo": "",
"github_branch": "",
"github_path": "",
- "datasets": "",
- "download_url": "",
+ "datasets": "local/VehicleID",
+ "download_url": "https://local/epoch_14.pth",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -3559,10 +3572,12 @@
"github_repo": "",
"github_branch": "",
"github_path": "",
- "datasets": "",
- "download_url": "",
+ "datasets": "local/coco",
+ "download_url": "https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_r50_fpn_4x4_1x_coco/fovea_r50_fpn_4x4_1x_coco_20200219-ee4d5303.pth",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -3621,10 +3636,12 @@
"github_repo": "",
"github_branch": "",
"github_path": "",
- "datasets": "",
- "download_url": "",
+ "datasets": "local/coco",
+ "download_url": "https://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_r50_fpn_1x_coco/fsaf_r50_fpn_1x_coco-94ccc51f.pth",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -3683,10 +3700,12 @@
"github_repo": "",
"github_branch": "",
"github_path": "",
- "datasets": "",
- "download_url": "",
+ "datasets": "local/coco",
+ "download_url": "https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_1x_coco/fcos_hrnetv2p_w18_gn-head_4x4_1x_coco_20201212_100710-4ad151de.pth",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -3777,10 +3796,12 @@
"github_repo": "",
"github_branch": "",
"github_path": "",
- "datasets": "",
- "download_url": "",
+ "datasets": "local/widerface",
+ "download_url": "https://github.com/biubug6/Face-Detector-1MB-with-landmark/raw/master/weights/mobilenet0.25_Final.pth",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -4688,7 +4709,9 @@
"datasets": "",
"download_url": "",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": true,
"demoType": "image"
@@ -4716,9 +4739,11 @@
"github_branch": "",
"github_path": "",
"datasets": "",
- "download_url": "",
+ "download_url": "https://huggingface.co/facebook/chameleon-7b",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -4748,7 +4773,9 @@
"datasets": "",
"download_url": "",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -4776,9 +4803,11 @@
"github_branch": "",
"github_path": "",
"datasets": "",
- "download_url": "",
+ "download_url": "https://huggingface.co/adept/fuyu-8b",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -4806,9 +4835,11 @@
"github_branch": "",
"github_path": "",
"datasets": "",
- "download_url": "",
+ "download_url": "https://huggingface.co/OpenGVLab/InternVL2-4B",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": true,
"demoType": "image-to-text"
@@ -4836,9 +4867,11 @@
"github_branch": "",
"github_path": "",
"datasets": "",
- "download_url": "",
+ "download_url": "https://modelscope.cn/models/swift/llava-v1.6-vicuna-7b-hf",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -4866,39 +4899,11 @@
"github_branch": "",
"github_path": "",
"datasets": "",
- "download_url": "",
- "need_third_part": "",
- "precisions": "",
- "type": "inference",
- "hasDemo": false,
- "demoType": ""
- },
- {
- "model_name": "minicpm_v",
- "framework": "vllm",
- "release_version": "25.03",
- "release_sdk": "CoreX 4.2.0",
- "release_gpgpu": "BI-V150",
- "latest_sdk": "4.2.0",
- "latest_gpgpu": "BI-V150",
- "category": "multimodal/vision_language_model",
- "toolbox": "",
- "mdims": "",
- "dataset": "",
- "license": "",
- "model_path": "models/multimodal/vision_language_model/minicpm_v/vllm/",
- "readme_file": "models/multimodal/vision_language_model/minicpm_v/vllm/README.md",
- "bitbucket_repo": "",
- "bitbucket_branch": "",
- "bitbucket_path": "",
- "develop_owner": "",
- "github_repo": "",
- "github_branch": "",
- "github_path": "",
- "datasets": "",
- "download_url": "",
+ "download_url": "https://modelscope.cn/models/swift/LLaVA-NeXT-Video-7B-hf",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -5023,9 +5028,11 @@
"github_branch": "",
"github_path": "",
"datasets": "",
- "download_url": "",
+ "download_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -5053,9 +5060,11 @@
"github_branch": "",
"github_path": "",
"datasets": "",
- "download_url": "",
+ "download_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -5083,9 +5092,11 @@
"github_branch": "",
"github_path": "",
"datasets": "",
- "download_url": "",
+ "download_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -5113,9 +5124,11 @@
"github_branch": "",
"github_path": "",
"datasets": "",
- "download_url": "",
+ "download_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -5143,9 +5156,11 @@
"github_branch": "",
"github_path": "",
"datasets": "",
- "download_url": "",
+ "download_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -5173,9 +5188,11 @@
"github_branch": "",
"github_path": "",
"datasets": "",
- "download_url": "",
+ "download_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": true,
"demoType": "chat"
@@ -5491,9 +5508,11 @@
"github_branch": "",
"github_path": "",
"datasets": "",
- "download_url": "",
+ "download_url": "https://modelscope.cn/models/qwen/Qwen1.5-7B",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -5651,7 +5670,9 @@
"datasets": "",
"download_url": "",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -5678,10 +5699,12 @@
"github_repo": "",
"github_branch": "",
"github_path": "",
- "datasets": "",
- "download_url": "",
+ "datasets": "local/china-people-daily-ner-corpus",
+ "download_url": "https://huggingface.co/bert-base-chinese",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "int8"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -5708,10 +5731,12 @@
"github_repo": "",
"github_branch": "",
"github_path": "",
- "datasets": "",
- "download_url": "",
+ "datasets": "local/SQuAD",
+ "download_url": "https://huggingface.co/csarron/bert-base-uncased-squad-v1",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -5739,9 +5764,11 @@
"github_branch": "",
"github_path": "",
"datasets": "",
- "download_url": "",
+ "download_url": "https://local/bert_base_uncased_squad",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -5768,10 +5795,12 @@
"github_repo": "",
"github_branch": "",
"github_path": "",
- "datasets": "",
- "download_url": "",
+ "datasets": "local/SQuAD",
+ "download_url": "https://huggingface.co/neuralmagic/bert-large-uncased-finetuned-squadv1",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -5799,9 +5828,11 @@
"github_branch": "",
"github_path": "",
"datasets": "",
- "download_url": "",
+ "download_url": "https://local/bert-large-uncased",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -5829,9 +5860,11 @@
"github_branch": "",
"github_path": "",
"datasets": "",
- "download_url": "",
+ "download_url": "https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open_deberta",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -5859,9 +5892,11 @@
"github_branch": "",
"github_path": "",
"datasets": "",
- "download_url": "",
+ "download_url": "https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open_roberta",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -5889,9 +5924,11 @@
"github_branch": "",
"github_path": "",
"datasets": "",
- "download_url": "",
+ "download_url": "https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open_roformer",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -5919,9 +5956,11 @@
"github_branch": "",
"github_path": "",
"datasets": "",
- "download_url": "",
+ "download_url": "https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open_videobert",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -6428,7 +6467,7 @@
"github_repo": "",
"github_branch": "",
"github_path": "",
- "datasets": "cityscapes",
+ "datasets": "local/cityscapes",
"download_url": "https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes_20211210_145204-6860854e.pth",
"need_third_part": false,
"precisions": [
@@ -6855,7 +6894,7 @@
"demoType": ""
},
{
- "model_name": "mllama",
+ "model_name": "llama-3.2",
"framework": "vllm",
"release_version": "25.06",
"release_sdk": "4.2.0",
@@ -6867,8 +6906,8 @@
"mdims": "",
"dataset": "",
"license": "",
- "model_path": "models/multimodal/vision_language_model/mllama/vllm",
- "readme_file": "models/multimodal/vision_language_model/mllama/vllm/README.md",
+ "model_path": "models/multimodal/vision_language_model/llama-3.2/vllm",
+ "readme_file": "models/multimodal/vision_language_model/llama-3.2/vllm/README.md",
"bitbucket_repo": "",
"bitbucket_branch": "",
"bitbucket_path": "",
diff --git a/tests/run_igie.py b/tests/run_igie.py
index 46e5636b98effaef49606ed74a94596f13013d45..3b9f7cf05c7d122ce402051ab44bde0900178ecb 100644
--- a/tests/run_igie.py
+++ b/tests/run_igie.py
@@ -94,6 +94,16 @@ def main():
logging.debug(f"The result of {model['model_name']} is\n{json.dumps(result, indent=4)}")
logging.info(f"End running {model['model_name']} test case.")
+    # multi_object_tracking models
+ if model["category"] in ["cv/multi_object_tracking"]:
+ logging.info(f"Start running {model['model_name']} test case:\n{json.dumps(model, indent=4)}")
+ d_url = model["download_url"]
+ if d_url is not None:
+ result = run_multi_object_tracking_testcase(model)
+ check_model_result(result)
+ logging.debug(f"The result of {model['model_name']} is\n{json.dumps(result, indent=4)}")
+ logging.info(f"End running {model['model_name']} test case.")
+
        # Speech models
if model["category"] in ["audio/speech_recognition"]:
logging.info(f"Start running {model['model_name']} test case:\n{json.dumps(model, indent=4)}")
@@ -159,13 +169,22 @@ def run_clf_testcase(model):
for prec in model["precisions"]:
logging.info(f"Start running {model_name} {prec} test case")
- script = f"""
- export DATASETS_DIR=/mnt/deepspark/data/datasets/imagenet-val
- export RUN_DIR=../../igie_common/
- cd ../{model['model_path']}
- bash scripts/infer_{model_name}_{prec}_accuracy.sh
- bash scripts/infer_{model_name}_{prec}_performance.sh
- """
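+        # unet is evaluated on its own dataset (cityscapes in the model config), so point DATASETS_DIR there instead of imagenet-val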
+ if model_name == "unet":
+ script = f"""
+ export DATASETS_DIR=/mnt/deepspark/data/datasets/{dataset_n}
+ export RUN_DIR=../../igie_common/
+ cd ../{model['model_path']}
+ bash scripts/infer_{model_name}_{prec}_accuracy.sh
+ bash scripts/infer_{model_name}_{prec}_performance.sh
+ """
+ else:
+ script = f"""
+ export DATASETS_DIR=/mnt/deepspark/data/datasets/imagenet-val
+ export RUN_DIR=../../igie_common/
+ cd ../{model['model_path']}
+ bash scripts/infer_{model_name}_{prec}_accuracy.sh
+ bash scripts/infer_{model_name}_{prec}_performance.sh
+ """
r, t = run_script(script)
sout = r.stdout
@@ -213,9 +232,9 @@ def run_detec_testcase(model):
ln -s /mnt/deepspark/data/datasets/{dataset_n} ./
"""
- if model["need_third_part"] and model["3rd_party_repo"]:
- third_party_repo = model["3rd_party_repo"]
- prepare_script += f"unzip /mnt/deepspark/data/3rd_party/{third_party_repo}.zip -d ./\n"
+ # if model["need_third_part"] and model["3rd_party_repo"]:
+ # third_party_repo = model["3rd_party_repo"]
+ # prepare_script += f"unzip /mnt/deepspark/data/3rd_party/{third_party_repo}.zip -d ./\n"
prepare_script += "bash ci/prepare.sh\n"
# add pip list info when in debug mode
@@ -384,6 +403,63 @@ def run_trace_testcase(model):
logging.debug(f"matchs:\n{matchs}")
return result
+def run_multi_object_tracking_testcase(model):
+ model_name = model["model_name"]
+ result = {
+ "name": model_name,
+ "result": {},
+ }
+ d_url = model["download_url"]
+ checkpoint_n = d_url.split("/")[-1]
+ dataset_n = model["datasets"].split("/")[-1]
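+    # Stage the pre-downloaded checkpoint and dataset via symlinks, then run the model's ci/prepare.sh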
+ prepare_script = f"""
+ cd ../{model['model_path']}
+ ln -s /mnt/deepspark/data/checkpoints/{checkpoint_n} ./
+ ln -s /mnt/deepspark/data/datasets/{dataset_n} ./
+ """
+
+ prepare_script += """
+ bash ci/prepare.sh
+ ls -l | grep onnx
+ """
+
+ # add pip list info when in debug mode
+ if utils.is_debug():
+ pip_list_script = "pip list | grep -E 'numpy|transformer|igie|mmcv|onnx'\n"
+ prepare_script = pip_list_script + prepare_script + pip_list_script
+
+ run_script(prepare_script)
+
+ for prec in model["precisions"]:
+ logging.info(f"Start running {model_name} {prec} test case")
+ script = f"""
+ cd ../{model['model_path']}
+ export DATASETS_DIR=./{dataset_n}/
+ bash scripts/infer_{model_name}_{prec}_accuracy.sh
+ bash scripts/infer_{model_name}_{prec}_performance.sh
+ """
+
+ r, t = run_script(script)
+ sout = r.stdout
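+        # Parse name/value pairs printed as "* <name>: <value>, <name>: <value>" (FPS/latency style) from the script output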
+ pattern = r"\* ([\w\d ]+):\s*([\d.]+)[ ms%]*, ([\w\d ]+):\s*([\d.]+)[ ms%]*"
+ matchs = re.findall(pattern, sout)
+ for m in matchs:
+ result["result"].setdefault(prec, {"status": "FAIL"})
+ try:
+ result["result"][prec] = result["result"][prec] | {m[0]: float(m[1]), m[2]: float(m[3])}
+ except ValueError:
+ print("The string cannot be converted to a float.")
+ result["result"][prec] = result["result"][prec] | {m[0]: m[1], m[2]: m[3]}
+ pattern = METRIC_PATTERN
+ matchs = re.findall(pattern, sout)
+ if matchs and len(matchs) == 1:
+ result["result"].setdefault(prec, {})
+ result["result"][prec].update(get_metric_result(matchs[0]))
+ result["result"][prec]["status"] = "PASS"
+ result["result"][prec]["Cost time (s)"] = t
+ logging.debug(f"matchs:\n{matchs}")
+ return result
+
# BERT series models
def run_nlp_testcase(model):
model_name = model["model_name"]
diff --git a/tests/run_ixrt.py b/tests/run_ixrt.py
index a19223ad859fdc6f4cf8d9e14c3d7c93086925ad..9464042144020809ff2d7f3983ff74d924e1df3f 100644
--- a/tests/run_ixrt.py
+++ b/tests/run_ixrt.py
@@ -94,6 +94,16 @@ def main():
logging.debug(f"The result of {model['model_name']} is\n{json.dumps(result, indent=4)}")
logging.info(f"End running {model['model_name']} test case.")
+        # instance_segmentation models
+ if model["category"] in ["cv/instance_segmentation"]:
+ logging.info(f"Start running {model['model_name']} test case:\n{json.dumps(model, indent=4)}")
+ d_url = model["download_url"]
+ if d_url is not None:
+ result = run_instance_segmentation_testcase(model)
+ check_model_result(result)
+ logging.debug(f"The result of {model['model_name']} is\n{json.dumps(result, indent=4)}")
+ logging.info(f"End running {model['model_name']} test case.")
+
        # NLP models
if model["category"] in ["nlp/plm", "others/recommendation"]:
logging.info(f"Start running {model['model_name']} test case:\n{json.dumps(model, indent=4)}")
@@ -191,9 +201,29 @@ def run_clf_testcase(model):
match_count += 1
result["result"][prec][name] = float(f"{float(value.split(':')[1].strip()):.3f}")
break
-
if match_count == len(patterns):
result["result"][prec]["status"] = "PASS"
+
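+        # swin_transformer_large reports "Throughput: <x> qps"; record it as QPS and take accuracy from the metricResult line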
+ if model_name == "swin_transformer_large":
+ pattern = r'Throughput: (\d+\.\d+) qps'
+ matchs = re.findall(pattern, sout)
+ for m in matchs:
+ result["result"].setdefault(prec, {"status": "FAIL"})
+ try:
+ result["result"][prec]["QPS"] = float(m)
+ except ValueError:
+ print("The string cannot be converted to a float.")
+ result["result"][prec]["QPS"] = m
+
+ pattern = METRIC_PATTERN
+ matchs = re.findall(pattern, sout)
+ result["result"].setdefault(prec, {"status": "FAIL"})
+ logging.debug(f"matchs:\n{matchs}")
+ for m in matchs:
+ result["result"][prec].update(get_metric_result(m))
+ if len(matchs) == 1:
+ result["result"][prec]["status"] = "PASS"
+
result["result"][prec]["Cost time (s)"] = t
logging.debug(f"matchs:\n{matchs}")
return result
@@ -375,7 +405,9 @@ def run_nlp_testcase(model):
bash scripts/infer_{model_name}_{prec}_performance.sh
cd ./ByteMLPerf/byte_infer_perf/general_perf
"""
- if model_name == "roformer" or model_name == "widedeep":
+ if model_name == "roformer" or model_name == "wide_and_deep":
+ if model_name == "wide_and_deep":
+ model_name = "widedeep"
script += f"""
python3 core/perf_engine.py --hardware_type ILUVATAR --task {model_name}-tf-fp32
"""
@@ -414,13 +446,23 @@ def run_nlp_testcase(model):
r, t = run_script(script)
sout = r.stdout
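+        # Record the reported "Throughput: <x> qps" value as QPS before scraping the metricResult dict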
+ pattern = r'Throughput: (\d+\.\d+) qps'
+ matchs = re.findall(pattern, sout)
+ for m in matchs:
+ result["result"].setdefault(prec, {"status": "FAIL"})
+ try:
+ result["result"][prec]["QPS"] = float(m)
+ except ValueError:
+ print("The string cannot be converted to a float.")
+ result["result"][prec]["QPS"] = m
+
pattern = METRIC_PATTERN
matchs = re.findall(pattern, sout)
result["result"].setdefault(prec, {"status": "FAIL"})
logging.debug(f"matchs:\n{matchs}")
for m in matchs:
result["result"][prec].update(get_metric_result(m))
- if len(matchs) == 2:
+ if len(matchs) == 1:
result["result"][prec]["status"] = "PASS"
result["result"][prec]["Cost time (s)"] = t
@@ -478,6 +520,59 @@ def run_speech_testcase(model):
logging.debug(f"matchs:\n{matchs}")
return result
+def run_instance_segmentation_testcase(model):
+ model_name = model["model_name"]
+ result = {
+ "name": model_name,
+ "result": {},
+ }
+ d_url = model["download_url"]
+ checkpoint_n = d_url.split("/")[-1]
+ dataset_n = model["datasets"].split("/")[-1]
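+    # Link the pre-staged checkpoint and dataset into the model directory, then run ci/prepare.sh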
+ prepare_script = f"""
+ cd ../{model['model_path']}
+ ln -s /root/data/checkpoints/{checkpoint_n} ./
+ ln -s /root/data/datasets/{dataset_n} ./
+ bash ci/prepare.sh
+ ls -l | grep onnx
+ """
+
+ # add pip list info when in debug mode
+ if utils.is_debug():
+ pip_list_script = "pip list | grep -E 'numpy|transformer|igie|mmcv|onnx'\n"
+ prepare_script = pip_list_script + prepare_script + pip_list_script
+
+ run_script(prepare_script)
+
+ for prec in model["precisions"]:
+ logging.info(f"Start running {model_name} {prec} test case")
+ script = f"""
+ cd ../{model['model_path']}
+ export PROJ_DIR=./
+ export DATASETS_DIR=./coco2017/
+ export CHECKPOINTS_DIR=./checkpoints
+ export COCO_GT=./coco2017/annotations/instances_val2017.json
+ export EVAL_DIR=./coco2017/val2017
+ export RUN_DIR=./
+ bash scripts/infer_{model_name}_{prec}_accuracy.sh
+ bash scripts/infer_{model_name}_{prec}_performance.sh
+ """
+
+ r, t = run_script(script)
+ sout = r.stdout
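+        # Two metricResult matches (one from the accuracy script, one from the performance script) mark the precision as PASS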
+ pattern = METRIC_PATTERN
+ matchs = re.findall(pattern, sout)
+ result["result"].setdefault(prec, {"status": "FAIL"})
+ logging.debug(f"matchs:\n{matchs}")
+ for m in matchs:
+ result["result"][prec].update(get_metric_result(m))
+ if len(matchs) == 2:
+ result["result"][prec]["status"] = "PASS"
+
+ result["result"][prec]["Cost time (s)"] = t
+ logging.debug(f"matchs:\n{matchs}")
+ return result
+
def get_metric_result(str):
if str:
return json.loads(str.replace("'", "\""))["metricResult"]
diff --git a/tests/run_trtllm.py b/tests/run_trtllm.py
index c57e02816ef6dba5f829ecfaf2e7b1e3849d0da5..ac79b3b9591e4cd65269a4d5e0341b4b2d2d0a9a 100644
--- a/tests/run_trtllm.py
+++ b/tests/run_trtllm.py
@@ -72,7 +72,7 @@ def get_model_config(mode_name):
models = json.load(file)
for model in models['models']:
- if model["model_name"] == mode_name.lower() and model["framework"] == "trtllm":
+ if model["model_name"] == mode_name.lower() and (model["framework"] == "trtllm" or model["framework"] == "tgi"):
return model
return
diff --git a/tests/run_vllm.py b/tests/run_vllm.py
index a200569cfda5db31f720bcd9ccffba70399a056b..e05973fdd535f8d626c53056c2280e8dcbf214e6 100644
--- a/tests/run_vllm.py
+++ b/tests/run_vllm.py
@@ -205,10 +205,14 @@ def run_nlp_testcase(model):
python3 offline_inference.py --model ./stablelm --max-tokens 256 -tp 1 --temperature 0.0
"""
elif model_name.startswith("deepseek-r1-distill-"):
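+            # Run the 32B distill with 4-way tensor parallelism; the smaller distills fit on tp=2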
+ if model_name == "deepseek-r1-distill-qwen-32b":
+ tp = 4
+ else:
+ tp = 2
script = f"""
set -x
cd ../{model['model_path']}
- python3 offline_inference.py --model ./{model_name} --max-tokens 256 -tp 2 --temperature 0.0 --max-model-len 3096
+ python3 offline_inference.py --model ./{model_name} --max-tokens 256 -tp {tp} --temperature 0.0 --max-model-len 3096
"""
elif model_name == "aria":
script = f"""
@@ -217,6 +221,13 @@ def run_nlp_testcase(model):
export VLLM_ASSETS_CACHE=../vllm/
python3 offline_inference_vision_language.py --model ./{model_name} --max-tokens 256 -tp 4 --trust-remote-code --temperature 0.0 --dtype bfloat16 --tokenizer-mode slow
"""
+ elif model_name == "chameleon_7b" or model_name == "fuyu_8b":
+ script = f"""
+ set -x
+ cd ../{model['model_path']}
+ export VLLM_ASSETS_CACHE=../vllm/
+ python3 offline_inference_vision_language.py --model ./{model_name} --max-tokens 256 -tp 2 --trust-remote-code --temperature 0.0
+ """
elif model_name == "h2vol" or model_name == "idefics3":
script = f"""
set -x
@@ -231,7 +242,7 @@ def run_nlp_testcase(model):
export VLLM_ASSETS_CACHE=../vllm/
PT_SDPA_ENABLE_HEAD_DIM_PADDING=1 python3 offline_inference_vision_language.py --model ./{model_name} --max-tokens 256 -tp 2 --trust-remote-code --temperature 0.0
"""
- elif model_name == "mllama":
+ elif model_name == "llama-3.2":
script = f"""
set -x
cd ../{model['model_path']}
@@ -246,6 +257,27 @@ def run_nlp_testcase(model):
export VLLM_ASSETS_CACHE=../vllm/
python3 offline_inference_vision_language.py --model ./{model_name} --max-tokens 256 -tp 4 --trust-remote-code --temperature 0.0 --tokenizer-mode 'mistral'
"""
+ elif model_name == "llava":
+ script = f"""
+ set -x
+ cd ../{model['model_path']}
+ export VLLM_ASSETS_CACHE=../vllm/
+ python3 offline_inference_vision_language.py --model ./{model_name} --max-tokens 256 -tp 4 --trust-remote-code --temperature 0.0 --model-type llava-next --max-model-len 4096
+ """
+ elif model_name == "llava_next_video_7b":
+ script = f"""
+ set -x
+ cd ../{model['model_path']}
+ export VLLM_ASSETS_CACHE=../vllm/
+ python3 offline_inference_vision_language.py --model ./{model_name} --max-tokens 256 -tp 4 --trust-remote-code --temperature 0.0 --model-type llava-next-video --modality video --dtype bfloat16
+ """
+ elif model_name == "intern_vl":
+ script = f"""
+ set -x
+ cd ../{model['model_path']}
+ export VLLM_ASSETS_CACHE=../vllm/
+ python3 offline_inference_vision_language.py --model ./{model_name} --max-tokens 256 -tp 2 --temperature 0.0 --max-model-len 2048
+ """
r, t = run_script(script)
sout = r.stdout
@@ -257,6 +289,13 @@ def run_nlp_testcase(model):
result["result"][prec]["tokens"] = int(matchs.group(1))
result["result"][prec]["QPS"] = float(matchs.group(2))
result["result"][prec]["status"] = "PASS"
+ else:
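+                # Fall back to vLLM's "Maximum concurrency for N tokens per request: Mx" log line when the primary pattern does not match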
+ pattern = r"Maximum concurrency for (\d+) tokens per request: ([\d.]+)x"
+ matchs = re.search(pattern, sout)
+ if matchs:
+ result["result"][prec]["tokens"] = int(matchs.group(1))
+ result["result"][prec]["QPS"] = float(matchs.group(2))
+ result["result"][prec]["status"] = "PASS"
result["result"][prec]["Cost time (s)"] = t
return result
diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py
index 089d9860f573bba7e19f84aa20fb830a8fcc22d8..f8a2797282b4a2edbace565b8a7d68ad3090ea48 100644
--- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py
+++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py
@@ -210,6 +210,9 @@ class PerfEngine:
if accuracy_report:
base_report['Accuracy'] = accuracy_report
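+            # Echo the accuracy report as a {'metricResult': ...} dict so the test runners can scrape it via METRIC_PATTERN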
+ metricResult = {}
+ metricResult['metricResult'] = accuracy_report
+ print(metricResult)
# function to test qps and latency
if workload['test_perf']: