diff --git a/models/audio/speech_recognition/conformer/igie/README.md b/models/audio/speech_recognition/conformer/igie/README.md
index 9141b3c8bab3498c3ef8d9638de8f945a9089b70..ae96f9d4b9433e57973f2f7d6d1b5f1e206ef9aa 100644
--- a/models/audio/speech_recognition/conformer/igie/README.md
+++ b/models/audio/speech_recognition/conformer/igie/README.md
@@ -24,6 +24,12 @@ Dataset: to download the Aishell dataset.
### Install Dependencies
```bash
+# Install sox
+## CentOS
+yum install sox sox-devel -y
+## Ubuntu
+apt install sox libsox-fmt-all -y
+
pip3 install -r requirements.txt
cd ctc_decoder/swig && bash setup.sh
cd ../../
diff --git a/models/audio/speech_recognition/conformer/igie/ci/prepare.sh b/models/audio/speech_recognition/conformer/igie/ci/prepare.sh
index 49f448a67b02f4ee0dd8b313948e9b8d710c2fce..8290acf0fb594993f0439c539f7abfff54a34a15 100644
--- a/models/audio/speech_recognition/conformer/igie/ci/prepare.sh
+++ b/models/audio/speech_recognition/conformer/igie/ci/prepare.sh
@@ -16,6 +16,15 @@
set -x
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+ apt install sox libsox-fmt-all -y
+elif [[ ${ID} == "centos" ]]; then
+ yum install sox sox-devel -y
+else
+    echo "Unsupported OS"
+fi
+
pip3 install -r requirements.txt
cd ctc_decoder/swig && bash setup.sh
cd ../../
@@ -39,4 +48,4 @@ onnxsim encoder_bs24_seq384_static.onnx encoder_bs24_seq384_static_opt.onnx
python3 alter_onnx.py --batch_size 24 --path encoder_bs24_seq384_static_opt.onnx
# Need to unzip aishell to the current directory. For details, refer to data.list
-tar -zxvf aishell.tar.gz
+# tar -zxvf aishell.tar.gz
diff --git a/models/audio/speech_recognition/conformer/igie/requirements.txt b/models/audio/speech_recognition/conformer/igie/requirements.txt
index 2f7cd1f24262857100607eb19f6ccc14b7e98a31..8820eb754dec653c319dc0c86d53049346c7f7b6 100644
--- a/models/audio/speech_recognition/conformer/igie/requirements.txt
+++ b/models/audio/speech_recognition/conformer/igie/requirements.txt
@@ -1,4 +1,4 @@
tqdm
onnx
typeguard==2.13.3
-onnxsim
+onnxsim
\ No newline at end of file
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/utils/init_model.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/utils/init_model.py
index 4a008183ee25cd88b2fa25d93bdc3f9e3a55d31a..377e110b36cc140a55edc9dcc1b20dc5f91387a2 100644
--- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/utils/init_model.py
+++ b/models/audio/speech_recognition/conformer/igie/wenet/wenet/utils/init_model.py
@@ -29,7 +29,7 @@ from wenet.utils.cmvn import load_cmvn
def init_model(configs):
if configs['cmvn_file'] is not None:
- mean, istd = load_cmvn(configs['cmvn_file'], configs['is_json_cmvn'])
+ mean, istd = load_cmvn(configs['cmvn_file'], configs['cmvn_conf']['is_json_cmvn'])
global_cmvn = GlobalCMVN(
torch.from_numpy(mean).float(),
torch.from_numpy(istd).float())
diff --git a/models/audio/speech_recognition/transformer_asr/ixrt/ci/prepare.sh b/models/audio/speech_recognition/transformer_asr/ixrt/ci/prepare.sh
index 5a1f966836c58193331ab4d43411a5622c04ad79..3b9bb751e92924655b447fdc785899465a846b61 100644
--- a/models/audio/speech_recognition/transformer_asr/ixrt/ci/prepare.sh
+++ b/models/audio/speech_recognition/transformer_asr/ixrt/ci/prepare.sh
@@ -32,6 +32,8 @@ cp -r /root/data/checkpoints/8886 results/transformer/
mkdir -p results/transformer/8886/save
mkdir -p /home/data/speechbrain/aishell/csv_data
ln -s /root/data/datasets/AISHELL/data_aishell /home/data/speechbrain/aishell/
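+# stage and unpack the RIRS noise corpus next to the AISHELL data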
+cp /root/data/datasets/rirs_noises.zip /home/data/speechbrain/aishell/
+unzip -o /home/data/speechbrain/aishell/rirs_noises.zip -d /home/data/speechbrain/aishell/
cp results/transformer/8886/*.csv /home/data/speechbrain/aishell/csv_data
bash build.sh
diff --git a/models/cv/classification/densenet121/ixrt/README.md b/models/cv/classification/densenet121/ixrt/README.md
index 58cce7a327e3d3b00c4b2ccf289e23124050d02a..a5dbc7c7f19a4121e1d769ec50a9b7e2c308489b 100644
--- a/models/cv/classification/densenet121/ixrt/README.md
+++ b/models/cv/classification/densenet121/ixrt/README.md
@@ -33,8 +33,9 @@ pip3 install -r ../../ixrt_common/requirements.txt
### Model Conversion
```bash
+# Download the model weights to /root/.cache/torch/hub/checkpoints/densenet121-a639ec97.pth
mkdir checkpoints
-python3 ../../ixrt_common/export.py --model-name densenet121 --weight densenet121-a639ec97.pth --output checkpoints/densenet121.onnx
+python3 export.py --output checkpoints/densenet121.onnx
```
## Model Inference
diff --git a/models/cv/classification/densenet121/ixrt/ci/prepare.sh b/models/cv/classification/densenet121/ixrt/ci/prepare.sh
index 3ac521c0ed745dd4b98f475b9af614bff3137105..8d542a84ee0e27037281fc85a3a9799d159476e8 100644
--- a/models/cv/classification/densenet121/ixrt/ci/prepare.sh
+++ b/models/cv/classification/densenet121/ixrt/ci/prepare.sh
@@ -27,4 +27,4 @@ fi
pip install -r ../../ixrt_common/requirements.txt
mkdir checkpoints
-python3 ../../ixrt_common/export.py --model-name densenet121 --weight densenet121-a639ec97.pth --output checkpoints/densenet121.onnx
\ No newline at end of file
+python3 export.py --output checkpoints/densenet121.onnx
\ No newline at end of file
diff --git a/models/cv/classification/densenet121/ixrt/export.py b/models/cv/classification/densenet121/ixrt/export.py
new file mode 100644
index 0000000000000000000000000000000000000000..ff86753a004cd6611c7c7104e0061904bc3d2184
--- /dev/null
+++ b/models/cv/classification/densenet121/ixrt/export.py
@@ -0,0 +1,43 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import torch
+import torchvision.models as models
+import argparse
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+    parser.add_argument("--output", type=str)
+ args = parser.parse_args()
+ return args
+
+args = parse_args()
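+# pretrained=True loads densenet121-a639ec97.pth from the torch hub cache (/root/.cache/torch/hub/checkpoints)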
+model = models.densenet121(pretrained=True)
+model.cuda()
+model.eval()
+input = torch.randn(1, 3, 224, 224, device='cuda')
+export_onnx_file = args.output
+
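+# export a static batch-1 ONNX graph at opset 11 with constant folding enabled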
+torch.onnx.export(model,
+ input,
+ export_onnx_file,
+ export_params=True,
+ opset_version=11,
+ do_constant_folding=True,
+ input_names = ['input'],
+ output_names = ['output'],)
+print('Model has been converted to ONNX')
diff --git a/models/cv/classification/efficientnet_b1/ixrt/ci/prepare.sh b/models/cv/classification/efficientnet_b1/ixrt/ci/prepare.sh
index c69f7471f6a82156b66ba0cd953c0e25e0d9ec17..7bb940752879bc68163f7a4dc4c31e3494dbec54 100644
--- a/models/cv/classification/efficientnet_b1/ixrt/ci/prepare.sh
+++ b/models/cv/classification/efficientnet_b1/ixrt/ci/prepare.sh
@@ -28,4 +28,4 @@ fi
pip install -r ../../ixrt_common/requirements.txt
mkdir checkpoints
-python3 ../../ixrt_common/export_onnx.py --model-name efficientnet_b1 --output_model checkpoints/efficientnet-b1.onnx
\ No newline at end of file
+python3 ../../ixrt_common/export.py --model-name efficientnet_b1 --weight efficientnet_b1-c27df63c.pth --output checkpoints/efficientnet_b1.onnx
\ No newline at end of file
diff --git a/models/cv/classification/efficientnetv2_rw_t/igie/inference.py b/models/cv/classification/efficientnetv2_rw_t/igie/inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..e33c91fa1de2d25402f0ad3318e15f372d829908
--- /dev/null
+++ b/models/cv/classification/efficientnetv2_rw_t/igie/inference.py
@@ -0,0 +1,183 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import sys
+import argparse
+import tvm
+import torch
+import torchvision
+import numpy as np
+from tvm import relay
+from tqdm import tqdm
+from torchvision import transforms
+from torchvision.transforms.functional import InterpolationMode
+
+from timm.data import create_dataset, create_loader
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--engine",
+ type=str,
+ required=True,
+ help="igie engine path.")
+
+ parser.add_argument("--batchsize",
+ type=int,
+ required=True,
+ help="inference batch size.")
+
+ parser.add_argument("--datasets",
+ type=str,
+ required=True,
+ help="datasets path.")
+
+ parser.add_argument("--input_name",
+ type=str,
+ required=True,
+ help="input name of the model.")
+
+ parser.add_argument("--warmup",
+ type=int,
+ default=3,
+ help="number of warmup before test.")
+
+ parser.add_argument("--num_workers",
+ type=int,
+ default=16,
+ help="number of workers used in pytorch dataloader.")
+
+ parser.add_argument("--acc_target",
+ type=float,
+ default=None,
+ help="Model inference Accuracy target.")
+
+ parser.add_argument("--fps_target",
+ type=float,
+ default=None,
+ help="Model inference FPS target.")
+
+ parser.add_argument("--perf_only",
+ type=bool,
+ default=False,
+ help="Run performance test only")
+
+ args = parser.parse_args()
+
+ return args
+
+def get_dataloader(data_path, batch_size, num_workers):
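+    # timm eval loader: 288x288 bicubic input, crop_pct=1.0, ImageNet mean/std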
+ datasets = create_dataset(root=data_path, name="")
+
+ dataloader = create_loader(
+ datasets,
+ input_size=(3, 288, 288),
+ batch_size=batch_size,
+ interpolation='bicubic',
+ mean=(0.485, 0.456, 0.406),
+ std=(0.229, 0.224, 0.225),
+ crop_pct=1.0,
+ use_prefetcher = False,
+ num_workers = num_workers
+ )
+ return dataloader
+
+def get_topk_accuracy(pred, label):
+ if isinstance(pred, np.ndarray):
+ pred = torch.from_numpy(pred)
+
+ if isinstance(label, np.ndarray):
+ label = torch.from_numpy(label)
+
+ top1_acc = 0
+ top5_acc = 0
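+    # a top-1 hit also counts toward top-5; otherwise check membership in the top-5 indices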
+ for idx in range(len(label)):
+ label_value = label[idx]
+ if label_value == torch.topk(pred[idx].float(), 1).indices.data:
+ top1_acc += 1
+ top5_acc += 1
+
+ elif label_value in torch.topk(pred[idx].float(), 5).indices.data:
+ top5_acc += 1
+
+ return top1_acc, top5_acc
+
+def main():
+ args = parse_args()
+
+ batch_size = args.batchsize
+
+ # create iluvatar target & device
+ target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer")
+ device = tvm.device(target.kind.name, 0)
+
+ # load engine
+ lib = tvm.runtime.load_module(args.engine)
+
+ # create runtime from engine
+ module = tvm.contrib.graph_executor.GraphModule(lib["default"](device))
+
+ # just run perf test
+ if args.perf_only:
+ ftimer = module.module.time_evaluator("run", device, number=100, repeat=1)
+ prof_res = np.array(ftimer().results) * 1000
+ fps = batch_size * 1000 / np.mean(prof_res)
+ print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}")
+ else:
+ # warm up
+ for _ in range(args.warmup):
+ module.run()
+
+ # get dataloader
+ dataloader = get_dataloader(args.datasets, batch_size, args.num_workers)
+
+ top1_acc = 0
+ top5_acc = 0
+ total_num = 0
+
+ for image, label in tqdm(dataloader):
+
+ # pad the last batch
+ pad_batch = len(image) != batch_size
+
+ if pad_batch:
+ origin_size = len(image)
+ image = np.resize(image, (batch_size, *image.shape[1:]))
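+                # np.resize repeats samples to fill the fixed engine batch; the extra predictions are dropped after inference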
+
+ module.set_input(args.input_name, tvm.nd.array(image, device))
+
+ # run inference
+ module.run()
+
+ pred = module.get_output(0).asnumpy()
+
+ if pad_batch:
+ pred = pred[:origin_size]
+
+ # get batch accuracy
+ batch_top1_acc, batch_top5_acc = get_topk_accuracy(pred, label)
+
+ top1_acc += batch_top1_acc
+ top5_acc += batch_top5_acc
+ total_num += batch_size
+
+ result_stat = {}
+ result_stat["acc@1"] = round(top1_acc / total_num * 100.0, 3)
+ result_stat["acc@5"] = round(top5_acc / total_num * 100.0, 3)
+
+ print(f"\n* Top1 acc: {result_stat['acc@1']} %, Top5 acc: {result_stat['acc@5']} %")
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/models/cv/classification/efficientnetv2_rw_t/igie/requirements.txt b/models/cv/classification/efficientnetv2_rw_t/igie/requirements.txt
deleted file mode 100644
index 36677a29ab3a81e04e55e2185513580169404d15..0000000000000000000000000000000000000000
--- a/models/cv/classification/efficientnetv2_rw_t/igie/requirements.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-timm
-onnx
-tqdm
diff --git a/models/cv/classification/efficientnetv2_rw_t/igie/scripts/infer_efficientnetv2_rw_t_fp16_accuracy.sh b/models/cv/classification/efficientnetv2_rw_t/igie/scripts/infer_efficientnetv2_rw_t_fp16_accuracy.sh
index 9d96fcfb93ffd560a2a682f94068fd3322833a93..7e2e5ffbc665d6e70d0dc5ff7bcf0b870d79dd1b 100644
--- a/models/cv/classification/efficientnetv2_rw_t/igie/scripts/infer_efficientnetv2_rw_t_fp16_accuracy.sh
+++ b/models/cv/classification/efficientnetv2_rw_t/igie/scripts/infer_efficientnetv2_rw_t_fp16_accuracy.sh
@@ -28,7 +28,7 @@ python3 ${RUN_DIR}build_engine.py \
# inference
-python3 ${RUN_DIR}inference.py \
+python3 inference.py \
--engine efficientnetv2_rw_t_bs_${batchsize}_fp16.so \
--batchsize ${batchsize} \
--input_name input \
diff --git a/models/cv/classification/efficientnetv2_rw_t/igie/scripts/infer_efficientnetv2_rw_t_fp16_performance.sh b/models/cv/classification/efficientnetv2_rw_t/igie/scripts/infer_efficientnetv2_rw_t_fp16_performance.sh
index a3f9d58520cb98365e5c789fce1f07dba5627249..c08b48407740ee447d6bad514fe0aa76c001aec6 100644
--- a/models/cv/classification/efficientnetv2_rw_t/igie/scripts/infer_efficientnetv2_rw_t_fp16_performance.sh
+++ b/models/cv/classification/efficientnetv2_rw_t/igie/scripts/infer_efficientnetv2_rw_t_fp16_performance.sh
@@ -28,7 +28,7 @@ python3 ${RUN_DIR}build_engine.py \
# inference
-python3 ${RUN_DIR}inference.py \
+python3 inference.py \
--engine efficientnetv2_rw_t_bs_${batchsize}_fp16.so \
--batchsize ${batchsize} \
--input_name input \
diff --git a/models/cv/classification/ixrt_common/build_i8_engine.py b/models/cv/classification/ixrt_common/build_i8_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..6038b33f50cff7a14efcefa6673ae9d2fd19870b
--- /dev/null
+++ b/models/cv/classification/ixrt_common/build_i8_engine.py
@@ -0,0 +1,112 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import argparse
+import json
+import os
+
+import tensorrt
+import tensorrt as trt
+
+TRT_LOGGER = trt.Logger(tensorrt.Logger.VERBOSE)
+
+EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
+
+
+def GiB(val):
+ return val * 1 << 30
+
+
+def json_load(filename):
+ with open(filename) as json_file:
+ data = json.load(json_file)
+ return data
+
+
+def setDynamicRange(network, json_file):
+ """Sets ranges for network layers."""
+ quant_param_json = json_load(json_file)
+ act_quant = quant_param_json["act_quant_info"]
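+    # each tensor listed in the json gets a symmetric dynamic range [-|v|, |v|] from its recorded activation value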
+
+ for i in range(network.num_inputs):
+ input_tensor = network.get_input(i)
+ if act_quant.__contains__(input_tensor.name):
+ print(input_tensor.name)
+ value = act_quant[input_tensor.name]
+ tensor_max = abs(value)
+ tensor_min = -abs(value)
+ input_tensor.dynamic_range = (tensor_min, tensor_max)
+
+ for i in range(network.num_layers):
+ layer = network.get_layer(i)
+
+ for output_index in range(layer.num_outputs):
+ tensor = layer.get_output(output_index)
+
+ if act_quant.__contains__(tensor.name):
+ value = act_quant[tensor.name]
+ tensor_max = abs(value)
+ tensor_min = -abs(value)
+ tensor.dynamic_range = (tensor_min, tensor_max)
+ else:
+ print("\033[1;32m%s\033[0m" % tensor.name)
+
+
+def build_engine(onnx_file, json_file, engine_file):
+ builder = trt.Builder(TRT_LOGGER)
+ network = builder.create_network(EXPLICIT_BATCH)
+
+ config = builder.create_builder_config()
+
+ # If it is a dynamic onnx model , you need to add the following.
+ # profile = builder.create_optimization_profile()
+ # profile.set_shape("input_name", (batch, channels, min_h, min_w), (batch, channels, opt_h, opt_w), (batch, channels, max_h, max_w))
+ # config.add_optimization_profile(profile)
+
+ parser = trt.OnnxParser(network, TRT_LOGGER)
+ # config.max_workspace_size = GiB(1)
+ if not os.path.exists(onnx_file):
+ quit("ONNX file {} not found".format(onnx_file))
+
+ with open(onnx_file, "rb") as model:
+ if not parser.parse(model.read()):
+ print("ERROR: Failed to parse the ONNX file.")
+ for error in range(parser.num_errors):
+ print(parser.get_error(error))
+ return None
+
+ config.set_flag(trt.BuilderFlag.INT8)
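+    # the per-tensor ranges written by setDynamicRange below stand in for a calibration pass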
+
+ setDynamicRange(network, json_file)
+
+ engine = builder.build_engine(network, config)
+
+ with open(engine_file, "wb") as f:
+ f.write(engine.serialize())
+
+
+if __name__ == "__main__":
+ # Add plugins if needed
+ # import ctypes
+ # ctypes.CDLL("libmmdeploy_tensorrt_ops.so")
+ parser = argparse.ArgumentParser(
+ description="Writing qparams to onnx to convert tensorrt engine."
+ )
+ parser.add_argument("--onnx", type=str, default=None)
+ parser.add_argument("--qparam_json", type=str, default=None)
+ parser.add_argument("--engine", type=str, default=None)
+ arg = parser.parse_args()
+
+ build_engine(arg.onnx, arg.qparam_json, arg.engine)
+ print("\033[1;32mgenerate %s\033[0m" % arg.engine)
\ No newline at end of file
diff --git a/models/cv/classification/ixrt_common/config/EFFICIENTNET_B1_CONFIG b/models/cv/classification/ixrt_common/config/EFFICIENTNET_B1_CONFIG
index 0275a569d9c68074365cbe75427920818aec93ca..42e3e648cb2839c2017ec5134a9625f46566be73 100644
--- a/models/cv/classification/ixrt_common/config/EFFICIENTNET_B1_CONFIG
+++ b/models/cv/classification/ixrt_common/config/EFFICIENTNET_B1_CONFIG
@@ -17,8 +17,8 @@
# MODEL_NAME : basename of the generated onnx/engine files
# ORIGINE_MODEL : name of the original onnx file
IMGSIZE=224
-MODEL_NAME=EfficientNet_b1
-ORIGINE_MODEL=efficientnet-b1.onnx
+MODEL_NAME=efficientnet_b1
+ORIGINE_MODEL=efficientnet_b1.onnx
# QUANT CONFIG (only takes effect when PRECISION is int8)
# QUANT_OBSERVER : quantization strategy, one of [hist_percentile, percentile, minmax, entropy, ema]
diff --git a/models/cv/classification/ixrt_common/quant_i8.py b/models/cv/classification/ixrt_common/quant_i8.py
new file mode 100644
index 0000000000000000000000000000000000000000..c728c7a128f7ba5a041160c9452980861c7a9071
--- /dev/null
+++ b/models/cv/classification/ixrt_common/quant_i8.py
@@ -0,0 +1,166 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+"""这是一个高度自动化的 PPQ 量化的入口脚本,将你的模型和数据按要求进行打包:
+
+在自动化 API 中,我们使用 QuantizationSetting 对象传递量化参数。
+
+This file will show you how to quantize your network with PPQ
+ You should prepare your model and calibration dataset as follow:
+
+ ~/working/model.onnx <-- your model
+ ~/working/data/*.npy or ~/working/data/*.bin <-- your dataset
+
+if you are using caffe model:
+ ~/working/model.caffemdoel <-- your model
+ ~/working/model.prototext <-- your model
+
+### MAKE SURE YOUR INPUT LAYOUT IS [N, C, H, W] or [C, H, W] ###
+
+quantized model will be generated at: ~/working/quantized.onnx
+"""
+from ppq import *
+from ppq.api import *
+import os
+from calibration_dataset import getdataloader
+import argparse
+import random
+import numpy as np
+import torch
+
+
+def setseed(seed=42):
+ random.seed(seed)
+ np.random.seed(seed)
+ torch.manual_seed(seed)
+
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--model_name", type=str)
+ parser.add_argument("--model", type=str)
+ parser.add_argument("--dataset_dir", type=str, default="imagenet_val")
+ parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"],
+ default="hist_percentile")
+ parser.add_argument("--disable_quant_names", nargs='*', type=str)
+ parser.add_argument("--save_dir", type=str, help="save path", default=None)
+ parser.add_argument("--bsz", type=int, default=32)
+ parser.add_argument("--step", type=int, default=20)
+ parser.add_argument("--seed", type=int, default=42)
+ parser.add_argument("--imgsz", type=int, default=224)
+ args = parser.parse_args()
+ print("Quant config:", args)
+ print(args.disable_quant_names)
+ return args
+
+
+config = parse_args()
+
+# modify configuration below:
+WORKING_DIRECTORY = 'checkpoints' # choose your working directory
+TARGET_PLATFORM = TargetPlatform.TRT_INT8 # choose your target platform
+MODEL_TYPE = NetworkFramework.ONNX # or NetworkFramework.CAFFE
+INPUT_LAYOUT = 'chw' # input data layout, chw or hwc
+NETWORK_INPUTSHAPE = [32, 3, 224, 224] # input shape of your network
+EXECUTING_DEVICE = 'cuda' # 'cuda' or 'cpu'.
+REQUIRE_ANALYSE = False
+TRAINING_YOUR_NETWORK = False                    # whether to fine-tune the network
+# -------------------------------------------------------------------
+# Load your model file; PPQ will parse the onnx or caffe model file into its own format.
+# If you are using pytorch, tensorflow or another framework, export the model to onnx first
+# with torch.onnx.export. If you run into errors while exporting a torch model, feel free to contact us.
+# -------------------------------------------------------------------
+graph = None
+if MODEL_TYPE == NetworkFramework.ONNX:
+ graph = load_onnx_graph(onnx_import_file=config.model)
+if MODEL_TYPE == NetworkFramework.CAFFE:
+ graph = load_caffe_graph(
+ caffemodel_path=os.path.join(WORKING_DIRECTORY, 'model.caffemodel'),
+ prototxt_path=os.path.join(WORKING_DIRECTORY, 'model.prototxt'))
+assert graph is not None, 'Graph Loading Error, Check your input again.'
+
+# -------------------------------------------------------------------
+# The SETTING object controls PPQ's quantization logic: graph fusion, scheduling, and detailed quantization strategies.
+# If the quantization error of your network is too high, modify the attributes of the SETTING object to apply targeted optimizations.
+# -------------------------------------------------------------------
+QS = QuantizationSettingFactory.default_setting()
+
+# -------------------------------------------------------------------
+# The block below shows how to use a finetuning pass to improve quantization accuracy.
+# PPQ provides more than ten algorithms to help you recover accuracy.
+# Each of them is enabled via QS.xxxx = True.
+# Enable them as needed; turning everything on at once tends to blow up.
+# -------------------------------------------------------------------
+if TRAINING_YOUR_NETWORK:
+    QS.lsq_optimization = True                              # start a retraining pass to reduce quantization error
+    QS.lsq_optimization_setting.steps = 500                 # number of retraining steps; affects training time, 500 steps take roughly a few minutes
+    QS.lsq_optimization_setting.collecting_device = 'cuda'  # where cached data lives; 'cuda' keeps it on the GPU, switch to 'cpu' if GPU memory runs out
+
+
+dataloader = getdataloader(config.dataset_dir, config.step, batch_size=config.bsz, img_sz=config.imgsz)
+# ENABLE_CUDA_KERNEL speeds up quantization by 3x ~ 10x, but it cannot compile without the matching build environment.
+# Either install the build environment, or quantize without the CUDA kernel by removing the `with ENABLE_CUDA_KERNEL():` line.
+with ENABLE_CUDA_KERNEL():
+    print('Quantizing the network; depending on your quantization configuration this will take a while:')
+ quantized = quantize_native_model(
+        setting=QS,                     # the setting object controls the standard quantization logic
+ model=graph,
+ calib_dataloader=dataloader,
+ calib_steps=config.step,
+        input_shape=NETWORK_INPUTSHAPE, # if your network has a single input, pass its shape via this argument
+        inputs=None,
+        # if your network has multiple inputs, pass them here instead: input_shape=None, inputs=[torch.zeros(1,3,224,224), torch.zeros(1,3,224,224)]
+        collate_fn=lambda x: x[0].to(EXECUTING_DEVICE), # collate_fn works like a torch dataloader collate_fn and handles data preprocessing;
+        # you can also reuse the one from your torch dataloader and set this to None
+ platform=TARGET_PLATFORM,
+ device=EXECUTING_DEVICE,
+ do_quantize=True)
+
+ # -------------------------------------------------------------------
+    # If you need to run the quantized network and get its outputs, create an executor.
+    # The executor behaves much like a torch.Module; use it to obtain execution results.
+    # Note that this must be done before the export step.
+ # -------------------------------------------------------------------
+ executor = TorchExecutor(graph=quantized, device=EXECUTING_DEVICE)
+ # output = executor.forward(input)
+
+ # -------------------------------------------------------------------
+    # When computing quantization error, PPQ uses the inverse signal-to-noise ratio, i.e. noise energy / signal energy.
+    # A quantization error of 0.1 means quantization noise carries roughly 10% of the overall signal energy.
+    # Note that graphwise_error_analyse measures the accumulated error:
+    # the last layer of the network usually shows a large accumulated error, contributed by all the layers before it.
+    # Use layerwise_error_analyse to trace the error back to individual layers.
+ # -------------------------------------------------------------------
+    print('Computing network quantization error (SNR); the error of the last layer should stay below 0.1 to preserve accuracy:')
+ reports = graphwise_error_analyse(
+ graph=quantized, running_device=EXECUTING_DEVICE, steps=32,
+ dataloader=dataloader, collate_fn=lambda x: x[0].to(EXECUTING_DEVICE))
+ for op, snr in reports.items():
+        if snr > 0.1: ppq_warning(f'Layer {op} shows significant accumulated quantization error; consider further optimization.')
+
+ if REQUIRE_ANALYSE:
+        print('Computing per-layer quantization error (SNR); the standalone error of each layer should stay below 0.1 to preserve accuracy:')
+ layerwise_error_analyse(graph=quantized, running_device=EXECUTING_DEVICE,
+ interested_outputs=None,
+ dataloader=dataloader, collate_fn=lambda x: x.to(EXECUTING_DEVICE))
+
+ # -------------------------------------------------------------------
+    # Use export_ppq_graph to export the quantized model.
+    # PPQ adjusts the model format according to the export platform you selected.
+ # -------------------------------------------------------------------
+    print('Quantization finished, generating target files:')
+ export_ppq_graph(
+ graph=quantized, platform=TARGET_PLATFORM,
+ graph_save_to=os.path.join(config.save_dir, f"quantized_{config.model_name}.onnx"),
+ config_save_to=os.path.join(config.save_dir, 'quant_cfg.json'))
diff --git a/models/cv/classification/resnetv1d50/ixrt/README.md b/models/cv/classification/resnetv1d50/ixrt/README.md
index 0a5cf2cf44e5bc65ffaae70eaa449ee81dc29e2b..9a8d945de7190080c83437591649145961c7eecb 100644
--- a/models/cv/classification/resnetv1d50/ixrt/README.md
+++ b/models/cv/classification/resnetv1d50/ixrt/README.md
@@ -28,7 +28,7 @@ yum install -y mesa-libGL
apt install -y libgl1-mesa-glx
pip3 install -r ../../ixrt_common/requirements.txt
-pip3 install mmcv==1.5.3 mmcls==0.24.0
+pip3 install mmcv==1.5.3 mmcls==0.24.0 ppq pycuda transformers==4.37.1
```
### Model Conversion
diff --git a/models/cv/classification/resnetv1d50/ixrt/ci/prepare.sh b/models/cv/classification/resnetv1d50/ixrt/ci/prepare.sh
index 22566b28034a67b41a63152251dfbd32206087cb..d5adbfd5beca26708b1a07d262b46ff661c5bb9b 100644
--- a/models/cv/classification/resnetv1d50/ixrt/ci/prepare.sh
+++ b/models/cv/classification/resnetv1d50/ixrt/ci/prepare.sh
@@ -26,7 +26,7 @@ else
fi
pip install -r ../../ixrt_common/requirements.txt
-pip install mmcv==1.5.3 mmcls==0.24.0
+pip install mmcv==1.5.3 mmcls==0.24.0 ppq pycuda transformers==4.37.1
unzip -q /root/data/repos/mmpretrain-0.24.0.zip -d ./
mkdir checkpoints
python3 ../../ixrt_common/export_mmcls.py --cfg mmpretrain/configs/resnet/resnetv1d50_b32x8_imagenet.py --weight resnetv1d50_b32x8_imagenet_20210531-db14775a.pth --output checkpoints/resnet_v1_d50.onnx
\ No newline at end of file
diff --git a/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_fp16_accuracy.sh b/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_fp16_accuracy.sh
index 482924938efb2a72399df02d9704ea8dd34e82e6..b743d7084ae058118c29daaf494769fc293ceb41 100644
--- a/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_fp16_accuracy.sh
+++ b/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_fp16_accuracy.sh
@@ -51,8 +51,6 @@ echo RUN_DIR : ${RUN_DIR}
echo CONFIG_DIR : ${CONFIG_DIR}
echo ====================== Model Info ======================
echo Model Name : ${MODEL_NAME}
-echo Model Input Name : ${MODEL_INPUT_NAME}
-echo Model Output Name : ${MODEL_OUTPUT_NAME}
echo Onnx Path : ${ORIGINE_MODEL}
step=0
@@ -71,34 +69,6 @@ else
echo " "Generate ${SIM_MODEL}
fi
-# Quant Model
-if [ $PRECISION == "int8" ];then
- let step++
- echo;
- echo [STEP ${step}] : Quant Model
- if [[ -z ${QUANT_EXIST_ONNX} ]];then
- QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx
- fi
- if [[ -f ${QUANT_EXIST_ONNX} ]];then
- SIM_MODEL=${QUANT_EXIST_ONNX}
- echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed
- else
- python3 ${RUN_DIR}/quant.py \
- --model ${SIM_MODEL} \
- --model_name ${MODEL_NAME} \
- --dataset_dir ${DATASETS_DIR} \
- --observer ${QUANT_OBSERVER} \
- --disable_quant_names ${DISABLE_QUANT_LIST[@]} \
- --save_dir $CHECKPOINTS_DIR \
- --bsz ${QUANT_BATCHSIZE} \
- --step ${QUANT_STEP} \
- --seed ${QUANT_SEED} \
- --imgsz ${IMGSIZE}
- SIM_MODEL=${QUANT_EXIST_ONNX}
- echo " "Generate ${SIM_MODEL}
- fi
-fi
-
# Change Batchsize
let step++
echo;
@@ -141,4 +111,4 @@ python3 ${RUN_DIR}/inference.py \
--acc_target ${TGT} \
--bsz ${BSZ}; check_status
-exit ${EXIT_STATUS}
\ No newline at end of file
+exit ${EXIT_STATUS}
diff --git a/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_fp16_performance.sh b/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_fp16_performance.sh
index c843057dda987fd834e1fa0580deb2a8cdea17ce..e7a4f1a7276406a0ed7400af4368b5bec2a06e06 100644
--- a/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_fp16_performance.sh
+++ b/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_fp16_performance.sh
@@ -51,8 +51,6 @@ echo RUN_DIR : ${RUN_DIR}
echo CONFIG_DIR : ${CONFIG_DIR}
echo ====================== Model Info ======================
echo Model Name : ${MODEL_NAME}
-echo Model Input Name : ${MODEL_INPUT_NAME}
-echo Model Output Name : ${MODEL_OUTPUT_NAME}
echo Onnx Path : ${ORIGINE_MODEL}
step=0
@@ -71,34 +69,6 @@ else
echo " "Generate ${SIM_MODEL}
fi
-# Quant Model
-if [ $PRECISION == "int8" ];then
- let step++
- echo;
- echo [STEP ${step}] : Quant Model
- if [[ -z ${QUANT_EXIST_ONNX} ]];then
- QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx
- fi
- if [[ -f ${QUANT_EXIST_ONNX} ]];then
- SIM_MODEL=${QUANT_EXIST_ONNX}
- echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed
- else
- python3 ${RUN_DIR}/quant.py \
- --model ${SIM_MODEL} \
- --model_name ${MODEL_NAME} \
- --dataset_dir ${DATASETS_DIR} \
- --observer ${QUANT_OBSERVER} \
- --disable_quant_names ${DISABLE_QUANT_LIST[@]} \
- --save_dir $CHECKPOINTS_DIR \
- --bsz ${QUANT_BATCHSIZE} \
- --step ${QUANT_STEP} \
- --seed ${QUANT_SEED} \
- --imgsz ${IMGSIZE}
- SIM_MODEL=${QUANT_EXIST_ONNX}
- echo " "Generate ${SIM_MODEL}
- fi
-fi
-
# Change Batchsize
let step++
echo;
@@ -141,4 +111,4 @@ python3 ${RUN_DIR}/inference.py \
--fps_target ${TGT} \
--bsz ${BSZ}; check_status
-exit ${EXIT_STATUS}
\ No newline at end of file
+exit ${EXIT_STATUS}
diff --git a/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_int8_accuracy.sh b/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_int8_accuracy.sh
index a66d6a253206c49ad68752793ffb1bd7b7f12958..df1fdc610c2332f33d210e6f417cf44da7fef7bd 100644
--- a/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_int8_accuracy.sh
+++ b/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_int8_accuracy.sh
@@ -13,6 +13,7 @@
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
+set -x
EXIT_STATUS=0
check_status()
{
@@ -28,7 +29,7 @@ WARM_UP=0
LOOP_COUNT=-1
RUN_MODE=ACC
PRECISION=int8
-
+export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
# Update arguments
index=0
options=$@
@@ -43,6 +44,7 @@ do
done
source ${CONFIG_DIR}
+echo ${QUANT_OBSERVER}
ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}
echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
@@ -60,16 +62,15 @@ SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
# Simplify Model
let step++
-echo;
-echo [STEP ${step}] : Simplify Model
-if [ -f ${SIM_MODEL} ];then
- echo " "Simplify Model, ${SIM_MODEL} has been existed
-else
- python3 ${RUN_DIR}/simplify_model.py \
- --origin_model $ORIGINE_MODEL \
- --output_model ${SIM_MODEL}
- echo " "Generate ${SIM_MODEL}
-fi
+echo [STEP ${step}] : Simplify Model
+if [ -f ${SIM_MODEL} ];then
+    echo " "Simplify Model, ${SIM_MODEL} has been existed
+else
+    python3 ${RUN_DIR}/simplify_model.py \
+        --origin_model $ORIGINE_MODEL \
+        --output_model ${SIM_MODEL}
+    echo " "Generate ${SIM_MODEL}
+fi
# Quant Model
if [ $PRECISION == "int8" ];then
@@ -83,7 +84,7 @@ if [ $PRECISION == "int8" ];then
SIM_MODEL=${QUANT_EXIST_ONNX}
echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed
else
- python3 ${RUN_DIR}/quant.py \
+ python3 ${RUN_DIR}/quant_i8.py \
--model ${SIM_MODEL} \
--model_name ${MODEL_NAME} \
--dataset_dir ${DATASETS_DIR} \
@@ -120,15 +121,15 @@ ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
if [ -f $ENGINE_FILE ];then
echo " "Build Engine Skip, $ENGINE_FILE has been existed
else
- python3 ${RUN_DIR}/build_engine.py \
- --precision ${PRECISION} \
- --model ${FINAL_MODEL} \
+ python3 ${RUN_DIR}/build_i8_engine.py \
+ --onnx ${FINAL_MODEL} \
+ --qparam_json ${CHECKPOINTS_DIR}/quant_cfg.json \
--engine ${ENGINE_FILE}
echo " "Generate Engine ${ENGINE_FILE}
fi
# Inference
-let step++
+# let step++
echo;
echo [STEP ${step}] : Inference
python3 ${RUN_DIR}/inference.py \
@@ -141,4 +142,4 @@ python3 ${RUN_DIR}/inference.py \
--acc_target ${TGT} \
--bsz ${BSZ}; check_status
-exit ${EXIT_STATUS}
\ No newline at end of file
+exit ${EXIT_STATUS}
diff --git a/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_int8_performance.sh b/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_int8_performance.sh
index e578762eb996592b509a8eed995b15b227ae8a86..72ca157b222ba853eb530146099c0cc3bfbb68c9 100644
--- a/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_int8_performance.sh
+++ b/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_int8_performance.sh
@@ -28,7 +28,7 @@ WARM_UP=3
LOOP_COUNT=20
RUN_MODE=FPS
PRECISION=int8
-
+export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
# Update arguments
index=0
options=$@
@@ -43,6 +43,7 @@ do
done
source ${CONFIG_DIR}
+echo ${QUANT_OBSERVER}
ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL}
echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR}
@@ -60,7 +61,6 @@ SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx
# Simplify Model
let step++
-echo;
echo [STEP ${step}] : Simplify Model
if [ -f ${SIM_MODEL} ];then
echo " "Simplify Model, ${SIM_MODEL} has been existed
@@ -83,7 +83,7 @@ if [ $PRECISION == "int8" ];then
SIM_MODEL=${QUANT_EXIST_ONNX}
echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed
else
- python3 ${RUN_DIR}/quant.py \
+ python3 ${RUN_DIR}/quant_i8.py \
--model ${SIM_MODEL} \
--model_name ${MODEL_NAME} \
--dataset_dir ${DATASETS_DIR} \
@@ -120,15 +120,15 @@ ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine
if [ -f $ENGINE_FILE ];then
echo " "Build Engine Skip, $ENGINE_FILE has been existed
else
- python3 ${RUN_DIR}/build_engine.py \
- --precision ${PRECISION} \
- --model ${FINAL_MODEL} \
+ python3 ${RUN_DIR}/build_i8_engine.py \
+ --onnx ${FINAL_MODEL} \
+ --qparam_json ${CHECKPOINTS_DIR}/quant_cfg.json \
--engine ${ENGINE_FILE}
echo " "Generate Engine ${ENGINE_FILE}
fi
# Inference
-let step++
+# let step++
echo;
echo [STEP ${step}] : Inference
python3 ${RUN_DIR}/inference.py \
@@ -138,7 +138,7 @@ python3 ${RUN_DIR}/inference.py \
--warm_up=${WARM_UP} \
--loop_count ${LOOP_COUNT} \
--test_mode ${RUN_MODE} \
- --fps_target ${TGT} \
+ --acc_target ${TGT} \
--bsz ${BSZ}; check_status
exit ${EXIT_STATUS}
\ No newline at end of file
diff --git a/models/cv/classification/resnext50_32x4d/ixrt/ci/prepare.sh b/models/cv/classification/resnext50_32x4d/ixrt/ci/prepare.sh
index cd826b9795c96c6b3156d80022667d8a60ab6715..b9671165ce53a144c0f9b16d1e54f3ba824723ab 100644
--- a/models/cv/classification/resnext50_32x4d/ixrt/ci/prepare.sh
+++ b/models/cv/classification/resnext50_32x4d/ixrt/ci/prepare.sh
@@ -27,4 +27,4 @@ fi
pip install -r ../../ixrt_common/requirements.txt
mkdir checkpoints
-python3 export.py ../../ixrt_common/export.py --model-name resnext50_32x4d --weight resnext50_32x4d-7cdf4587.pth --output checkpoints/resnext50_32x4d.onnx
\ No newline at end of file
+python3 ../../ixrt_common/export.py --model-name resnext50_32x4d --weight resnext50_32x4d-7cdf4587.pth --output checkpoints/resnext50_32x4d.onnx
\ No newline at end of file
diff --git a/models/cv/instance_segmentation/solov1/ixrt/ci/prepare.sh b/models/cv/instance_segmentation/solov1/ixrt/ci/prepare.sh
index 09fa1878415ca72f1fd17b9ca6e19b16926756f9..66c8f9d0525bc855866325817dd7ee87aad8989f 100644
--- a/models/cv/instance_segmentation/solov1/ixrt/ci/prepare.sh
+++ b/models/cv/instance_segmentation/solov1/ixrt/ci/prepare.sh
@@ -44,6 +44,6 @@ cd ..
mkdir -p checkpoints
ln -s /root/data/checkpoints/solo_r50_fpn_3x_coco_20210901_012353-11d224d7.pth ./
-ln -s /root/data/datasets/coco ./
+ln -s /root/data/datasets/coco2017 ./
python3 solo_torch2onnx.py --cfg ./solo_r50_fpn_3x_coco.py --checkpoint ./solo_r50_fpn_3x_coco_20210901_012353-11d224d7.pth --batch_size 1
mv r50_solo_bs1_800x800.onnx ./checkpoints/r50_solo_bs1_800x800.onnx
\ No newline at end of file
diff --git a/models/cv/multi_object_tracking/fastreid/igie/ci/prepare.sh b/models/cv/multi_object_tracking/fastreid/igie/ci/prepare.sh
index 700282231245007abf2aad21967073edd312c77e..5ddfdcb2a001023d573acfbeb67e7bb5b5f9e707 100644
--- a/models/cv/multi_object_tracking/fastreid/igie/ci/prepare.sh
+++ b/models/cv/multi_object_tracking/fastreid/igie/ci/prepare.sh
@@ -18,7 +18,8 @@ set -x
pip3 install -r requirements.txt
-# clone fast-reid first
+# install fast-reid
+git clone https://github.com/JDAI-CV/fast-reid.git --depth=1
cd fast-reid
pip3 install -r docs/requirements.txt
diff --git a/models/cv/object_detection/foveabox/ixrt/ci/prepare.sh b/models/cv/object_detection/foveabox/ixrt/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..507d455e4d7a3b7e169cf7c422331910ce8eaa88
--- /dev/null
+++ b/models/cv/object_detection/foveabox/ixrt/ci/prepare.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+ apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+ yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
+pip3 install -r requirements.txt
+
+python3 export.py --weight fovea_r50_fpn_4x4_1x_coco_20200219-ee4d5303.pth --cfg fovea_r50_fpn_4xb4-1x_coco.py --output foveabox.onnx
+
+onnxsim foveabox.onnx foveabox_opt.onnx
\ No newline at end of file
diff --git a/models/cv/object_detection/fsaf/ixrt/ci/prepare.sh b/models/cv/object_detection/fsaf/ixrt/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..67155f5998e7d58e3116cd34e7dd8320b37f5437
--- /dev/null
+++ b/models/cv/object_detection/fsaf/ixrt/ci/prepare.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+ apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+ yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
+pip3 install -r requirements.txt
+# export onnx model
+python3 export.py --weight fsaf_r50_fpn_1x_coco-94ccc51f.pth --cfg fsaf_r50_fpn_1x_coco.py --output fsaf.onnx
+
+# use onnxsim optimize onnx model
+onnxsim fsaf.onnx fsaf_opt.onnx
\ No newline at end of file
diff --git a/models/cv/object_detection/hrnet/ixrt/ci/prepare.sh b/models/cv/object_detection/hrnet/ixrt/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..cbc14791567345842f24b3082733dee73dd56776
--- /dev/null
+++ b/models/cv/object_detection/hrnet/ixrt/ci/prepare.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+ apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+ yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
+pip3 install -r requirements.txt
+# export onnx model
+python3 export.py --weight fcos_hrnetv2p_w18_gn-head_4x4_1x_coco_20201212_100710-4ad151de.pth --cfg fcos_hrnetv2p-w18-gn-head_4xb4-1x_coco.py --output hrnet.onnx
+
+# Use onnxsim optimize onnx model
+onnxsim hrnet.onnx hrnet_opt.onnx
\ No newline at end of file
diff --git a/models/cv/object_detection/retinaface/igie/inference.py b/models/cv/object_detection/retinaface/igie/inference.py
index 44d29d1b9e1c8075c1212cecbe13ed683ebf1d7f..86782697f78fd90308c571d50e70410dcc245344 100644
--- a/models/cv/object_detection/retinaface/igie/inference.py
+++ b/models/cv/object_detection/retinaface/igie/inference.py
@@ -132,7 +132,7 @@ class FaceDataset(Dataset):
return np.concatenate([i[None] for i in im], axis=0), path, shapes, path_ori
def _load_image(self, i):
- im = cv2.imread(self.img_dir+'/images'+self.imgs_path[i], cv2.IMREAD_COLOR)
+ im = cv2.imread(self.img_dir+'/images/'+self.imgs_path[i], cv2.IMREAD_COLOR)
h0, w0 = im.shape[:2]
r = self.image_size / max(h0, w0)
if r != 1:
diff --git a/models/cv/object_detection/retinaface/ixrt/README.md b/models/cv/object_detection/retinaface/ixrt/README.md
index 67ce9e3cdccf9bc8326ff98572515ebddce4fa9e..2323b20fe2d009e7c9ad217f858084e196a524ec 100644
--- a/models/cv/object_detection/retinaface/ixrt/README.md
+++ b/models/cv/object_detection/retinaface/ixrt/README.md
@@ -47,7 +47,7 @@ python3 torch2onnx.py --model mobilenet0.25_Final.pth --onnx_model mnetv1_retina
```bash
export DATASETS_DIR=/Path/to/widerface/
-export GT_DIR=../igie/ground_truth
+export GT_DIR=../igie/widerface_evaluate/ground_truth
```
### FP16
diff --git a/models/cv/object_detection/retinaface/ixrt/ci/prepare.sh b/models/cv/object_detection/retinaface/ixrt/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..0796d90b857d7d36426bdfd9e38631681b9c0c04
--- /dev/null
+++ b/models/cv/object_detection/retinaface/ixrt/ci/prepare.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+ apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+ yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
+pip3 install -r requirements.txt
+
+python3 setup.py build_ext --inplace
+# export onnx model
+python3 torch2onnx.py --model mobilenet0.25_Final.pth --onnx_model mnetv1_retinaface.onnx
\ No newline at end of file
diff --git a/models/cv/object_detection/retinaface/ixrt/evaluation.py b/models/cv/object_detection/retinaface/ixrt/evaluation.py
index d3c6495aaf782725761a52c9d2442564b482155d..8d1a9e0731532faf547aa96417edc1a6da2fda9b 100644
--- a/models/cv/object_detection/retinaface/ixrt/evaluation.py
+++ b/models/cv/object_detection/retinaface/ixrt/evaluation.py
@@ -289,5 +289,7 @@ def evaluation(pred, gt_path, iou_thresh=0.5):
print("Medium Val AP: {}".format(aps[1]))
print("Hard Val AP: {}".format(aps[2]))
print("=================================================")
+ metricResult = {"metricResult": {"Easy Val AP": aps[0], "Medium Val AP": aps[1], "Hard Val AP": aps[2]}}
+ print(metricResult)
return aps[0]
\ No newline at end of file
diff --git a/models/cv/object_detection/retinaface/ixrt/scripts/infer_retinaface_fp16_accuracy.sh b/models/cv/object_detection/retinaface/ixrt/scripts/infer_retinaface_fp16_accuracy.sh
index 5722980c09728dc0e0fc0bd131bc51c126fa0290..3fca161ad63023affdf118ba3e312ce94455b13b 100644
--- a/models/cv/object_detection/retinaface/ixrt/scripts/infer_retinaface_fp16_accuracy.sh
+++ b/models/cv/object_detection/retinaface/ixrt/scripts/infer_retinaface_fp16_accuracy.sh
@@ -44,6 +44,7 @@ do
done
PROJ_DIR=${PROJ_DIR:-"."}
+GT_DIR=${GT_DIR:-"../igie/widerface_evaluate/ground_truth"}
DATASETS_DIR="${DATASETS_DIR}"
CHECKPOINTS_DIR="${PROJ_DIR}"
RUN_DIR="${PROJ_DIR}"
diff --git a/models/cv/object_detection/retinaface/ixrt/scripts/infer_retinaface_fp16_performance.sh b/models/cv/object_detection/retinaface/ixrt/scripts/infer_retinaface_fp16_performance.sh
index ddbcc65577d11410149adeb1c08cdbb305651ce5..cb486381f32bd88954137ccb6a4c44d963c50382 100644
--- a/models/cv/object_detection/retinaface/ixrt/scripts/infer_retinaface_fp16_performance.sh
+++ b/models/cv/object_detection/retinaface/ixrt/scripts/infer_retinaface_fp16_performance.sh
@@ -44,6 +44,7 @@ do
done
PROJ_DIR=${PROJ_DIR:-"."}
+GT_DIR=${GT_DIR:-"../igie/widerface_evaluate/ground_truth"}
DATASETS_DIR="${DATASETS_DIR}"
CHECKPOINTS_DIR="${PROJ_DIR}"
RUN_DIR="${PROJ_DIR}"
diff --git a/models/cv/object_detection/retinaface/ixrt/wider_face_dataset.py b/models/cv/object_detection/retinaface/ixrt/wider_face_dataset.py
index 8095cb6f05897c19d387230755b32c1ae6ad3352..e383834f688e59746408f3873d0c3794db5e43bd 100644
--- a/models/cv/object_detection/retinaface/ixrt/wider_face_dataset.py
+++ b/models/cv/object_detection/retinaface/ixrt/wider_face_dataset.py
@@ -48,9 +48,9 @@ class WiderFaceDetection(data.Dataset):
def __init__(self, prj_dir, preproc=lt_preproc, input_size=(320, 320)):
self.preproc = preproc
self.input_size = input_size
- self.image_dir = os.path.join(prj_dir, "images")
+ self.image_dir = os.path.join(prj_dir, "val/images")
- testset_list = os.path.join(prj_dir, "wider_val.txt")
+ testset_list = os.path.join(prj_dir, "val/wider_val.txt")
with open(testset_list, 'r') as fr:
self.imgs_path = fr.read().split()
diff --git a/models/cv/object_detection/yolov3/igie/requirements.txt b/models/cv/object_detection/yolov3/igie/requirements.txt
index 171602527bdea43ee2216f9ad4629d83cfd92e38..d58360efcf45ff6176a24cc37580d08ef176381b 100644
--- a/models/cv/object_detection/yolov3/igie/requirements.txt
+++ b/models/cv/object_detection/yolov3/igie/requirements.txt
@@ -1,5 +1,5 @@
tqdm
onnx
onnxsim
-ultralytics
+ultralytics==8.3.97
pycocotools
diff --git a/models/cv/object_detection/yolov5/igie/requirements.txt b/models/cv/object_detection/yolov5/igie/requirements.txt
index 171602527bdea43ee2216f9ad4629d83cfd92e38..d58360efcf45ff6176a24cc37580d08ef176381b 100644
--- a/models/cv/object_detection/yolov5/igie/requirements.txt
+++ b/models/cv/object_detection/yolov5/igie/requirements.txt
@@ -1,5 +1,5 @@
tqdm
onnx
onnxsim
-ultralytics
+ultralytics==8.3.97
pycocotools
diff --git a/models/cv/object_detection/yolov5/ixrt/requirements.txt b/models/cv/object_detection/yolov5/ixrt/requirements.txt
index b0f4374b2b778c81875da50d088fecedd01689c9..10a9fba6a70545eee20ab0db7bb740b1d4807f95 100644
--- a/models/cv/object_detection/yolov5/ixrt/requirements.txt
+++ b/models/cv/object_detection/yolov5/ixrt/requirements.txt
@@ -1,7 +1,7 @@
tqdm
onnx
onnxsim
-ultralytics
+ultralytics==8.3.97
pycocotools
opencv-python==4.6.0.66
pycuda
\ No newline at end of file
diff --git a/models/cv/object_detection/yolov5s/ixrt/requirements.txt b/models/cv/object_detection/yolov5s/ixrt/requirements.txt
index ffb8ce179fef26f79070045778708b03b8111fce..b1a10ab060644ea96d6ad77b36dbc4367a632591 100644
--- a/models/cv/object_detection/yolov5s/ixrt/requirements.txt
+++ b/models/cv/object_detection/yolov5s/ixrt/requirements.txt
@@ -1,6 +1,6 @@
tqdm
onnx
onnxsim
-ultralytics
+ultralytics==8.3.97
pycocotools
pycuda
\ No newline at end of file
diff --git a/models/cv/object_detection/yolov7/ixrt/requirements.txt b/models/cv/object_detection/yolov7/ixrt/requirements.txt
index b0f4374b2b778c81875da50d088fecedd01689c9..10a9fba6a70545eee20ab0db7bb740b1d4807f95 100644
--- a/models/cv/object_detection/yolov7/ixrt/requirements.txt
+++ b/models/cv/object_detection/yolov7/ixrt/requirements.txt
@@ -1,7 +1,7 @@
tqdm
onnx
onnxsim
-ultralytics
+ultralytics==8.3.97
pycocotools
opencv-python==4.6.0.66
pycuda
\ No newline at end of file
diff --git a/models/cv/ocr/kie_layoutxlm/igie/requirements.txt b/models/cv/ocr/kie_layoutxlm/igie/requirements.txt
index ede2fc9df150acb6ef4821e922de2ea645355f7d..2f2f00126b5801c5996ec60782011f59875bf740 100644
--- a/models/cv/ocr/kie_layoutxlm/igie/requirements.txt
+++ b/models/cv/ocr/kie_layoutxlm/igie/requirements.txt
@@ -5,6 +5,6 @@ Polygon3
paddlenlp==2.8.1
lanms-neo==1.0.2
paddleocr==2.6.0
-paddle2onnx
+paddle2onnx==1.3.0
python-bidi
protobuf==3.20.3
\ No newline at end of file
diff --git a/models/multimodal/vision_language_model/aria/vllm/README.md b/models/multimodal/vision_language_model/aria/vllm/README.md
index ae768ce6f06f8558014b34aaee00df7d45855751..7ef43e546b4ecc15f06f2d5dc7d7e8acc0aa404c 100644
--- a/models/multimodal/vision_language_model/aria/vllm/README.md
+++ b/models/multimodal/vision_language_model/aria/vllm/README.md
@@ -37,6 +37,8 @@ In order to run the model smoothly, you need to get the sdk from [resource cente
yum install -y mesa-libGL
## Ubuntu
apt install -y libgl1-mesa-glx
+
+pip install transformers==4.48.0
```
## Model Inference
diff --git a/models/multimodal/vision_language_model/aria/vllm/ci/prepare.sh b/models/multimodal/vision_language_model/aria/vllm/ci/prepare.sh
index 7232aa2996f379a961cf931968a1319fb70ac091..ff5f4e533365daff8abc4c396d8b9206ec3cd6b3 100644
--- a/models/multimodal/vision_language_model/aria/vllm/ci/prepare.sh
+++ b/models/multimodal/vision_language_model/aria/vllm/ci/prepare.sh
@@ -25,3 +25,4 @@ else
fi
cp -r ../../vllm_public_assets/ ./
+pip install transformers==4.48.0
\ No newline at end of file
diff --git a/models/multimodal/vision_language_model/chameleon_7b/vllm/README.md b/models/multimodal/vision_language_model/chameleon_7b/vllm/README.md
index 1ed7c9116c970df30b47800496835aac9a0016c9..7a488b0a320202b0914a55f724627eb98eb482b6 100755
--- a/models/multimodal/vision_language_model/chameleon_7b/vllm/README.md
+++ b/models/multimodal/vision_language_model/chameleon_7b/vllm/README.md
@@ -17,6 +17,8 @@ Chameleon, an AI system that mitigates these limitations by augmenting LLMs with
- Model:
```bash
+cp -r ../../vllm_public_assets/ ./
+
# Download model from the website and make sure the model's path is "data/chameleon-7b"
mkdir data
```
diff --git a/models/multimodal/vision_language_model/chameleon_7b/vllm/ci/prepare.sh b/models/multimodal/vision_language_model/chameleon_7b/vllm/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..7232aa2996f379a961cf931968a1319fb70ac091
--- /dev/null
+++ b/models/multimodal/vision_language_model/chameleon_7b/vllm/ci/prepare.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+ apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+ yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
+
+cp -r ../../vllm_public_assets/ ./
diff --git a/models/multimodal/vision_language_model/chameleon_7b/vllm/utils.py b/models/multimodal/vision_language_model/chameleon_7b/vllm/utils.py
deleted file mode 100644
index 48445ed97d08a8388a90d20e026609b5c1e88a99..0000000000000000000000000000000000000000
--- a/models/multimodal/vision_language_model/chameleon_7b/vllm/utils.py
+++ /dev/null
@@ -1,224 +0,0 @@
-# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
-# All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"); you may
-# not use this file except in compliance with the License. You may obtain
-# a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
-# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
-# License for the specific language governing permissions and limitations
-# under the License.
-import argparse
-import codecs
-import logging
-
-"""
-The following arguments can not be add in args...
-early_stopping: Union[bool, str] = False,
-early_stopping: Controls the stopping condition for beam search. It
- accepts the following values: `True`, where the generation stops as
- soon as there are `best_of` complete candidates; `False`, where an
- heuristic is applied and the generation stops when is it very
- unlikely to find better candidates; `"never"`, where the beam search
- procedure only stops when there cannot be better candidates
- (canonical beam search algorithm).
-stop: Optional[Union[str, List[str]]] = None,
-stop_token_ids: Optional[List[int]] = None,
-logits_processors: Optional[List[LogitsProcessor]] = None,
-logits_processors: List of functions that modify logits based on
- previously generated tokens, and optionally prompt tokens as
- a first argument.
-truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]] = None,
-truncate_prompt_tokens: If set to an integer k, will use only the last k
- tokens from the prompt (i.e., left truncation). Defaults to None
- (i.e., no truncation).
- """
-
-
-def sampling_add_cli_args(args: argparse.ArgumentParser) -> argparse.ArgumentParser:
- args.add_argument(
- "--n",
- type=int,
- default=1,
- help="Number of output sequences to return for the given prompt.",
- )
- args.add_argument(
- "--best-of",
- type=int,
- default=None,
- help="Number of output sequences that are generated from the prompt. "
- "From these `best_of` sequences, the top `n` sequences are returned. "
- "`best_of` must be greater than or equal to `n`. This is treated as "
- "the beam width when `use_beam_search` is True. By default, `best_of`"
- "is set to `n`.",
- )
- args.add_argument(
- "--presence-penalty",
- type=float,
- default=0.0,
- help="Float that penalizes new tokens based on whether they "
- "appear in the generated text so far. Values > 0 encourage the model "
- "to use new tokens, while values < 0 encourage the model to repeat "
- "tokens.",
- )
- args.add_argument(
- "--frequency-penalty",
- type=float,
- default=0.0,
- help="Float that penalizes new tokens based on their "
- " frequency in the generated text so far. Values > 0 encourage the "
- " model to use new tokens, while values < 0 encourage the model to "
- "repeat tokens.",
- )
- args.add_argument(
- "--repetition-penalty",
- type=float,
- default=1.0,
- help="Float that penalizes new tokens based on whether "
- "they appear in the prompt and the generated text so far. Values > 1 "
- "encourage the model to use new tokens, while values < 1 encourage "
- "the model to repeat tokens.",
- )
- args.add_argument(
- "--temperature",
- type=float,
- default=1.0,
- help="Float that controls the randomness of the sampling. Lower "
- "values make the model more deterministic, while higher values make "
- "the model more random. Zero means greedy sampling.",
- )
- args.add_argument(
- "--top-p",
- type=float,
- default=1.0,
- help="Float that controls the cumulative probability of the top tokens "
- "to consider. Must be in (0, 1]. Set to 1 to consider all tokens.",
- )
- args.add_argument(
- "--top-k",
- type=int,
- default=-1,
- help="Integer that controls the number of top tokens to consider. Set "
- "to -1 to consider all tokens.",
- )
- args.add_argument(
- "--min-p",
- type=float,
- default=0.0,
- help="Float that represents the minimum probability for a token to be "
- "considered, relative to the probability of the most likely token. "
- "Must be in [0, 1]. Set to 0 to disable this.",
- )
- args.add_argument(
- "--use-beam-search",
- default=False,
- action="store_true",
- help="Whether to use beam search instead of sampling.",
- )
- args.add_argument(
- "--length-penalty",
- type=float,
- default=1.0,
- help="Float that penalizes sequences based on their length. Used in beam search.",
- )
- args.add_argument(
- "--stop",
- type=str,
- default=None,
- help="List of strings that stop the generation when they are generated. "
- "The returned output will not contain the stop strings.",
- )
- args.add_argument(
- "--stop-token-ids",
- type=int,
- default=None,
- help="List of tokens that stop the generation when they are "
- "generated. The returned output will contain the stop tokens unless "
- "the stop tokens are special tokens.",
- )
- args.add_argument(
- "--include-stop-str-in-output",
- default=False,
- action="store_true",
- help="Whether to include the stop strings in output text. Defaults to False.",
- )
- args.add_argument(
- "--ignore-eos",
- default=False,
- action="store_true",
- help="Whether to ignore the EOS token and continue generating tokens after the EOS token is generated.",
- )
- args.add_argument(
- "--max-tokens",
- type=int,
- default=16,
- help="Maximum number of tokens to generate per output sequence.",
- )
- args.add_argument(
- "--min-tokens",
- type=int,
- default=0,
- help="Minimum number of tokens to generate per output sequence "
- "before EOS or stop_token_ids can be generated",
- )
- args.add_argument(
- "--logprobs",
- type=int,
- default=None,
- help="NNumber of log probabilities to return per output token. "
- "Note that the implementation follows the OpenAI API: The return "
- "result includes the log probabilities on the `logprobs` most likely "
- "tokens, as well the chosen tokens. The API will always return the "
- "log probability of the sampled token, so there may be up to "
- "`logprobs+1` elements in the response.",
- )
- args.add_argument(
- "--prompt-logprobs",
- type=int,
- default=None,
- help="Number of log probabilities to return per prompt token.",
- )
- args.add_argument(
- "--detokenize",
- type=bool,
- default=True,
- help="Whether to detokenize the output. Defaults to True.",
- )
- args.add_argument(
- "--skip-special-tokens",
- default=True,
- action="store_false",
- help="Whether to skip special tokens in the output.",
- )
- args.add_argument(
- "--spaces-between-special-tokens",
- default=True,
- action="store_false",
- help="Whether to add spaces between special tokens in the output. Defaults to True.",
- )
- return args
-
-
-def load_chat_template(tokenizer, chat_template):
- if chat_template is not None:
- try:
- with open(chat_template, "r") as f:
- tokenizer.chat_template = f.read()
- except OSError:
- # If opening a file fails, set chat template to be args to
- # ensure we decode so our escape are interpreted correctly
- tokenizer.chat_template = codecs.decode(chat_template, "unicode_escape")
-
- logging.info(f"Using supplied chat template:\n{tokenizer.chat_template}")
- elif tokenizer.chat_template is not None:
- logging.info(
- f"Using default chat template:\n{tokenizer.chat_template}. This May lead to unsatisfactory results. You can provide a template.jinja file for vllm."
- )
- else:
- logging.warning(
- "No chat template provided. Chat API will not work. This May lead to unsatisfactory results. You can provide a template.jinja file for vllm."
- )
\ No newline at end of file
diff --git a/models/multimodal/vision_language_model/chameleon_7b/vllm/vllm_public_assets/cherry_blossom.jpg b/models/multimodal/vision_language_model/chameleon_7b/vllm/vllm_public_assets/cherry_blossom.jpg
deleted file mode 100644
index 63173db0da7687d7841fe4d85239d8e277d81259..0000000000000000000000000000000000000000
Binary files a/models/multimodal/vision_language_model/chameleon_7b/vllm/vllm_public_assets/cherry_blossom.jpg and /dev/null differ
diff --git a/models/multimodal/vision_language_model/fuyu_8b/vllm/README.md b/models/multimodal/vision_language_model/fuyu_8b/vllm/README.md
index f751f8c4db94a5b7c1e170ead59ec7ad40fcfc9c..d13e0b364e215b3c4479edd6f0ee8072977f1e36 100755
--- a/models/multimodal/vision_language_model/fuyu_8b/vllm/README.md
+++ b/models/multimodal/vision_language_model/fuyu_8b/vllm/README.md
@@ -21,6 +21,8 @@ transformer decoder like an image transformer (albeit with no pooling and causal
- Model:
```bash
+cp -r ../../vllm_public_assets/ ./
+
# Download model from the website and make sure the model's path is "data/fuyu-8b"
mkdir data/
```
diff --git a/models/multimodal/vision_language_model/fuyu_8b/vllm/ci/prepare.sh b/models/multimodal/vision_language_model/fuyu_8b/vllm/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..7232aa2996f379a961cf931968a1319fb70ac091
--- /dev/null
+++ b/models/multimodal/vision_language_model/fuyu_8b/vllm/ci/prepare.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
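+# Install the system libGL runtime (typically needed by OpenCV-based image preprocessing); pick the package manager by distro.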
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+ apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+ yum install -y mesa-libGL
+else
+  echo "Unsupported OS"
+fi
+
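+# Copy the shared vLLM demo assets (sample images such as cherry_blossom.jpg) into the working directory.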
+cp -r ../../vllm_public_assets/ ./
diff --git a/models/multimodal/vision_language_model/fuyu_8b/vllm/utils.py b/models/multimodal/vision_language_model/fuyu_8b/vllm/utils.py
deleted file mode 100644
index 48445ed97d08a8388a90d20e026609b5c1e88a99..0000000000000000000000000000000000000000
--- a/models/multimodal/vision_language_model/fuyu_8b/vllm/utils.py
+++ /dev/null
@@ -1,224 +0,0 @@
-# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
-# All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"); you may
-# not use this file except in compliance with the License. You may obtain
-# a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
-# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
-# License for the specific language governing permissions and limitations
-# under the License.
-import argparse
-import codecs
-import logging
-
-"""
-The following arguments can not be add in args...
-early_stopping: Union[bool, str] = False,
-early_stopping: Controls the stopping condition for beam search. It
- accepts the following values: `True`, where the generation stops as
- soon as there are `best_of` complete candidates; `False`, where an
- heuristic is applied and the generation stops when is it very
- unlikely to find better candidates; `"never"`, where the beam search
- procedure only stops when there cannot be better candidates
- (canonical beam search algorithm).
-stop: Optional[Union[str, List[str]]] = None,
-stop_token_ids: Optional[List[int]] = None,
-logits_processors: Optional[List[LogitsProcessor]] = None,
-logits_processors: List of functions that modify logits based on
- previously generated tokens, and optionally prompt tokens as
- a first argument.
-truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]] = None,
-truncate_prompt_tokens: If set to an integer k, will use only the last k
- tokens from the prompt (i.e., left truncation). Defaults to None
- (i.e., no truncation).
- """
-
-
-def sampling_add_cli_args(args: argparse.ArgumentParser) -> argparse.ArgumentParser:
- args.add_argument(
- "--n",
- type=int,
- default=1,
- help="Number of output sequences to return for the given prompt.",
- )
- args.add_argument(
- "--best-of",
- type=int,
- default=None,
- help="Number of output sequences that are generated from the prompt. "
- "From these `best_of` sequences, the top `n` sequences are returned. "
- "`best_of` must be greater than or equal to `n`. This is treated as "
- "the beam width when `use_beam_search` is True. By default, `best_of`"
- "is set to `n`.",
- )
- args.add_argument(
- "--presence-penalty",
- type=float,
- default=0.0,
- help="Float that penalizes new tokens based on whether they "
- "appear in the generated text so far. Values > 0 encourage the model "
- "to use new tokens, while values < 0 encourage the model to repeat "
- "tokens.",
- )
- args.add_argument(
- "--frequency-penalty",
- type=float,
- default=0.0,
- help="Float that penalizes new tokens based on their "
- " frequency in the generated text so far. Values > 0 encourage the "
- " model to use new tokens, while values < 0 encourage the model to "
- "repeat tokens.",
- )
- args.add_argument(
- "--repetition-penalty",
- type=float,
- default=1.0,
- help="Float that penalizes new tokens based on whether "
- "they appear in the prompt and the generated text so far. Values > 1 "
- "encourage the model to use new tokens, while values < 1 encourage "
- "the model to repeat tokens.",
- )
- args.add_argument(
- "--temperature",
- type=float,
- default=1.0,
- help="Float that controls the randomness of the sampling. Lower "
- "values make the model more deterministic, while higher values make "
- "the model more random. Zero means greedy sampling.",
- )
- args.add_argument(
- "--top-p",
- type=float,
- default=1.0,
- help="Float that controls the cumulative probability of the top tokens "
- "to consider. Must be in (0, 1]. Set to 1 to consider all tokens.",
- )
- args.add_argument(
- "--top-k",
- type=int,
- default=-1,
- help="Integer that controls the number of top tokens to consider. Set "
- "to -1 to consider all tokens.",
- )
- args.add_argument(
- "--min-p",
- type=float,
- default=0.0,
- help="Float that represents the minimum probability for a token to be "
- "considered, relative to the probability of the most likely token. "
- "Must be in [0, 1]. Set to 0 to disable this.",
- )
- args.add_argument(
- "--use-beam-search",
- default=False,
- action="store_true",
- help="Whether to use beam search instead of sampling.",
- )
- args.add_argument(
- "--length-penalty",
- type=float,
- default=1.0,
- help="Float that penalizes sequences based on their length. Used in beam search.",
- )
- args.add_argument(
- "--stop",
- type=str,
- default=None,
- help="List of strings that stop the generation when they are generated. "
- "The returned output will not contain the stop strings.",
- )
- args.add_argument(
- "--stop-token-ids",
- type=int,
- default=None,
- help="List of tokens that stop the generation when they are "
- "generated. The returned output will contain the stop tokens unless "
- "the stop tokens are special tokens.",
- )
- args.add_argument(
- "--include-stop-str-in-output",
- default=False,
- action="store_true",
- help="Whether to include the stop strings in output text. Defaults to False.",
- )
- args.add_argument(
- "--ignore-eos",
- default=False,
- action="store_true",
- help="Whether to ignore the EOS token and continue generating tokens after the EOS token is generated.",
- )
- args.add_argument(
- "--max-tokens",
- type=int,
- default=16,
- help="Maximum number of tokens to generate per output sequence.",
- )
- args.add_argument(
- "--min-tokens",
- type=int,
- default=0,
- help="Minimum number of tokens to generate per output sequence "
- "before EOS or stop_token_ids can be generated",
- )
- args.add_argument(
- "--logprobs",
- type=int,
- default=None,
- help="NNumber of log probabilities to return per output token. "
- "Note that the implementation follows the OpenAI API: The return "
- "result includes the log probabilities on the `logprobs` most likely "
- "tokens, as well the chosen tokens. The API will always return the "
- "log probability of the sampled token, so there may be up to "
- "`logprobs+1` elements in the response.",
- )
- args.add_argument(
- "--prompt-logprobs",
- type=int,
- default=None,
- help="Number of log probabilities to return per prompt token.",
- )
- args.add_argument(
- "--detokenize",
- type=bool,
- default=True,
- help="Whether to detokenize the output. Defaults to True.",
- )
- args.add_argument(
- "--skip-special-tokens",
- default=True,
- action="store_false",
- help="Whether to skip special tokens in the output.",
- )
- args.add_argument(
- "--spaces-between-special-tokens",
- default=True,
- action="store_false",
- help="Whether to add spaces between special tokens in the output. Defaults to True.",
- )
- return args
-
-
-def load_chat_template(tokenizer, chat_template):
- if chat_template is not None:
- try:
- with open(chat_template, "r") as f:
- tokenizer.chat_template = f.read()
- except OSError:
- # If opening a file fails, set chat template to be args to
- # ensure we decode so our escape are interpreted correctly
- tokenizer.chat_template = codecs.decode(chat_template, "unicode_escape")
-
- logging.info(f"Using supplied chat template:\n{tokenizer.chat_template}")
- elif tokenizer.chat_template is not None:
- logging.info(
- f"Using default chat template:\n{tokenizer.chat_template}. This May lead to unsatisfactory results. You can provide a template.jinja file for vllm."
- )
- else:
- logging.warning(
- "No chat template provided. Chat API will not work. This May lead to unsatisfactory results. You can provide a template.jinja file for vllm."
- )
\ No newline at end of file
diff --git a/models/multimodal/vision_language_model/fuyu_8b/vllm/vllm_public_assets/cherry_blossom.jpg b/models/multimodal/vision_language_model/fuyu_8b/vllm/vllm_public_assets/cherry_blossom.jpg
deleted file mode 100644
index 63173db0da7687d7841fe4d85239d8e277d81259..0000000000000000000000000000000000000000
Binary files a/models/multimodal/vision_language_model/fuyu_8b/vllm/vllm_public_assets/cherry_blossom.jpg and /dev/null differ
diff --git a/models/multimodal/vision_language_model/intern_vl/vllm/README.md b/models/multimodal/vision_language_model/intern_vl/vllm/README.md
index 78bb8d1b1297bba864816057c0192193e85f8849..c337a34094d9a2c4666cb2d3126aa3f64dcccc2d 100644
--- a/models/multimodal/vision_language_model/intern_vl/vllm/README.md
+++ b/models/multimodal/vision_language_model/intern_vl/vllm/README.md
@@ -21,6 +21,7 @@ learning.
```bash
cd ${DeepSparkInference}/models/vision-language-understanding/Intern_VL/vllm
+cp -r ../../vllm_public_assets/ ./
mkdir -p data/intern_vl
ln -s /path/to/InternVL2-4B ./data/intern_vl
```
diff --git a/models/multimodal/vision_language_model/intern_vl/vllm/ci/prepare.sh b/models/multimodal/vision_language_model/intern_vl/vllm/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..7232aa2996f379a961cf931968a1319fb70ac091
--- /dev/null
+++ b/models/multimodal/vision_language_model/intern_vl/vllm/ci/prepare.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
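+# Install the system libGL runtime (typically needed by OpenCV-based image preprocessing); pick the package manager by distro.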
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+ apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+ yum install -y mesa-libGL
+else
+  echo "Unsupported OS"
+fi
+
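+# Copy the shared vLLM demo assets (sample images such as cherry_blossom.jpg) into the working directory.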
+cp -r ../../vllm_public_assets/ ./
diff --git a/models/multimodal/vision_language_model/intern_vl/vllm/utils.py b/models/multimodal/vision_language_model/intern_vl/vllm/utils.py
deleted file mode 100644
index c6def85dedc08ef9c3a489ce9dc5b1ff4a5e48b0..0000000000000000000000000000000000000000
--- a/models/multimodal/vision_language_model/intern_vl/vllm/utils.py
+++ /dev/null
@@ -1,173 +0,0 @@
-# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
-# All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"); you may
-# not use this file except in compliance with the License. You may obtain
-# a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
-# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
-# License for the specific language governing permissions and limitations
-# under the License.
-
-import codecs
-import logging
-import argparse
-
-
-def sampling_add_cli_args(args: argparse.ArgumentParser) -> argparse.ArgumentParser:
- args.add_argument(
- '--n',
- type=int,
- default=1,
- help="Number of output sequences to return for the given prompt.")
- args.add_argument(
- '--best-of',
- type=int,
- default=None,
- help="Number of output sequences that are generated from the prompt. "
- "From these `best_of` sequences, the top `n` sequences are returned. "
- "`best_of` must be greater than or equal to `n`. This is treated as "
- "the beam width when `use_beam_search` is True. By default, `best_of`"
- "is set to `n`.")
- args.add_argument(
- '--presence-penalty',
- type=float,
- default=0.0,
- help="Float that penalizes new tokens based on whether they "
- "appear in the generated text so far. Values > 0 encourage the model "
- "to use new tokens, while values < 0 encourage the model to repeat "
- "tokens.")
- args.add_argument(
- '--frequency-penalty',
- type=float,
- default=0.0,
- help="Float that penalizes new tokens based on their "
- " frequency in the generated text so far. Values > 0 encourage the "
- " model to use new tokens, while values < 0 encourage the model to "
- "repeat tokens.")
- args.add_argument(
- '--repetition-penalty',
- type=float,
- default=1.0,
- help="Float that penalizes new tokens based on whether "
- "they appear in the prompt and the generated text so far. Values > 1 "
- "encourage the model to use new tokens, while values < 1 encourage "
- "the model to repeat tokens.")
- args.add_argument(
- '--temperature',
- type=float,
- default=1.0,
- help="Float that controls the randomness of the sampling. Lower "
- "values make the model more deterministic, while higher values make "
- "the model more random. Zero means greedy sampling.")
- args.add_argument(
- '--top-p',
- type=float,
- default=1.0,
- help="Float that controls the cumulative probability of the top tokens "
- "to consider. Must be in (0, 1]. Set to 1 to consider all tokens.")
- args.add_argument(
- '--top-k',
- type=int,
- default=-1,
- help="Integer that controls the number of top tokens to consider. Set "
- "to -1 to consider all tokens.")
- args.add_argument(
- '--min-p',
- type=float,
- default=0.0,
- help="Float that represents the minimum probability for a token to be "
- "considered, relative to the probability of the most likely token. "
- "Must be in [0, 1]. Set to 0 to disable this.")
- args.add_argument(
- '--use-beam-search',
- default=False,
- action="store_true",
- help="Whether to use beam search instead of sampling.")
- args.add_argument(
- '--length-penalty',
- type=float,
- default=1.0,
- help="Float that penalizes sequences based on their length. Used in beam search.")
- args.add_argument(
- '--stop',
- type=str,
- default=None,
- help="List of strings that stop the generation when they are generated. "
- "The returned output will not contain the stop strings.")
- args.add_argument(
- '--stop-token-ids',
- type=int,
- default=None,
- help="List of tokens that stop the generation when they are "
- "generated. The returned output will contain the stop tokens unless "
- "the stop tokens are special tokens.")
- args.add_argument(
- '--include-stop-str-in-output',
- default=False,
- action="store_true",
- help="Whether to include the stop strings in output text. Defaults to False.")
- args.add_argument(
- '--ignore-eos',
- default=False,
- action="store_true",
- help="Whether to ignore the EOS token and continue generating tokens after the EOS token is generated.")
- args.add_argument(
- '--max-tokens',
- type=int,
- default=16,
- help="Maximum number of tokens to generate per output sequence.")
- args.add_argument(
- '--logprobs',
- type=int,
- default=None,
- help="NNumber of log probabilities to return per output token. "
- "Note that the implementation follows the OpenAI API: The return "
- "result includes the log probabilities on the `logprobs` most likely "
- "tokens, as well the chosen tokens. The API will always return the "
- "log probability of the sampled token, so there may be up to "
- "`logprobs+1` elements in the response.")
- args.add_argument(
- '--prompt-logprobs',
- type=int,
- default=None,
- help="Number of log probabilities to return per prompt token.")
- args.add_argument(
- '--skip-special-tokens',
- default=True,
- action="store_false",
- help="Whether to skip special tokens in the output.")
- args.add_argument(
- '--spaces-between-special-tokens',
- default=True,
- action="store_false",
- help="Whether to add spaces between special tokens in the output. Defaults to True.")
- # early_stopping logits_processors seed
- return args
-
-
-def load_chat_template(tokenizer, chat_template):
- if chat_template is not None:
- try:
- with open(chat_template, "r") as f:
- tokenizer.chat_template = f.read()
- except OSError:
- # If opening a file fails, set chat template to be args to
- # ensure we decode so our escape are interpreted correctly
- tokenizer.chat_template = codecs.decode(
- chat_template, "unicode_escape")
-
- logging.info(
- f"Using supplied chat template:\n{tokenizer.chat_template}"
- )
- elif tokenizer.chat_template is not None:
- logging.info(
- f"Using default chat template:\n{tokenizer.chat_template}. This May lead to unsatisfactory results. You can provide a template.jinja file for vllm."
- )
- else:
- logging.warning(
- "No chat template provided. Chat API will not work. This May lead to unsatisfactory results. You can provide a template.jinja file for vllm.")
diff --git a/models/multimodal/vision_language_model/intern_vl/vllm/vllm_public_assets/cherry_blossom.jpg b/models/multimodal/vision_language_model/intern_vl/vllm/vllm_public_assets/cherry_blossom.jpg
deleted file mode 100644
index 63173db0da7687d7841fe4d85239d8e277d81259..0000000000000000000000000000000000000000
Binary files a/models/multimodal/vision_language_model/intern_vl/vllm/vllm_public_assets/cherry_blossom.jpg and /dev/null differ
diff --git a/models/multimodal/vision_language_model/llava/vllm/README.md b/models/multimodal/vision_language_model/llava/vllm/README.md
index 599b66f04af0d6d093fd96be05febbad896292fd..78a2119013b612c6e26f517339cf634fa1677b54 100644
--- a/models/multimodal/vision_language_model/llava/vllm/README.md
+++ b/models/multimodal/vision_language_model/llava/vllm/README.md
@@ -22,6 +22,8 @@ reasoning.
-llava-v1.6-vicuna-7b-hf:
```bash
+cp -r ../../vllm_public_assets/ ./
+
# Download model from the website and make sure the model's path is "data/llava"
mkdir data/
```
diff --git a/models/multimodal/vision_language_model/llava/vllm/ci/prepare.sh b/models/multimodal/vision_language_model/llava/vllm/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..7232aa2996f379a961cf931968a1319fb70ac091
--- /dev/null
+++ b/models/multimodal/vision_language_model/llava/vllm/ci/prepare.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
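+# Install the system libGL runtime (typically needed by OpenCV-based image preprocessing); pick the package manager by distro.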
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+ apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+ yum install -y mesa-libGL
+else
+  echo "Unsupported OS"
+fi
+
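+# Copy the shared vLLM demo assets (sample images such as cherry_blossom.jpg) into the working directory.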
+cp -r ../../vllm_public_assets/ ./
diff --git a/models/multimodal/vision_language_model/llava/vllm/utils.py b/models/multimodal/vision_language_model/llava/vllm/utils.py
deleted file mode 100644
index 11f23209a3175f0200ac6b5c499765101e3c3a0a..0000000000000000000000000000000000000000
--- a/models/multimodal/vision_language_model/llava/vllm/utils.py
+++ /dev/null
@@ -1,225 +0,0 @@
-# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
-# All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"); you may
-# not use this file except in compliance with the License. You may obtain
-# a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
-# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
-# License for the specific language governing permissions and limitations
-# under the License.
-
-import argparse
-import codecs
-import logging
-
-"""
-The following arguments can not be add in args...
-early_stopping: Union[bool, str] = False,
-early_stopping: Controls the stopping condition for beam search. It
- accepts the following values: `True`, where the generation stops as
- soon as there are `best_of` complete candidates; `False`, where an
- heuristic is applied and the generation stops when is it very
- unlikely to find better candidates; `"never"`, where the beam search
- procedure only stops when there cannot be better candidates
- (canonical beam search algorithm).
-stop: Optional[Union[str, List[str]]] = None,
-stop_token_ids: Optional[List[int]] = None,
-logits_processors: Optional[List[LogitsProcessor]] = None,
-logits_processors: List of functions that modify logits based on
- previously generated tokens, and optionally prompt tokens as
- a first argument.
-truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]] = None,
-truncate_prompt_tokens: If set to an integer k, will use only the last k
- tokens from the prompt (i.e., left truncation). Defaults to None
- (i.e., no truncation).
- """
-
-
-def sampling_add_cli_args(args: argparse.ArgumentParser) -> argparse.ArgumentParser:
- args.add_argument(
- "--n",
- type=int,
- default=1,
- help="Number of output sequences to return for the given prompt.",
- )
- args.add_argument(
- "--best-of",
- type=int,
- default=None,
- help="Number of output sequences that are generated from the prompt. "
- "From these `best_of` sequences, the top `n` sequences are returned. "
- "`best_of` must be greater than or equal to `n`. This is treated as "
- "the beam width when `use_beam_search` is True. By default, `best_of`"
- "is set to `n`.",
- )
- args.add_argument(
- "--presence-penalty",
- type=float,
- default=0.0,
- help="Float that penalizes new tokens based on whether they "
- "appear in the generated text so far. Values > 0 encourage the model "
- "to use new tokens, while values < 0 encourage the model to repeat "
- "tokens.",
- )
- args.add_argument(
- "--frequency-penalty",
- type=float,
- default=0.0,
- help="Float that penalizes new tokens based on their "
- " frequency in the generated text so far. Values > 0 encourage the "
- " model to use new tokens, while values < 0 encourage the model to "
- "repeat tokens.",
- )
- args.add_argument(
- "--repetition-penalty",
- type=float,
- default=1.0,
- help="Float that penalizes new tokens based on whether "
- "they appear in the prompt and the generated text so far. Values > 1 "
- "encourage the model to use new tokens, while values < 1 encourage "
- "the model to repeat tokens.",
- )
- args.add_argument(
- "--temperature",
- type=float,
- default=1.0,
- help="Float that controls the randomness of the sampling. Lower "
- "values make the model more deterministic, while higher values make "
- "the model more random. Zero means greedy sampling.",
- )
- args.add_argument(
- "--top-p",
- type=float,
- default=1.0,
- help="Float that controls the cumulative probability of the top tokens "
- "to consider. Must be in (0, 1]. Set to 1 to consider all tokens.",
- )
- args.add_argument(
- "--top-k",
- type=int,
- default=-1,
- help="Integer that controls the number of top tokens to consider. Set "
- "to -1 to consider all tokens.",
- )
- args.add_argument(
- "--min-p",
- type=float,
- default=0.0,
- help="Float that represents the minimum probability for a token to be "
- "considered, relative to the probability of the most likely token. "
- "Must be in [0, 1]. Set to 0 to disable this.",
- )
- args.add_argument(
- "--use-beam-search",
- default=False,
- action="store_true",
- help="Whether to use beam search instead of sampling.",
- )
- args.add_argument(
- "--length-penalty",
- type=float,
- default=1.0,
- help="Float that penalizes sequences based on their length. Used in beam search.",
- )
- args.add_argument(
- "--stop",
- type=str,
- default=None,
- help="List of strings that stop the generation when they are generated. "
- "The returned output will not contain the stop strings.",
- )
- args.add_argument(
- "--stop-token-ids",
- type=int,
- default=None,
- help="List of tokens that stop the generation when they are "
- "generated. The returned output will contain the stop tokens unless "
- "the stop tokens are special tokens.",
- )
- args.add_argument(
- "--include-stop-str-in-output",
- default=False,
- action="store_true",
- help="Whether to include the stop strings in output text. Defaults to False.",
- )
- args.add_argument(
- "--ignore-eos",
- default=False,
- action="store_true",
- help="Whether to ignore the EOS token and continue generating tokens after the EOS token is generated.",
- )
- args.add_argument(
- "--max-tokens",
- type=int,
- default=16,
- help="Maximum number of tokens to generate per output sequence.",
- )
- args.add_argument(
- "--min-tokens",
- type=int,
- default=0,
- help="Minimum number of tokens to generate per output sequence "
- "before EOS or stop_token_ids can be generated",
- )
- args.add_argument(
- "--logprobs",
- type=int,
- default=None,
- help="NNumber of log probabilities to return per output token. "
- "Note that the implementation follows the OpenAI API: The return "
- "result includes the log probabilities on the `logprobs` most likely "
- "tokens, as well the chosen tokens. The API will always return the "
- "log probability of the sampled token, so there may be up to "
- "`logprobs+1` elements in the response.",
- )
- args.add_argument(
- "--prompt-logprobs",
- type=int,
- default=None,
- help="Number of log probabilities to return per prompt token.",
- )
- args.add_argument(
- "--detokenize",
- type=bool,
- default=True,
- help="Whether to detokenize the output. Defaults to True.",
- )
- args.add_argument(
- "--skip-special-tokens",
- default=True,
- action="store_false",
- help="Whether to skip special tokens in the output.",
- )
- args.add_argument(
- "--spaces-between-special-tokens",
- default=True,
- action="store_false",
- help="Whether to add spaces between special tokens in the output. Defaults to True.",
- )
- return args
-
-
-def load_chat_template(tokenizer, chat_template):
- if chat_template is not None:
- try:
- with open(chat_template, "r") as f:
- tokenizer.chat_template = f.read()
- except OSError:
- # If opening a file fails, set chat template to be args to
- # ensure we decode so our escape are interpreted correctly
- tokenizer.chat_template = codecs.decode(chat_template, "unicode_escape")
-
- logging.info(f"Using supplied chat template:\n{tokenizer.chat_template}")
- elif tokenizer.chat_template is not None:
- logging.info(
- f"Using default chat template:\n{tokenizer.chat_template}. This May lead to unsatisfactory results. You can provide a template.jinja file for vllm."
- )
- else:
- logging.warning(
- "No chat template provided. Chat API will not work. This May lead to unsatisfactory results. You can provide a template.jinja file for vllm."
- )
\ No newline at end of file
diff --git a/models/multimodal/vision_language_model/llava/vllm/vllm_public_assets/cherry_blossom.jpg b/models/multimodal/vision_language_model/llava/vllm/vllm_public_assets/cherry_blossom.jpg
deleted file mode 100644
index 63173db0da7687d7841fe4d85239d8e277d81259..0000000000000000000000000000000000000000
Binary files a/models/multimodal/vision_language_model/llava/vllm/vllm_public_assets/cherry_blossom.jpg and /dev/null differ
diff --git a/models/multimodal/vision_language_model/llava_next_video_7b/vllm/ci/prepare.sh b/models/multimodal/vision_language_model/llava_next_video_7b/vllm/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..0fa3df9b4017331b2579cf5e039676248f79fff9
--- /dev/null
+++ b/models/multimodal/vision_language_model/llava_next_video_7b/vllm/ci/prepare.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
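+# Install the system libGL runtime (typically needed by OpenCV-based image preprocessing); pick the package manager by distro.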
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+ apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+ yum install -y mesa-libGL
+else
+  echo "Unsupported OS"
+fi
diff --git a/models/nlp/plm/albert/ixrt/ci/prepare.sh b/models/nlp/plm/albert/ixrt/ci/prepare.sh
index d78865ec0c31e4dbb393d2d89b4d4ac6a2ce391d..68e8aa19da2132447fdfe6ea48f42bc026f48d7c 100644
--- a/models/nlp/plm/albert/ixrt/ci/prepare.sh
+++ b/models/nlp/plm/albert/ixrt/ci/prepare.sh
@@ -35,7 +35,6 @@ pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/requ
# edit madlag/albert-base-v2-squad path
# sed -i "s#madlag#/${MODEL_PATH}/madlag#" ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/data_loader.py
-mv madlag ./ByteMLPerf/byte_infer_perf/general_perf/
# copy open_squad data
cp /root/data/datasets/open_squad/* ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/
diff --git a/models/nlp/plm/bert_base_ner/igie/ci/prepare.sh b/models/nlp/plm/bert_base_ner/igie/ci/prepare.sh
index 36c5cea9aa366c78077003c3271f4fd402021dd0..3ebc27f17f276362647a9716fcc7aad4e9d77e32 100644
--- a/models/nlp/plm/bert_base_ner/igie/ci/prepare.sh
+++ b/models/nlp/plm/bert_base_ner/igie/ci/prepare.sh
@@ -16,6 +16,7 @@
set -x
+pip3 uninstall numpy -y
pip3 install -r requirements.txt
# Get pytorch weights
diff --git a/models/nlp/plm/roformer/ixrt/ci/prepare.sh b/models/nlp/plm/roformer/ixrt/ci/prepare.sh
index c3cc4f3d2e12028623cbd00969ac39960db5b490..ea80462db022331cb8b9c20f12a15e9ef8b0bdd6 100644
--- a/models/nlp/plm/roformer/ixrt/ci/prepare.sh
+++ b/models/nlp/plm/roformer/ixrt/ci/prepare.sh
@@ -28,7 +28,8 @@ python3 export_onnx.py --model_path ./data/open_roformer --output_path ./data/op
# Simplify onnx model
onnxsim ./data/open_roformer/roformer-frozen_org.onnx ./data/open_roformer/roformer-frozen.onnx
-python3 deploy.py --model_path ./data/open_roformer/roformer-frozen.onnx --output_path ./data/open_roformer/roformer.onnx
+python3 deploy.py --model_path ./data/open_roformer/roformer-frozen.onnx --output_path ./data/open_roformer/roformer-frozen.onnx
+cp ./data/open_roformer/roformer-frozen.onnx ./data/open_roformer/roformer.onnx
# link ByteMLPerf and install requirements
ln -s ../../../../../toolbox/ByteMLPerf ./
@@ -39,7 +40,7 @@ sed -i '102s/build_engine/# build_engine/' ./ByteMLPerf/byte_infer_perf/general_
# Move open_roformer
mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/
-mv ./data/open_roformer ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/
+cp -r ./data/open_roformer ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/
# Setup open_cail2019 dataset
cp /root/data/datasets/open_cail2019/* ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cail2019
diff --git a/models/others/recommendation/wide_and_deep/ixrt/README.md b/models/others/recommendation/wide_and_deep/ixrt/README.md
index 8e9dd17d650123ead68c985290075b5c912be8ac..22796241f671d6bd7ff4280666270ea572dd8efb 100644
--- a/models/others/recommendation/wide_and_deep/ixrt/README.md
+++ b/models/others/recommendation/wide_and_deep/ixrt/README.md
@@ -56,7 +56,7 @@ export PROJ_PATH=./
#### FP16
```bash
-bash scripts/infer_widedeep_fp16_performance.sh
+bash scripts/infer_wide_and_deep_fp16_performance.sh
```
### Accuracy
diff --git a/models/others/recommendation/wide_and_deep/ixrt/ci/prepare.sh b/models/others/recommendation/wide_and_deep/ixrt/ci/prepare.sh
index 2e65a751f891ad3089d1bb5e27c50a032fdaaf81..4a351d878726c5b7c7a20bc3a4ac1dd7eb021db3 100644
--- a/models/others/recommendation/wide_and_deep/ixrt/ci/prepare.sh
+++ b/models/others/recommendation/wide_and_deep/ixrt/ci/prepare.sh
@@ -40,7 +40,7 @@ mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_criteo_kaggle/
cp /root/data/datasets/eval.csv ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_criteo_kaggle/
-wget http://files.deepspark.org.cn:880/deepspark/widedeep_dynamicshape_new.onnx
+cp /root/data/checkpoints/widedeep_dynamicshape_new.onnx ./
cp open_wide_deep_saved_model/* ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/regular/open_wide_deep_saved_model/
mv widedeep_dynamicshape_new.onnx ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/regular/open_wide_deep_saved_model/widedeep_dynamicshape.onnx
diff --git a/models/others/recommendation/wide_and_deep/ixrt/scripts/infer_widedeep_fp16_performance.sh b/models/others/recommendation/wide_and_deep/ixrt/scripts/infer_wide_and_deep_fp16_performance.sh
similarity index 100%
rename from models/others/recommendation/wide_and_deep/ixrt/scripts/infer_widedeep_fp16_performance.sh
rename to models/others/recommendation/wide_and_deep/ixrt/scripts/infer_wide_and_deep_fp16_performance.sh
diff --git a/tests/model_info.json b/tests/model_info.json
index 7df4b9ccfe08e291ce1012cf932ebcd2d1f88937..62ef3eba0dd6fa069eef2dc39c148fe88b15b13e 100644
--- a/tests/model_info.json
+++ b/tests/model_info.json
@@ -22,8 +22,8 @@
"github_repo": "",
"github_branch": "",
"github_path": "",
- "datasets": "https://www.openslr.org/33/aishell.tar.gz",
- "download_url": "http://mobvoi-speech-public.ufile.ucloud.cn/public/wenet/aishell/20211025_conformer_exp.tar.gz",
+ "datasets": "https://www.openslr.org/33/aishell",
+ "download_url": "http://mobvoi-speech-public.ufile.ucloud.cn/public/wenet/aishell/20211025_conformer_exp",
"need_third_part": true,
"precisions": [
"fp16"
@@ -187,7 +187,9 @@
"datasets": "https://www.image-net.org/download.php",
"download_url": "https://huggingface.co/openai/clip-vit-base-patch32",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -279,7 +281,7 @@
"github_branch": "",
"github_path": "",
"datasets": "https://www.image-net.org/download.php",
- "download_url": "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth",
+ "download_url": "https://download.pytorch.org/models/convnext_base-6075fbad.pth",
"need_third_part": "",
"precisions": [
"fp16"
@@ -1018,7 +1020,7 @@
"github_branch": "",
"github_path": "",
"datasets": "https://www.image-net.org/download.php",
- "download_url": "https://download.pytorch.org/models/efficientnet_b1_rwightman-bac287d4.pth",
+ "download_url": "https://download.pytorch.org/models/efficientnet_b1-c27df63c.pth",
"need_third_part": false,
"precisions": [
"fp16",
@@ -2416,8 +2418,7 @@
"download_url": "https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d50_b32x8_imagenet_20210531-db14775a.pth",
"need_third_part": false,
"precisions": [
- "fp16",
- "int8"
+ "fp16"
],
"type": "inference",
"hasDemo": false,
@@ -3155,10 +3156,12 @@
"github_repo": "",
"github_branch": "",
"github_path": "",
- "datasets": "",
- "download_url": "",
+ "datasets": "local/tmp",
+ "download_url": "https://drive.google.com/open?id=1R77HmFADxe87GmoLwzfgMu_HY0IhcyBz",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -3185,10 +3188,12 @@
"github_repo": "",
"github_branch": "",
"github_path": "",
- "datasets": "",
- "download_url": "",
+ "datasets": "local/coco",
+ "download_url": "http://files.deepspark.org.cn:880/deepspark/wts/maskrcnn.wts",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -3215,10 +3220,12 @@
"github_repo": "",
"github_branch": "",
"github_path": "",
- "datasets": "",
- "download_url": "",
+ "datasets": "local/coco2017",
+ "download_url": "https://download.openmmlab.com/mmdetection/v2.0/solo/solo_r50_fpn_3x_coco/solo_r50_fpn_3x_coco_20210901_012353-11d224d7.pth",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -3245,10 +3252,12 @@
"github_repo": "",
"github_branch": "",
"github_path": "",
- "datasets": "",
- "download_url": "",
+ "datasets": "local/market1501",
+ "download_url": "https://local/ckpt.t7",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -3275,10 +3284,12 @@
"github_repo": "",
"github_branch": "",
"github_path": "",
- "datasets": "",
- "download_url": "",
+ "datasets": "local/VehicleID",
+ "download_url": "https://github.com/JDAI-CV/fast-reid/releases/download/v0.1.1/vehicleid_bot_R50-ibn.pth",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -3305,10 +3316,12 @@
"github_repo": "",
"github_branch": "",
"github_path": "",
- "datasets": "",
- "download_url": "",
+ "datasets": "local/VehicleID",
+ "download_url": "https://local/epoch_14.pth",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -3559,10 +3572,12 @@
"github_repo": "",
"github_branch": "",
"github_path": "",
- "datasets": "",
- "download_url": "",
+ "datasets": "local/coco",
+ "download_url": "https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_r50_fpn_4x4_1x_coco/fovea_r50_fpn_4x4_1x_coco_20200219-ee4d5303.pth",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -3621,10 +3636,12 @@
"github_repo": "",
"github_branch": "",
"github_path": "",
- "datasets": "",
- "download_url": "",
+ "datasets": "local/coco",
+ "download_url": "https://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_r50_fpn_1x_coco/fsaf_r50_fpn_1x_coco-94ccc51f.pth",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -3683,10 +3700,12 @@
"github_repo": "",
"github_branch": "",
"github_path": "",
- "datasets": "",
- "download_url": "",
+ "datasets": "local/coco",
+ "download_url": "https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_1x_coco/fcos_hrnetv2p_w18_gn-head_4x4_1x_coco_20201212_100710-4ad151de.pth",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -3777,10 +3796,12 @@
"github_repo": "",
"github_branch": "",
"github_path": "",
- "datasets": "",
- "download_url": "",
+ "datasets": "local/widerface",
+ "download_url": "https://github.com/biubug6/Face-Detector-1MB-with-landmark/raw/master/weights/mobilenet0.25_Final.pth",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -4688,7 +4709,9 @@
"datasets": "",
"download_url": "",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": true,
"demoType": "image"
@@ -4716,9 +4739,11 @@
"github_branch": "",
"github_path": "",
"datasets": "",
- "download_url": "",
+ "download_url": "https://huggingface.co/facebook/chameleon-7b",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -4748,7 +4773,9 @@
"datasets": "",
"download_url": "",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -4776,9 +4803,11 @@
"github_branch": "",
"github_path": "",
"datasets": "",
- "download_url": "",
+ "download_url": "https://huggingface.co/adept/fuyu-8b",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -4806,9 +4835,11 @@
"github_branch": "",
"github_path": "",
"datasets": "",
- "download_url": "",
+ "download_url": "https://huggingface.co/OpenGVLab/InternVL2-4B",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": true,
"demoType": "image-to-text"
@@ -4836,9 +4867,11 @@
"github_branch": "",
"github_path": "",
"datasets": "",
- "download_url": "",
+ "download_url": "https://modelscope.cn/models/swift/llava-v1.6-vicuna-7b-hf",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -4866,39 +4899,11 @@
"github_branch": "",
"github_path": "",
"datasets": "",
- "download_url": "",
- "need_third_part": "",
- "precisions": "",
- "type": "inference",
- "hasDemo": false,
- "demoType": ""
- },
- {
- "model_name": "minicpm_v",
- "framework": "vllm",
- "release_version": "25.03",
- "release_sdk": "CoreX 4.2.0",
- "release_gpgpu": "BI-V150",
- "latest_sdk": "4.2.0",
- "latest_gpgpu": "BI-V150",
- "category": "multimodal/vision_language_model",
- "toolbox": "",
- "mdims": "",
- "dataset": "",
- "license": "",
- "model_path": "models/multimodal/vision_language_model/minicpm_v/vllm/",
- "readme_file": "models/multimodal/vision_language_model/minicpm_v/vllm/README.md",
- "bitbucket_repo": "",
- "bitbucket_branch": "",
- "bitbucket_path": "",
- "develop_owner": "",
- "github_repo": "",
- "github_branch": "",
- "github_path": "",
- "datasets": "",
- "download_url": "",
+ "download_url": "https://modelscope.cn/models/swift/LLaVA-NeXT-Video-7B-hf",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -5023,9 +5028,11 @@
"github_branch": "",
"github_path": "",
"datasets": "",
- "download_url": "",
+ "download_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -5053,9 +5060,11 @@
"github_branch": "",
"github_path": "",
"datasets": "",
- "download_url": "",
+ "download_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -5083,9 +5092,11 @@
"github_branch": "",
"github_path": "",
"datasets": "",
- "download_url": "",
+ "download_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -5113,9 +5124,11 @@
"github_branch": "",
"github_path": "",
"datasets": "",
- "download_url": "",
+ "download_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -5143,9 +5156,11 @@
"github_branch": "",
"github_path": "",
"datasets": "",
- "download_url": "",
+ "download_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -5173,9 +5188,11 @@
"github_branch": "",
"github_path": "",
"datasets": "",
- "download_url": "",
+ "download_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": true,
"demoType": "chat"
@@ -5491,9 +5508,11 @@
"github_branch": "",
"github_path": "",
"datasets": "",
- "download_url": "",
+ "download_url": "https://modelscope.cn/models/qwen/Qwen1.5-7B",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -5651,7 +5670,9 @@
"datasets": "",
"download_url": "",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -5678,10 +5699,12 @@
"github_repo": "",
"github_branch": "",
"github_path": "",
- "datasets": "",
- "download_url": "",
+ "datasets": "local/china-people-daily-ner-corpus",
+ "download_url": "https://huggingface.co/bert-base-chinese",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "int8"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -5708,10 +5731,12 @@
"github_repo": "",
"github_branch": "",
"github_path": "",
- "datasets": "",
- "download_url": "",
+ "datasets": "local/SQuAD",
+ "download_url": "https://huggingface.co/csarron/bert-base-uncased-squad-v1",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -5739,9 +5764,11 @@
"github_branch": "",
"github_path": "",
"datasets": "",
- "download_url": "",
+ "download_url": "https://local/bert_base_uncased_squad",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -5768,10 +5795,12 @@
"github_repo": "",
"github_branch": "",
"github_path": "",
- "datasets": "",
- "download_url": "",
+ "datasets": "local/SQuAD",
+ "download_url": "https://huggingface.co/neuralmagic/bert-large-uncased-finetuned-squadv1",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -5799,9 +5828,11 @@
"github_branch": "",
"github_path": "",
"datasets": "",
- "download_url": "",
+ "download_url": "https://local/bert-large-uncased",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -5829,9 +5860,11 @@
"github_branch": "",
"github_path": "",
"datasets": "",
- "download_url": "",
+ "download_url": "https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open_deberta",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -5859,9 +5892,11 @@
"github_branch": "",
"github_path": "",
"datasets": "",
- "download_url": "",
+ "download_url": "https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open_roberta",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -5889,9 +5924,11 @@
"github_branch": "",
"github_path": "",
"datasets": "",
- "download_url": "",
+ "download_url": "https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open_roformer",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -5919,9 +5956,11 @@
"github_branch": "",
"github_path": "",
"datasets": "",
- "download_url": "",
+ "download_url": "https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open_videobert",
"need_third_part": "",
- "precisions": "",
+ "precisions": [
+ "fp16"
+ ],
"type": "inference",
"hasDemo": false,
"demoType": ""
@@ -6428,7 +6467,7 @@
"github_repo": "",
"github_branch": "",
"github_path": "",
- "datasets": "cityscapes",
+ "datasets": "local/cityscapes",
"download_url": "https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes_20211210_145204-6860854e.pth",
"need_third_part": false,
"precisions": [
@@ -6855,7 +6894,7 @@
"demoType": ""
},
{
- "model_name": "mllama",
+ "model_name": "llama-3.2",
"framework": "vllm",
"release_version": "25.06",
"release_sdk": "4.2.0",
@@ -6867,8 +6906,8 @@
"mdims": "",
"dataset": "",
"license": "",
- "model_path": "models/multimodal/vision_language_model/mllama/vllm",
- "readme_file": "models/multimodal/vision_language_model/mllama/vllm/README.md",
+ "model_path": "models/multimodal/vision_language_model/llama-3.2/vllm",
+ "readme_file": "models/multimodal/vision_language_model/llama-3.2/vllm/README.md",
"bitbucket_repo": "",
"bitbucket_branch": "",
"bitbucket_path": "",
diff --git a/tests/run_igie.py b/tests/run_igie.py
index 46e5636b98effaef49606ed74a94596f13013d45..3b9f7cf05c7d122ce402051ab44bde0900178ecb 100644
--- a/tests/run_igie.py
+++ b/tests/run_igie.py
@@ -94,6 +94,16 @@ def main():
logging.debug(f"The result of {model['model_name']} is\n{json.dumps(result, indent=4)}")
logging.info(f"End running {model['model_name']} test case.")
+    # multi_object_tracking models
+ if model["category"] in ["cv/multi_object_tracking"]:
+ logging.info(f"Start running {model['model_name']} test case:\n{json.dumps(model, indent=4)}")
+ d_url = model["download_url"]
+ if d_url is not None:
+ result = run_multi_object_tracking_testcase(model)
+ check_model_result(result)
+ logging.debug(f"The result of {model['model_name']} is\n{json.dumps(result, indent=4)}")
+ logging.info(f"End running {model['model_name']} test case.")
+
        # Speech models
if model["category"] in ["audio/speech_recognition"]:
logging.info(f"Start running {model['model_name']} test case:\n{json.dumps(model, indent=4)}")
@@ -159,13 +169,22 @@ def run_clf_testcase(model):
for prec in model["precisions"]:
logging.info(f"Start running {model_name} {prec} test case")
- script = f"""
- export DATASETS_DIR=/mnt/deepspark/data/datasets/imagenet-val
- export RUN_DIR=../../igie_common/
- cd ../{model['model_path']}
- bash scripts/infer_{model_name}_{prec}_accuracy.sh
- bash scripts/infer_{model_name}_{prec}_performance.sh
- """
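+        # unet is evaluated on its own dataset (cityscapes in the model config), so point DATASETS_DIR there instead of imagenet-val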
+ if model_name == "unet":
+ script = f"""
+ export DATASETS_DIR=/mnt/deepspark/data/datasets/{dataset_n}
+ export RUN_DIR=../../igie_common/
+ cd ../{model['model_path']}
+ bash scripts/infer_{model_name}_{prec}_accuracy.sh
+ bash scripts/infer_{model_name}_{prec}_performance.sh
+ """
+ else:
+ script = f"""
+ export DATASETS_DIR=/mnt/deepspark/data/datasets/imagenet-val
+ export RUN_DIR=../../igie_common/
+ cd ../{model['model_path']}
+ bash scripts/infer_{model_name}_{prec}_accuracy.sh
+ bash scripts/infer_{model_name}_{prec}_performance.sh
+ """
r, t = run_script(script)
sout = r.stdout
@@ -213,9 +232,9 @@ def run_detec_testcase(model):
ln -s /mnt/deepspark/data/datasets/{dataset_n} ./
"""
- if model["need_third_part"] and model["3rd_party_repo"]:
- third_party_repo = model["3rd_party_repo"]
- prepare_script += f"unzip /mnt/deepspark/data/3rd_party/{third_party_repo}.zip -d ./\n"
+ # if model["need_third_part"] and model["3rd_party_repo"]:
+ # third_party_repo = model["3rd_party_repo"]
+ # prepare_script += f"unzip /mnt/deepspark/data/3rd_party/{third_party_repo}.zip -d ./\n"
prepare_script += "bash ci/prepare.sh\n"
# add pip list info when in debug mode
@@ -384,6 +403,63 @@ def run_trace_testcase(model):
logging.debug(f"matchs:\n{matchs}")
return result
+def run_multi_object_tracking_testcase(model):
+ model_name = model["model_name"]
+ result = {
+ "name": model_name,
+ "result": {},
+ }
+ d_url = model["download_url"]
+ checkpoint_n = d_url.split("/")[-1]
+ dataset_n = model["datasets"].split("/")[-1]
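+    # Stage the pre-downloaded checkpoint and dataset via symlinks, then run the model's ci/prepare.sh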
+ prepare_script = f"""
+ cd ../{model['model_path']}
+ ln -s /mnt/deepspark/data/checkpoints/{checkpoint_n} ./
+ ln -s /mnt/deepspark/data/datasets/{dataset_n} ./
+ """
+
+ prepare_script += """
+ bash ci/prepare.sh
+ ls -l | grep onnx
+ """
+
+ # add pip list info when in debug mode
+ if utils.is_debug():
+ pip_list_script = "pip list | grep -E 'numpy|transformer|igie|mmcv|onnx'\n"
+ prepare_script = pip_list_script + prepare_script + pip_list_script
+
+ run_script(prepare_script)
+
+ for prec in model["precisions"]:
+ logging.info(f"Start running {model_name} {prec} test case")
+ script = f"""
+ cd ../{model['model_path']}
+ export DATASETS_DIR=./{dataset_n}/
+ bash scripts/infer_{model_name}_{prec}_accuracy.sh
+ bash scripts/infer_{model_name}_{prec}_performance.sh
+ """
+
+ r, t = run_script(script)
+ sout = r.stdout
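+        # Parse name/value pairs printed as "* <name>: <value>, <name>: <value>" (FPS/latency style) from the script output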
+ pattern = r"\* ([\w\d ]+):\s*([\d.]+)[ ms%]*, ([\w\d ]+):\s*([\d.]+)[ ms%]*"
+ matchs = re.findall(pattern, sout)
+ for m in matchs:
+ result["result"].setdefault(prec, {"status": "FAIL"})
+ try:
+ result["result"][prec] = result["result"][prec] | {m[0]: float(m[1]), m[2]: float(m[3])}
+ except ValueError:
+ print("The string cannot be converted to a float.")
+ result["result"][prec] = result["result"][prec] | {m[0]: m[1], m[2]: m[3]}
+ pattern = METRIC_PATTERN
+ matchs = re.findall(pattern, sout)
+ if matchs and len(matchs) == 1:
+ result["result"].setdefault(prec, {})
+ result["result"][prec].update(get_metric_result(matchs[0]))
+ result["result"][prec]["status"] = "PASS"
+ result["result"][prec]["Cost time (s)"] = t
+ logging.debug(f"matchs:\n{matchs}")
+ return result
+
# BERT series models
def run_nlp_testcase(model):
model_name = model["model_name"]
diff --git a/tests/run_ixrt.py b/tests/run_ixrt.py
index a19223ad859fdc6f4cf8d9e14c3d7c93086925ad..9464042144020809ff2d7f3983ff74d924e1df3f 100644
--- a/tests/run_ixrt.py
+++ b/tests/run_ixrt.py
@@ -94,6 +94,16 @@ def main():
logging.debug(f"The result of {model['model_name']} is\n{json.dumps(result, indent=4)}")
logging.info(f"End running {model['model_name']} test case.")
+        # instance_segmentation models
+ if model["category"] in ["cv/instance_segmentation"]:
+ logging.info(f"Start running {model['model_name']} test case:\n{json.dumps(model, indent=4)}")
+ d_url = model["download_url"]
+ if d_url is not None:
+ result = run_instance_segmentation_testcase(model)
+ check_model_result(result)
+ logging.debug(f"The result of {model['model_name']} is\n{json.dumps(result, indent=4)}")
+ logging.info(f"End running {model['model_name']} test case.")
+
        # NLP models
if model["category"] in ["nlp/plm", "others/recommendation"]:
logging.info(f"Start running {model['model_name']} test case:\n{json.dumps(model, indent=4)}")
@@ -191,9 +201,29 @@ def run_clf_testcase(model):
match_count += 1
result["result"][prec][name] = float(f"{float(value.split(':')[1].strip()):.3f}")
break
-
if match_count == len(patterns):
result["result"][prec]["status"] = "PASS"
+
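+        # swin_transformer_large reports "Throughput: <x> qps"; record it as QPS and take accuracy from the metricResult line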
+ if model_name == "swin_transformer_large":
+ pattern = r'Throughput: (\d+\.\d+) qps'
+ matchs = re.findall(pattern, sout)
+ for m in matchs:
+ result["result"].setdefault(prec, {"status": "FAIL"})
+ try:
+ result["result"][prec]["QPS"] = float(m)
+ except ValueError:
+ print("The string cannot be converted to a float.")
+ result["result"][prec]["QPS"] = m
+
+ pattern = METRIC_PATTERN
+ matchs = re.findall(pattern, sout)
+ result["result"].setdefault(prec, {"status": "FAIL"})
+ logging.debug(f"matchs:\n{matchs}")
+ for m in matchs:
+ result["result"][prec].update(get_metric_result(m))
+ if len(matchs) == 1:
+ result["result"][prec]["status"] = "PASS"
+
result["result"][prec]["Cost time (s)"] = t
logging.debug(f"matchs:\n{matchs}")
return result
@@ -375,7 +405,9 @@ def run_nlp_testcase(model):
bash scripts/infer_{model_name}_{prec}_performance.sh
cd ./ByteMLPerf/byte_infer_perf/general_perf
"""
- if model_name == "roformer" or model_name == "widedeep":
+ if model_name == "roformer" or model_name == "wide_and_deep":
+ if model_name == "wide_and_deep":
+ model_name = "widedeep"
script += f"""
python3 core/perf_engine.py --hardware_type ILUVATAR --task {model_name}-tf-fp32
"""
@@ -414,13 +446,23 @@ def run_nlp_testcase(model):
r, t = run_script(script)
sout = r.stdout
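+        # Record the reported "Throughput: <x> qps" value as QPS before scraping the metricResult dict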
+ pattern = r'Throughput: (\d+\.\d+) qps'
+ matchs = re.findall(pattern, sout)
+ for m in matchs:
+ result["result"].setdefault(prec, {"status": "FAIL"})
+ try:
+ result["result"][prec]["QPS"] = float(m)
+ except ValueError:
+ print("The string cannot be converted to a float.")
+ result["result"][prec]["QPS"] = m
+
pattern = METRIC_PATTERN
matchs = re.findall(pattern, sout)
result["result"].setdefault(prec, {"status": "FAIL"})
logging.debug(f"matchs:\n{matchs}")
for m in matchs:
result["result"][prec].update(get_metric_result(m))
- if len(matchs) == 2:
+ if len(matchs) == 1:
result["result"][prec]["status"] = "PASS"
result["result"][prec]["Cost time (s)"] = t
@@ -478,6 +520,59 @@ def run_speech_testcase(model):
logging.debug(f"matchs:\n{matchs}")
return result
+def run_instance_segmentation_testcase(model):
+ model_name = model["model_name"]
+ result = {
+ "name": model_name,
+ "result": {},
+ }
+ d_url = model["download_url"]
+ checkpoint_n = d_url.split("/")[-1]
+ dataset_n = model["datasets"].split("/")[-1]
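+    # Link the pre-staged checkpoint and dataset into the model directory, then run ci/prepare.sh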
+ prepare_script = f"""
+ cd ../{model['model_path']}
+ ln -s /root/data/checkpoints/{checkpoint_n} ./
+ ln -s /root/data/datasets/{dataset_n} ./
+ bash ci/prepare.sh
+ ls -l | grep onnx
+ """
+
+ # add pip list info when in debug mode
+ if utils.is_debug():
+ pip_list_script = "pip list | grep -E 'numpy|transformer|igie|mmcv|onnx'\n"
+ prepare_script = pip_list_script + prepare_script + pip_list_script
+
+ run_script(prepare_script)
+
+ for prec in model["precisions"]:
+ logging.info(f"Start running {model_name} {prec} test case")
+ script = f"""
+ cd ../{model['model_path']}
+ export PROJ_DIR=./
+ export DATASETS_DIR=./coco2017/
+ export CHECKPOINTS_DIR=./checkpoints
+ export COCO_GT=./coco2017/annotations/instances_val2017.json
+ export EVAL_DIR=./coco2017/val2017
+ export RUN_DIR=./
+ bash scripts/infer_{model_name}_{prec}_accuracy.sh
+ bash scripts/infer_{model_name}_{prec}_performance.sh
+ """
+
+ r, t = run_script(script)
+ sout = r.stdout
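+        # Two metricResult matches (one from the accuracy script, one from the performance script) mark the precision as PASS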
+ pattern = METRIC_PATTERN
+ matchs = re.findall(pattern, sout)
+ result["result"].setdefault(prec, {"status": "FAIL"})
+ logging.debug(f"matchs:\n{matchs}")
+ for m in matchs:
+ result["result"][prec].update(get_metric_result(m))
+ if len(matchs) == 2:
+ result["result"][prec]["status"] = "PASS"
+
+ result["result"][prec]["Cost time (s)"] = t
+ logging.debug(f"matchs:\n{matchs}")
+ return result
+
def get_metric_result(str):
if str:
return json.loads(str.replace("'", "\""))["metricResult"]
diff --git a/tests/run_trtllm.py b/tests/run_trtllm.py
index c57e02816ef6dba5f829ecfaf2e7b1e3849d0da5..ac79b3b9591e4cd65269a4d5e0341b4b2d2d0a9a 100644
--- a/tests/run_trtllm.py
+++ b/tests/run_trtllm.py
@@ -72,7 +72,7 @@ def get_model_config(mode_name):
models = json.load(file)
for model in models['models']:
- if model["model_name"] == mode_name.lower() and model["framework"] == "trtllm":
+ if model["model_name"] == mode_name.lower() and (model["framework"] == "trtllm" or model["framework"] == "tgi"):
return model
return
diff --git a/tests/run_vllm.py b/tests/run_vllm.py
index a200569cfda5db31f720bcd9ccffba70399a056b..e05973fdd535f8d626c53056c2280e8dcbf214e6 100644
--- a/tests/run_vllm.py
+++ b/tests/run_vllm.py
@@ -205,10 +205,14 @@ def run_nlp_testcase(model):
python3 offline_inference.py --model ./stablelm --max-tokens 256 -tp 1 --temperature 0.0
"""
elif model_name.startswith("deepseek-r1-distill-"):
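+            # Run the 32B distill with 4-way tensor parallelism; the smaller distills fit on tp=2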
+ if model_name == "deepseek-r1-distill-qwen-32b":
+ tp = 4
+ else:
+ tp = 2
script = f"""
set -x
cd ../{model['model_path']}
- python3 offline_inference.py --model ./{model_name} --max-tokens 256 -tp 2 --temperature 0.0 --max-model-len 3096
+ python3 offline_inference.py --model ./{model_name} --max-tokens 256 -tp {tp} --temperature 0.0 --max-model-len 3096
"""
elif model_name == "aria":
script = f"""
@@ -217,6 +221,13 @@ def run_nlp_testcase(model):
export VLLM_ASSETS_CACHE=../vllm/
python3 offline_inference_vision_language.py --model ./{model_name} --max-tokens 256 -tp 4 --trust-remote-code --temperature 0.0 --dtype bfloat16 --tokenizer-mode slow
"""
+ elif model_name == "chameleon_7b" or model_name == "fuyu_8b":
+ script = f"""
+ set -x
+ cd ../{model['model_path']}
+ export VLLM_ASSETS_CACHE=../vllm/
+ python3 offline_inference_vision_language.py --model ./{model_name} --max-tokens 256 -tp 2 --trust-remote-code --temperature 0.0
+ """
elif model_name == "h2vol" or model_name == "idefics3":
script = f"""
set -x
@@ -231,7 +242,7 @@ def run_nlp_testcase(model):
export VLLM_ASSETS_CACHE=../vllm/
PT_SDPA_ENABLE_HEAD_DIM_PADDING=1 python3 offline_inference_vision_language.py --model ./{model_name} --max-tokens 256 -tp 2 --trust-remote-code --temperature 0.0
"""
- elif model_name == "mllama":
+ elif model_name == "llama-3.2":
script = f"""
set -x
cd ../{model['model_path']}
@@ -246,6 +257,27 @@ def run_nlp_testcase(model):
export VLLM_ASSETS_CACHE=../vllm/
python3 offline_inference_vision_language.py --model ./{model_name} --max-tokens 256 -tp 4 --trust-remote-code --temperature 0.0 --tokenizer-mode 'mistral'
"""
+ elif model_name == "llava":
+ script = f"""
+ set -x
+ cd ../{model['model_path']}
+ export VLLM_ASSETS_CACHE=../vllm/
+ python3 offline_inference_vision_language.py --model ./{model_name} --max-tokens 256 -tp 4 --trust-remote-code --temperature 0.0 --model-type llava-next --max-model-len 4096
+ """
+ elif model_name == "llava_next_video_7b":
+ script = f"""
+ set -x
+ cd ../{model['model_path']}
+ export VLLM_ASSETS_CACHE=../vllm/
+ python3 offline_inference_vision_language.py --model ./{model_name} --max-tokens 256 -tp 4 --trust-remote-code --temperature 0.0 --model-type llava-next-video --modality video --dtype bfloat16
+ """
+ elif model_name == "intern_vl":
+ script = f"""
+ set -x
+ cd ../{model['model_path']}
+ export VLLM_ASSETS_CACHE=../vllm/
+ python3 offline_inference_vision_language.py --model ./{model_name} --max-tokens 256 -tp 2 --temperature 0.0 --max-model-len 2048
+ """
r, t = run_script(script)
sout = r.stdout
@@ -257,6 +289,13 @@ def run_nlp_testcase(model):
result["result"][prec]["tokens"] = int(matchs.group(1))
result["result"][prec]["QPS"] = float(matchs.group(2))
result["result"][prec]["status"] = "PASS"
+ else:
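+                # Fall back to vLLM's "Maximum concurrency for N tokens per request: Mx" log line when the primary pattern does not match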
+ pattern = r"Maximum concurrency for (\d+) tokens per request: ([\d.]+)x"
+ matchs = re.search(pattern, sout)
+ if matchs:
+ result["result"][prec]["tokens"] = int(matchs.group(1))
+ result["result"][prec]["QPS"] = float(matchs.group(2))
+ result["result"][prec]["status"] = "PASS"
result["result"][prec]["Cost time (s)"] = t
return result
diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py
index 089d9860f573bba7e19f84aa20fb830a8fcc22d8..f8a2797282b4a2edbace565b8a7d68ad3090ea48 100644
--- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py
+++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py
@@ -210,6 +210,9 @@ class PerfEngine:
if accuracy_report:
base_report['Accuracy'] = accuracy_report
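+            # Echo the accuracy report as a {'metricResult': ...} dict so the test runners can scrape it via METRIC_PATTERN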
+ metricResult = {}
+ metricResult['metricResult'] = accuracy_report
+ print(metricResult)
# function to test qps and latency
if workload['test_perf']: