diff --git a/models/audio/speech_recognition/conformer/igie/README.md b/models/audio/speech_recognition/conformer/igie/README.md
index 9141b3c8bab3498c3ef8d9638de8f945a9089b70..ae96f9d4b9433e57973f2f7d6d1b5f1e206ef9aa 100644
--- a/models/audio/speech_recognition/conformer/igie/README.md
+++ b/models/audio/speech_recognition/conformer/igie/README.md
@@ -24,6 +24,12 @@ Dataset: to download the Aishell dataset.
 ### Install Dependencies
 ```bash
+# Install sox
+## CentOS
+yum install sox sox-devel -y
+## Ubuntu
+apt install sox libsox-fmt-all -y
+
 pip3 install -r requirements.txt
 cd ctc_decoder/swig && bash setup.sh
 cd ../../
diff --git a/models/audio/speech_recognition/conformer/igie/ci/prepare.sh b/models/audio/speech_recognition/conformer/igie/ci/prepare.sh
index 49f448a67b02f4ee0dd8b313948e9b8d710c2fce..8290acf0fb594993f0439c539f7abfff54a34a15 100644
--- a/models/audio/speech_recognition/conformer/igie/ci/prepare.sh
+++ b/models/audio/speech_recognition/conformer/igie/ci/prepare.sh
@@ -16,6 +16,15 @@
 set -x
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+    apt install sox libsox-fmt-all -y
+elif [[ ${ID} == "centos" ]]; then
+    yum install sox sox-devel -y
+else
+    echo "Not Support Os"
+fi
+
 pip3 install -r requirements.txt
 cd ctc_decoder/swig && bash setup.sh
 cd ../../
@@ -39,4 +48,4 @@ onnxsim encoder_bs24_seq384_static.onnx encoder_bs24_seq384_static_opt.onnx
 python3 alter_onnx.py --batch_size 24 --path encoder_bs24_seq384_static_opt.onnx
 # Need to unzip aishell to the current directory. For details, refer to data.list
-tar -zxvf aishell.tar.gz
+# tar -zxvf aishell.tar.gz
diff --git a/models/audio/speech_recognition/conformer/igie/requirements.txt b/models/audio/speech_recognition/conformer/igie/requirements.txt
index 2f7cd1f24262857100607eb19f6ccc14b7e98a31..8820eb754dec653c319dc0c86d53049346c7f7b6 100644
--- a/models/audio/speech_recognition/conformer/igie/requirements.txt
+++ b/models/audio/speech_recognition/conformer/igie/requirements.txt
@@ -1,4 +1,4 @@
 tqdm
 onnx
 typeguard==2.13.3
-onnxsim
+onnxsim
\ No newline at end of file
diff --git a/models/audio/speech_recognition/conformer/igie/wenet/wenet/utils/init_model.py b/models/audio/speech_recognition/conformer/igie/wenet/wenet/utils/init_model.py
index 4a008183ee25cd88b2fa25d93bdc3f9e3a55d31a..377e110b36cc140a55edc9dcc1b20dc5f91387a2 100644
--- a/models/audio/speech_recognition/conformer/igie/wenet/wenet/utils/init_model.py
+++ b/models/audio/speech_recognition/conformer/igie/wenet/wenet/utils/init_model.py
@@ -29,7 +29,7 @@ from wenet.utils.cmvn import load_cmvn
 def init_model(configs):
     if configs['cmvn_file'] is not None:
-        mean, istd = load_cmvn(configs['cmvn_file'], configs['is_json_cmvn'])
+        mean, istd = load_cmvn(configs['cmvn_file'], configs['cmvn_conf']['is_json_cmvn'])
         global_cmvn = GlobalCMVN(
             torch.from_numpy(mean).float(),
             torch.from_numpy(istd).float())
diff --git a/models/audio/speech_recognition/transformer_asr/ixrt/ci/prepare.sh b/models/audio/speech_recognition/transformer_asr/ixrt/ci/prepare.sh
index 5a1f966836c58193331ab4d43411a5622c04ad79..3b9bb751e92924655b447fdc785899465a846b61 100644
--- a/models/audio/speech_recognition/transformer_asr/ixrt/ci/prepare.sh
+++ b/models/audio/speech_recognition/transformer_asr/ixrt/ci/prepare.sh
@@ -32,6 +32,8 @@ cp -r /root/data/checkpoints/8886 results/transformer/
 mkdir -p results/transformer/8886/save
 mkdir -p /home/data/speechbrain/aishell/csv_data
 ln -s /root/data/datasets/AISHELL/data_aishell /home/data/speechbrain/aishell/
+cp
/root/data/datasets/rirs_noises.zip /home/data/speechbrain/aishell/ +unzip -o /home/data/speechbrain/aishell/rirs_noises.zip -d /home/data/speechbrain/aishell/ cp results/transformer/8886/*.csv /home/data/speechbrain/aishell/csv_data bash build.sh diff --git a/models/cv/classification/densenet121/ixrt/README.md b/models/cv/classification/densenet121/ixrt/README.md index 58cce7a327e3d3b00c4b2ccf289e23124050d02a..a5dbc7c7f19a4121e1d769ec50a9b7e2c308489b 100644 --- a/models/cv/classification/densenet121/ixrt/README.md +++ b/models/cv/classification/densenet121/ixrt/README.md @@ -33,8 +33,9 @@ pip3 install -r ../../ixrt_common/requirements.txt ### Model Conversion ```bash +# download model into /root/.cache/torch/hub/checkpoints/densenet121-a639ec97.pth mkdir checkpoints -python3 ../../ixrt_common/export.py --model-name densenet121 --weight densenet121-a639ec97.pth --output checkpoints/densenet121.onnx +python3 export.py --output checkpoints/densenet121.onnx ``` ## Model Inference diff --git a/models/cv/classification/densenet121/ixrt/ci/prepare.sh b/models/cv/classification/densenet121/ixrt/ci/prepare.sh index 3ac521c0ed745dd4b98f475b9af614bff3137105..8d542a84ee0e27037281fc85a3a9799d159476e8 100644 --- a/models/cv/classification/densenet121/ixrt/ci/prepare.sh +++ b/models/cv/classification/densenet121/ixrt/ci/prepare.sh @@ -27,4 +27,4 @@ fi pip install -r ../../ixrt_common/requirements.txt mkdir checkpoints -python3 ../../ixrt_common/export.py --model-name densenet121 --weight densenet121-a639ec97.pth --output checkpoints/densenet121.onnx \ No newline at end of file +python3 export.py --output checkpoints/densenet121.onnx \ No newline at end of file diff --git a/models/cv/classification/densenet121/ixrt/export.py b/models/cv/classification/densenet121/ixrt/export.py new file mode 100644 index 0000000000000000000000000000000000000000..ff86753a004cd6611c7c7104e0061904bc3d2184 --- /dev/null +++ b/models/cv/classification/densenet121/ixrt/export.py @@ -0,0 +1,43 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+import torch +import torchvision.models as models +import argparse + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--output_model", type=str) + args = parser.parse_args() + return args + +args = parse_args() +model = models.densenet121(pretrained=True) +model.cuda() +model.eval() +input = torch.randn(1, 3, 224, 224, device='cuda') +export_onnx_file = args.output_model + +torch.onnx.export(model, + input, + export_onnx_file, + export_params=True, + opset_version=11, + do_constant_folding=True, + input_names = ['input'], + output_names = ['output'],) +print(" ") +print('Model has been converted to ONNX') +print("exit") +exit() diff --git a/models/cv/classification/efficientnet_b1/ixrt/ci/prepare.sh b/models/cv/classification/efficientnet_b1/ixrt/ci/prepare.sh index c69f7471f6a82156b66ba0cd953c0e25e0d9ec17..7bb940752879bc68163f7a4dc4c31e3494dbec54 100644 --- a/models/cv/classification/efficientnet_b1/ixrt/ci/prepare.sh +++ b/models/cv/classification/efficientnet_b1/ixrt/ci/prepare.sh @@ -28,4 +28,4 @@ fi pip install -r ../../ixrt_common/requirements.txt mkdir checkpoints -python3 ../../ixrt_common/export_onnx.py --model-name efficientnet_b1 --output_model checkpoints/efficientnet-b1.onnx \ No newline at end of file +python3 ../../ixrt_common/export.py --model-name efficientnet_b1 --weight efficientnet_b1-c27df63c.pth --output checkpoints/efficientnet_b1.onnx \ No newline at end of file diff --git a/models/cv/classification/efficientnetv2_rw_t/igie/inference.py b/models/cv/classification/efficientnetv2_rw_t/igie/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..e33c91fa1de2d25402f0ad3318e15f372d829908 --- /dev/null +++ b/models/cv/classification/efficientnetv2_rw_t/igie/inference.py @@ -0,0 +1,183 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +import sys +import argparse +import tvm +import torch +import torchvision +import numpy as np +from tvm import relay +from tqdm import tqdm +from torchvision import transforms +from torchvision.transforms.functional import InterpolationMode + +from timm.data import create_dataset, create_loader + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--engine", + type=str, + required=True, + help="igie engine path.") + + parser.add_argument("--batchsize", + type=int, + required=True, + help="inference batch size.") + + parser.add_argument("--datasets", + type=str, + required=True, + help="datasets path.") + + parser.add_argument("--input_name", + type=str, + required=True, + help="input name of the model.") + + parser.add_argument("--warmup", + type=int, + default=3, + help="number of warmup before test.") + + parser.add_argument("--num_workers", + type=int, + default=16, + help="number of workers used in pytorch dataloader.") + + parser.add_argument("--acc_target", + type=float, + default=None, + help="Model inference Accuracy target.") + + parser.add_argument("--fps_target", + type=float, + default=None, + help="Model inference FPS target.") + + parser.add_argument("--perf_only", + type=bool, + default=False, + help="Run performance test only") + + args = parser.parse_args() + + return args + +def get_dataloader(data_path, batch_size, num_workers): + datasets = create_dataset(root=data_path, name="") + + dataloader = create_loader( + datasets, + input_size=(3, 288, 288), + batch_size=batch_size, + interpolation='bicubic', + mean=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + crop_pct=1.0, + use_prefetcher = False, + num_workers = num_workers + ) + return dataloader + +def get_topk_accuracy(pred, label): + if isinstance(pred, np.ndarray): + pred = torch.from_numpy(pred) + + if isinstance(label, np.ndarray): + label = torch.from_numpy(label) + + top1_acc = 0 + top5_acc = 0 + for idx in range(len(label)): + label_value = label[idx] + if label_value == torch.topk(pred[idx].float(), 1).indices.data: + top1_acc += 1 + top5_acc += 1 + + elif label_value in torch.topk(pred[idx].float(), 5).indices.data: + top5_acc += 1 + + return top1_acc, top5_acc + +def main(): + args = parse_args() + + batch_size = args.batchsize + + # create iluvatar target & device + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + device = tvm.device(target.kind.name, 0) + + # load engine + lib = tvm.runtime.load_module(args.engine) + + # create runtime from engine + module = tvm.contrib.graph_executor.GraphModule(lib["default"](device)) + + # just run perf test + if args.perf_only: + ftimer = module.module.time_evaluator("run", device, number=100, repeat=1) + prof_res = np.array(ftimer().results) * 1000 + fps = batch_size * 1000 / np.mean(prof_res) + print(f"\n* Mean inference time: {np.mean(prof_res):.3f} ms, Mean fps: {fps:.3f}") + else: + # warm up + for _ in range(args.warmup): + module.run() + + # get dataloader + dataloader = get_dataloader(args.datasets, batch_size, args.num_workers) + + top1_acc = 0 + top5_acc = 0 + total_num = 0 + + for image, label in tqdm(dataloader): + + # pad the last batch + pad_batch = len(image) != batch_size + + if pad_batch: + origin_size = len(image) + image = np.resize(image, (batch_size, *image.shape[1:])) + + module.set_input(args.input_name, tvm.nd.array(image, device)) + + # run inference + module.run() + + pred = module.get_output(0).asnumpy() + + if pad_batch: + pred = pred[:origin_size] + + # get batch accuracy 
+ batch_top1_acc, batch_top5_acc = get_topk_accuracy(pred, label) + + top1_acc += batch_top1_acc + top5_acc += batch_top5_acc + total_num += batch_size + + result_stat = {} + result_stat["acc@1"] = round(top1_acc / total_num * 100.0, 3) + result_stat["acc@5"] = round(top5_acc / total_num * 100.0, 3) + + print(f"\n* Top1 acc: {result_stat['acc@1']} %, Top5 acc: {result_stat['acc@5']} %") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/models/cv/classification/efficientnetv2_rw_t/igie/requirements.txt b/models/cv/classification/efficientnetv2_rw_t/igie/requirements.txt deleted file mode 100644 index 36677a29ab3a81e04e55e2185513580169404d15..0000000000000000000000000000000000000000 --- a/models/cv/classification/efficientnetv2_rw_t/igie/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -timm -onnx -tqdm diff --git a/models/cv/classification/efficientnetv2_rw_t/igie/scripts/infer_efficientnetv2_rw_t_fp16_accuracy.sh b/models/cv/classification/efficientnetv2_rw_t/igie/scripts/infer_efficientnetv2_rw_t_fp16_accuracy.sh index 9d96fcfb93ffd560a2a682f94068fd3322833a93..7e2e5ffbc665d6e70d0dc5ff7bcf0b870d79dd1b 100644 --- a/models/cv/classification/efficientnetv2_rw_t/igie/scripts/infer_efficientnetv2_rw_t_fp16_accuracy.sh +++ b/models/cv/classification/efficientnetv2_rw_t/igie/scripts/infer_efficientnetv2_rw_t_fp16_accuracy.sh @@ -28,7 +28,7 @@ python3 ${RUN_DIR}build_engine.py \ # inference -python3 ${RUN_DIR}inference.py \ +python3 inference.py \ --engine efficientnetv2_rw_t_bs_${batchsize}_fp16.so \ --batchsize ${batchsize} \ --input_name input \ diff --git a/models/cv/classification/efficientnetv2_rw_t/igie/scripts/infer_efficientnetv2_rw_t_fp16_performance.sh b/models/cv/classification/efficientnetv2_rw_t/igie/scripts/infer_efficientnetv2_rw_t_fp16_performance.sh index a3f9d58520cb98365e5c789fce1f07dba5627249..c08b48407740ee447d6bad514fe0aa76c001aec6 100644 --- a/models/cv/classification/efficientnetv2_rw_t/igie/scripts/infer_efficientnetv2_rw_t_fp16_performance.sh +++ b/models/cv/classification/efficientnetv2_rw_t/igie/scripts/infer_efficientnetv2_rw_t_fp16_performance.sh @@ -28,7 +28,7 @@ python3 ${RUN_DIR}build_engine.py \ # inference -python3 ${RUN_DIR}inference.py \ +python3 inference.py \ --engine efficientnetv2_rw_t_bs_${batchsize}_fp16.so \ --batchsize ${batchsize} \ --input_name input \ diff --git a/models/cv/classification/ixrt_common/build_i8_engine.py b/models/cv/classification/ixrt_common/build_i8_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..6038b33f50cff7a14efcefa6673ae9d2fd19870b --- /dev/null +++ b/models/cv/classification/ixrt_common/build_i8_engine.py @@ -0,0 +1,112 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+import argparse +import json +import os + +import tensorrt +import tensorrt as trt + +TRT_LOGGER = trt.Logger(tensorrt.Logger.VERBOSE) + +EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + + +def GiB(val): + return val * 1 << 30 + + +def json_load(filename): + with open(filename) as json_file: + data = json.load(json_file) + return data + + +def setDynamicRange(network, json_file): + """Sets ranges for network layers.""" + quant_param_json = json_load(json_file) + act_quant = quant_param_json["act_quant_info"] + + for i in range(network.num_inputs): + input_tensor = network.get_input(i) + if act_quant.__contains__(input_tensor.name): + print(input_tensor.name) + value = act_quant[input_tensor.name] + tensor_max = abs(value) + tensor_min = -abs(value) + input_tensor.dynamic_range = (tensor_min, tensor_max) + + for i in range(network.num_layers): + layer = network.get_layer(i) + + for output_index in range(layer.num_outputs): + tensor = layer.get_output(output_index) + + if act_quant.__contains__(tensor.name): + value = act_quant[tensor.name] + tensor_max = abs(value) + tensor_min = -abs(value) + tensor.dynamic_range = (tensor_min, tensor_max) + else: + print("\033[1;32m%s\033[0m" % tensor.name) + + +def build_engine(onnx_file, json_file, engine_file): + builder = trt.Builder(TRT_LOGGER) + network = builder.create_network(EXPLICIT_BATCH) + + config = builder.create_builder_config() + + # If it is a dynamic onnx model , you need to add the following. + # profile = builder.create_optimization_profile() + # profile.set_shape("input_name", (batch, channels, min_h, min_w), (batch, channels, opt_h, opt_w), (batch, channels, max_h, max_w)) + # config.add_optimization_profile(profile) + + parser = trt.OnnxParser(network, TRT_LOGGER) + # config.max_workspace_size = GiB(1) + if not os.path.exists(onnx_file): + quit("ONNX file {} not found".format(onnx_file)) + + with open(onnx_file, "rb") as model: + if not parser.parse(model.read()): + print("ERROR: Failed to parse the ONNX file.") + for error in range(parser.num_errors): + print(parser.get_error(error)) + return None + + config.set_flag(trt.BuilderFlag.INT8) + + setDynamicRange(network, json_file) + + engine = builder.build_engine(network, config) + + with open(engine_file, "wb") as f: + f.write(engine.serialize()) + + +if __name__ == "__main__": + # Add plugins if needed + # import ctypes + # ctypes.CDLL("libmmdeploy_tensorrt_ops.so") + parser = argparse.ArgumentParser( + description="Writing qparams to onnx to convert tensorrt engine." 
+ ) + parser.add_argument("--onnx", type=str, default=None) + parser.add_argument("--qparam_json", type=str, default=None) + parser.add_argument("--engine", type=str, default=None) + arg = parser.parse_args() + + build_engine(arg.onnx, arg.qparam_json, arg.engine) + print("\033[1;32mgenerate %s\033[0m" % arg.engine) \ No newline at end of file diff --git a/models/cv/classification/ixrt_common/config/EFFICIENTNET_B1_CONFIG b/models/cv/classification/ixrt_common/config/EFFICIENTNET_B1_CONFIG index 0275a569d9c68074365cbe75427920818aec93ca..42e3e648cb2839c2017ec5134a9625f46566be73 100644 --- a/models/cv/classification/ixrt_common/config/EFFICIENTNET_B1_CONFIG +++ b/models/cv/classification/ixrt_common/config/EFFICIENTNET_B1_CONFIG @@ -17,8 +17,8 @@ # MODEL_NAME : 生成onnx/engine的basename # ORIGINE_MODEL : 原始onnx文件名称 IMGSIZE=224 -MODEL_NAME=EfficientNet_b1 -ORIGINE_MODEL=efficientnet-b1.onnx +MODEL_NAME=efficientnet_b1 +ORIGINE_MODEL=efficientnet_b1.onnx # QUANT CONFIG (仅PRECISION为int8时生效) # QUANT_OBSERVER : 量化策略,可选 [hist_percentile, percentile, minmax, entropy, ema] diff --git a/models/cv/classification/ixrt_common/quant_i8.py b/models/cv/classification/ixrt_common/quant_i8.py new file mode 100644 index 0000000000000000000000000000000000000000..c728c7a128f7ba5a041160c9452980861c7a9071 --- /dev/null +++ b/models/cv/classification/ixrt_common/quant_i8.py @@ -0,0 +1,166 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+"""这是一个高度自动化的 PPQ 量化的入口脚本,将你的模型和数据按要求进行打包: + +在自动化 API 中,我们使用 QuantizationSetting 对象传递量化参数。 + +This file will show you how to quantize your network with PPQ + You should prepare your model and calibration dataset as follow: + + ~/working/model.onnx <-- your model + ~/working/data/*.npy or ~/working/data/*.bin <-- your dataset + +if you are using caffe model: + ~/working/model.caffemdoel <-- your model + ~/working/model.prototext <-- your model + +### MAKE SURE YOUR INPUT LAYOUT IS [N, C, H, W] or [C, H, W] ### + +quantized model will be generated at: ~/working/quantized.onnx +""" +from ppq import * +from ppq.api import * +import os +from calibration_dataset import getdataloader +import argparse +import random +import numpy as np +import torch + + +def setseed(seed=42): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model_name", type=str) + parser.add_argument("--model", type=str) + parser.add_argument("--dataset_dir", type=str, default="imagenet_val") + parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], + default="hist_percentile") + parser.add_argument("--disable_quant_names", nargs='*', type=str) + parser.add_argument("--save_dir", type=str, help="save path", default=None) + parser.add_argument("--bsz", type=int, default=32) + parser.add_argument("--step", type=int, default=20) + parser.add_argument("--seed", type=int, default=42) + parser.add_argument("--imgsz", type=int, default=224) + args = parser.parse_args() + print("Quant config:", args) + print(args.disable_quant_names) + return args + + +config = parse_args() + +# modify configuration below: +WORKING_DIRECTORY = 'checkpoints' # choose your working directory +TARGET_PLATFORM = TargetPlatform.TRT_INT8 # choose your target platform +MODEL_TYPE = NetworkFramework.ONNX # or NetworkFramework.CAFFE +INPUT_LAYOUT = 'chw' # input data layout, chw or hwc +NETWORK_INPUTSHAPE = [32, 3, 224, 224] # input shape of your network +EXECUTING_DEVICE = 'cuda' # 'cuda' or 'cpu'. +REQUIRE_ANALYSE = False +TRAINING_YOUR_NETWORK = False # 是否需要 Finetuning 一下你的网络 +# ------------------------------------------------------------------- +# 加载你的模型文件,PPQ 将会把 onnx 或者 caffe 模型文件解析成自己的格式 +# 如果你正使用 pytorch, tensorflow 等框架,你可以先将模型导出成 onnx +# 使用 torch.onnx.export 即可,如果你在导出 torch 模型时发生错误,欢迎与我们联系。 +# ------------------------------------------------------------------- +graph = None +if MODEL_TYPE == NetworkFramework.ONNX: + graph = load_onnx_graph(onnx_import_file=config.model) +if MODEL_TYPE == NetworkFramework.CAFFE: + graph = load_caffe_graph( + caffemodel_path=os.path.join(WORKING_DIRECTORY, 'model.caffemodel'), + prototxt_path=os.path.join(WORKING_DIRECTORY, 'model.prototxt')) +assert graph is not None, 'Graph Loading Error, Check your input again.' 
+
+# -------------------------------------------------------------------
+# The SETTING object controls PPQ's quantization logic: graph fusion, the dispatching scheme, and fine-grained quantization strategies
+# When the quantization error of your network is too high, modify attributes of the SETTING object to apply specific optimizations
+# -------------------------------------------------------------------
+QS = QuantizationSettingFactory.default_setting()
+
+# -------------------------------------------------------------------
+# The following shows how to use finetuning to improve quantization accuracy
+# PPQ provides more than ten algorithms to help you recover accuracy
+# Each of them is enabled via QS.xxxx = True
+# Enable them as needed; do not turn them all on at once
+# -------------------------------------------------------------------
+if TRAINING_YOUR_NETWORK:
+    QS.lsq_optimization = True                             # enable network retraining to reduce quantization error
+    QS.lsq_optimization_setting.steps = 500                # number of retraining steps; affects training time, 500 steps take roughly a few minutes
+    QS.lsq_optimization_setting.collecting_device = 'cuda' # where cached data is kept; 'cuda' means on the GPU, switch to 'cpu' if GPU memory runs out
+
+
+dataloader = getdataloader(config.dataset_dir, config.step, batch_size=config.bsz, img_sz=config.imgsz)
+# ENABLE_CUDA_KERNEL speeds quantization up by 3x ~ 10x, but it cannot be compiled without the proper build environment
+# You can set up the build environment, or quantize without the CUDA kernel by simply removing the with ENABLE_CUDA_KERNEL(): block
+with ENABLE_CUDA_KERNEL():
+    print('Quantizing the network; depending on your quantization configuration, this will take a while:')
+    quantized = quantize_native_model(
+        setting=QS,                     # the setting object controls the standard quantization logic
+        model=graph,
+        calib_dataloader=dataloader,
+        calib_steps=config.step,
+        input_shape=NETWORK_INPUTSHAPE, # if your network has a single input, pass it via this parameter
+        inputs=None,
+        # if your network has multiple inputs, use this parameter instead, i.e. input_shape=None, inputs=[torch.zeros(1,3,224,224), torch.zeros(1,3,224,224)]
+        collate_fn=lambda x: x[0].to(EXECUTING_DEVICE),  # collate_fn works like the torch dataloader collate_fn and is used for data preprocessing;
+        # you can also use the torch dataloader's own collate_fn and set this one to None
+        platform=TARGET_PLATFORM,
+        device=EXECUTING_DEVICE,
+        do_quantize=True)
+
+    # -------------------------------------------------------------------
+    # If you need to run the quantized network and get its outputs, create an executor
+    # The executor behaves like a torch.Module and can be used to obtain execution results
+    # Note that this must be done before export.
+    # -------------------------------------------------------------------
+    executor = TorchExecutor(graph=quantized, device=EXECUTING_DEVICE)
+    # output = executor.forward(input)
+
+    # -------------------------------------------------------------------
+    # When computing quantization error, PPQ uses the reciprocal of the signal-to-noise ratio, i.e. noise energy / signal energy
+    # A quantization error of 0.1 means quantization noise makes up roughly 10% of the overall signal energy
+    # Note that graphwise_error_analyse measures the accumulated error
+    # The last layers of a network usually show a large accumulated error, caused jointly by all preceding layers
+    # Use layerwise_error_analyse to trace the source of the error layer by layer
+    # -------------------------------------------------------------------
+    print('Computing graph-wise quantization error (SNR); the last layer should have an error below 0.1 to preserve accuracy:')
+    reports = graphwise_error_analyse(
+        graph=quantized, running_device=EXECUTING_DEVICE, steps=32,
+        dataloader=dataloader, collate_fn=lambda x: x[0].to(EXECUTING_DEVICE))
+    for op, snr in reports.items():
+        if snr > 0.1: ppq_warning(f'Layer {op} has significant accumulated quantization error, consider optimizing it')
+
+    if REQUIRE_ANALYSE:
+        print('Computing layer-wise quantization error (SNR); each layer should have an independent error below 0.1 to preserve accuracy:')
+        layerwise_error_analyse(graph=quantized, running_device=EXECUTING_DEVICE,
+                                interested_outputs=None,
+                                dataloader=dataloader, collate_fn=lambda x: x.to(EXECUTING_DEVICE))
+
+    # -------------------------------------------------------------------
+    # Use export_ppq_graph to export the quantized model
+    # PPQ adapts the model format to the export platform you selected
+    # -------------------------------------------------------------------
+    print('Quantization finished, generating target files:')
+    export_ppq_graph(
+        graph=quantized, platform=TARGET_PLATFORM,
+        graph_save_to=os.path.join(config.save_dir, f"quantized_{config.model_name}.onnx"),
+        config_save_to=os.path.join(config.save_dir, 'quant_cfg.json'))
diff --git a/models/cv/classification/resnetv1d50/ixrt/README.md b/models/cv/classification/resnetv1d50/ixrt/README.md
index
0a5cf2cf44e5bc65ffaae70eaa449ee81dc29e2b..9a8d945de7190080c83437591649145961c7eecb 100644 --- a/models/cv/classification/resnetv1d50/ixrt/README.md +++ b/models/cv/classification/resnetv1d50/ixrt/README.md @@ -28,7 +28,7 @@ yum install -y mesa-libGL apt install -y libgl1-mesa-glx pip3 install -r ../../ixrt_common/requirments.txt -pip3 install mmcv==1.5.3 mmcls==0.24.0 +pip3 install mmcv==1.5.3 mmcls==0.24.0 ppq pycuda transformers==4.37.1 ``` ### Model Conversion diff --git a/models/cv/classification/resnetv1d50/ixrt/ci/prepare.sh b/models/cv/classification/resnetv1d50/ixrt/ci/prepare.sh index 22566b28034a67b41a63152251dfbd32206087cb..d5adbfd5beca26708b1a07d262b46ff661c5bb9b 100644 --- a/models/cv/classification/resnetv1d50/ixrt/ci/prepare.sh +++ b/models/cv/classification/resnetv1d50/ixrt/ci/prepare.sh @@ -26,7 +26,7 @@ else fi pip install -r ../../ixrt_common/requirements.txt -pip install mmcv==1.5.3 mmcls==0.24.0 +pip install mmcv==1.5.3 mmcls==0.24.0 ppq pycuda transformers==4.37.1 unzip -q /root/data/repos/mmpretrain-0.24.0.zip -d ./ mkdir checkpoints python3 ../../ixrt_common/export_mmcls.py --cfg mmpretrain/configs/resnet/resnetv1d50_b32x8_imagenet.py --weight resnetv1d50_b32x8_imagenet_20210531-db14775a.pth --output checkpoints/resnet_v1_d50.onnx \ No newline at end of file diff --git a/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_fp16_accuracy.sh b/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_fp16_accuracy.sh index 482924938efb2a72399df02d9704ea8dd34e82e6..b743d7084ae058118c29daaf494769fc293ceb41 100644 --- a/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_fp16_accuracy.sh +++ b/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_fp16_accuracy.sh @@ -51,8 +51,6 @@ echo RUN_DIR : ${RUN_DIR} echo CONFIG_DIR : ${CONFIG_DIR} echo ====================== Model Info ====================== echo Model Name : ${MODEL_NAME} -echo Model Input Name : ${MODEL_INPUT_NAME} -echo Model Output Name : ${MODEL_OUTPUT_NAME} echo Onnx Path : ${ORIGINE_MODEL} step=0 @@ -71,34 +69,6 @@ else echo " "Generate ${SIM_MODEL} fi -# Quant Model -if [ $PRECISION == "int8" ];then - let step++ - echo; - echo [STEP ${step}] : Quant Model - if [[ -z ${QUANT_EXIST_ONNX} ]];then - QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx - fi - if [[ -f ${QUANT_EXIST_ONNX} ]];then - SIM_MODEL=${QUANT_EXIST_ONNX} - echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed - else - python3 ${RUN_DIR}/quant.py \ - --model ${SIM_MODEL} \ - --model_name ${MODEL_NAME} \ - --dataset_dir ${DATASETS_DIR} \ - --observer ${QUANT_OBSERVER} \ - --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ - --save_dir $CHECKPOINTS_DIR \ - --bsz ${QUANT_BATCHSIZE} \ - --step ${QUANT_STEP} \ - --seed ${QUANT_SEED} \ - --imgsz ${IMGSIZE} - SIM_MODEL=${QUANT_EXIST_ONNX} - echo " "Generate ${SIM_MODEL} - fi -fi - # Change Batchsize let step++ echo; @@ -141,4 +111,4 @@ python3 ${RUN_DIR}/inference.py \ --acc_target ${TGT} \ --bsz ${BSZ}; check_status -exit ${EXIT_STATUS} \ No newline at end of file +exit ${EXIT_STATUS} diff --git a/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_fp16_performance.sh b/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_fp16_performance.sh index c843057dda987fd834e1fa0580deb2a8cdea17ce..e7a4f1a7276406a0ed7400af4368b5bec2a06e06 100644 --- a/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_fp16_performance.sh +++ 
b/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_fp16_performance.sh @@ -51,8 +51,6 @@ echo RUN_DIR : ${RUN_DIR} echo CONFIG_DIR : ${CONFIG_DIR} echo ====================== Model Info ====================== echo Model Name : ${MODEL_NAME} -echo Model Input Name : ${MODEL_INPUT_NAME} -echo Model Output Name : ${MODEL_OUTPUT_NAME} echo Onnx Path : ${ORIGINE_MODEL} step=0 @@ -71,34 +69,6 @@ else echo " "Generate ${SIM_MODEL} fi -# Quant Model -if [ $PRECISION == "int8" ];then - let step++ - echo; - echo [STEP ${step}] : Quant Model - if [[ -z ${QUANT_EXIST_ONNX} ]];then - QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx - fi - if [[ -f ${QUANT_EXIST_ONNX} ]];then - SIM_MODEL=${QUANT_EXIST_ONNX} - echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed - else - python3 ${RUN_DIR}/quant.py \ - --model ${SIM_MODEL} \ - --model_name ${MODEL_NAME} \ - --dataset_dir ${DATASETS_DIR} \ - --observer ${QUANT_OBSERVER} \ - --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ - --save_dir $CHECKPOINTS_DIR \ - --bsz ${QUANT_BATCHSIZE} \ - --step ${QUANT_STEP} \ - --seed ${QUANT_SEED} \ - --imgsz ${IMGSIZE} - SIM_MODEL=${QUANT_EXIST_ONNX} - echo " "Generate ${SIM_MODEL} - fi -fi - # Change Batchsize let step++ echo; @@ -141,4 +111,4 @@ python3 ${RUN_DIR}/inference.py \ --fps_target ${TGT} \ --bsz ${BSZ}; check_status -exit ${EXIT_STATUS} \ No newline at end of file +exit ${EXIT_STATUS} diff --git a/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_int8_accuracy.sh b/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_int8_accuracy.sh index a66d6a253206c49ad68752793ffb1bd7b7f12958..df1fdc610c2332f33d210e6f417cf44da7fef7bd 100644 --- a/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_int8_accuracy.sh +++ b/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_int8_accuracy.sh @@ -13,6 +13,7 @@ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. 
+set -x EXIT_STATUS=0 check_status() { @@ -28,7 +29,7 @@ WARM_UP=0 LOOP_COUNT=-1 RUN_MODE=ACC PRECISION=int8 - +export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python # Update arguments index=0 options=$@ @@ -43,6 +44,7 @@ do done source ${CONFIG_DIR} +echo ${QUANT_OBSERVER} ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} @@ -60,16 +62,15 @@ SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx # Simplify Model let step++ -echo; -echo [STEP ${step}] : Simplify Model -if [ -f ${SIM_MODEL} ];then - echo " "Simplify Model, ${SIM_MODEL} has been existed -else - python3 ${RUN_DIR}/simplify_model.py \ - --origin_model $ORIGINE_MODEL \ - --output_model ${SIM_MODEL} - echo " "Generate ${SIM_MODEL} -fi + echo [STEP ${step}] : Simplify Model + if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model, ${SIM_MODEL} has been existed + else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} + fi # Quant Model if [ $PRECISION == "int8" ];then @@ -83,7 +84,7 @@ if [ $PRECISION == "int8" ];then SIM_MODEL=${QUANT_EXIST_ONNX} echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed else - python3 ${RUN_DIR}/quant.py \ + python3 ${RUN_DIR}/quant_i8.py \ --model ${SIM_MODEL} \ --model_name ${MODEL_NAME} \ --dataset_dir ${DATASETS_DIR} \ @@ -120,15 +121,15 @@ ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine if [ -f $ENGINE_FILE ];then echo " "Build Engine Skip, $ENGINE_FILE has been existed else - python3 ${RUN_DIR}/build_engine.py \ - --precision ${PRECISION} \ - --model ${FINAL_MODEL} \ + python3 ${RUN_DIR}/build_i8_engine.py \ + --onnx ${FINAL_MODEL} \ + --qparam_json ${CHECKPOINTS_DIR}/quant_cfg.json \ --engine ${ENGINE_FILE} echo " "Generate Engine ${ENGINE_FILE} fi # Inference -let step++ +# let step++ echo; echo [STEP ${step}] : Inference python3 ${RUN_DIR}/inference.py \ @@ -141,4 +142,4 @@ python3 ${RUN_DIR}/inference.py \ --acc_target ${TGT} \ --bsz ${BSZ}; check_status -exit ${EXIT_STATUS} \ No newline at end of file +exit ${EXIT_STATUS} diff --git a/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_int8_performance.sh b/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_int8_performance.sh index e578762eb996592b509a8eed995b15b227ae8a86..72ca157b222ba853eb530146099c0cc3bfbb68c9 100644 --- a/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_int8_performance.sh +++ b/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_int8_performance.sh @@ -28,7 +28,7 @@ WARM_UP=3 LOOP_COUNT=20 RUN_MODE=FPS PRECISION=int8 - +export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python # Update arguments index=0 options=$@ @@ -43,6 +43,7 @@ do done source ${CONFIG_DIR} +echo ${QUANT_OBSERVER} ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} @@ -60,7 +61,6 @@ SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx # Simplify Model let step++ -echo; echo [STEP ${step}] : Simplify Model if [ -f ${SIM_MODEL} ];then echo " "Simplify Model, ${SIM_MODEL} has been existed @@ -83,7 +83,7 @@ if [ $PRECISION == "int8" ];then SIM_MODEL=${QUANT_EXIST_ONNX} echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed else - python3 ${RUN_DIR}/quant.py \ + python3 ${RUN_DIR}/quant_i8.py \ --model ${SIM_MODEL} \ --model_name ${MODEL_NAME} \ --dataset_dir ${DATASETS_DIR} \ @@ -120,15 +120,15 @@ ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine if [ -f 
$ENGINE_FILE ];then echo " "Build Engine Skip, $ENGINE_FILE has been existed else - python3 ${RUN_DIR}/build_engine.py \ - --precision ${PRECISION} \ - --model ${FINAL_MODEL} \ + python3 ${RUN_DIR}/build_i8_engine.py \ + --onnx ${FINAL_MODEL} \ + --qparam_json ${CHECKPOINTS_DIR}/quant_cfg.json \ --engine ${ENGINE_FILE} echo " "Generate Engine ${ENGINE_FILE} fi # Inference -let step++ +# let step++ echo; echo [STEP ${step}] : Inference python3 ${RUN_DIR}/inference.py \ @@ -138,7 +138,7 @@ python3 ${RUN_DIR}/inference.py \ --warm_up=${WARM_UP} \ --loop_count ${LOOP_COUNT} \ --test_mode ${RUN_MODE} \ - --fps_target ${TGT} \ + --acc_target ${TGT} \ --bsz ${BSZ}; check_status exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/resnext50_32x4d/ixrt/ci/prepare.sh b/models/cv/classification/resnext50_32x4d/ixrt/ci/prepare.sh index cd826b9795c96c6b3156d80022667d8a60ab6715..b9671165ce53a144c0f9b16d1e54f3ba824723ab 100644 --- a/models/cv/classification/resnext50_32x4d/ixrt/ci/prepare.sh +++ b/models/cv/classification/resnext50_32x4d/ixrt/ci/prepare.sh @@ -27,4 +27,4 @@ fi pip install -r ../../ixrt_common/requirements.txt mkdir checkpoints -python3 export.py ../../ixrt_common/export.py --model-name resnext50_32x4d --weight resnext50_32x4d-7cdf4587.pth --output checkpoints/resnext50_32x4d.onnx \ No newline at end of file +python3 ../../ixrt_common/export.py --model-name resnext50_32x4d --weight resnext50_32x4d-7cdf4587.pth --output checkpoints/resnext50_32x4d.onnx \ No newline at end of file diff --git a/models/cv/instance_segmentation/solov1/ixrt/ci/prepare.sh b/models/cv/instance_segmentation/solov1/ixrt/ci/prepare.sh index 09fa1878415ca72f1fd17b9ca6e19b16926756f9..66c8f9d0525bc855866325817dd7ee87aad8989f 100644 --- a/models/cv/instance_segmentation/solov1/ixrt/ci/prepare.sh +++ b/models/cv/instance_segmentation/solov1/ixrt/ci/prepare.sh @@ -44,6 +44,6 @@ cd .. mkdir -p checkpoints ln -s /root/data/checkpoints/solo_r50_fpn_3x_coco_20210901_012353-11d224d7.pth ./ -ln -s /root/data/datasets/coco ./ +ln -s /root/data/datasets/coco2017 ./ python3 solo_torch2onnx.py --cfg ./solo_r50_fpn_3x_coco.py --checkpoint ./solo_r50_fpn_3x_coco_20210901_012353-11d224d7.pth --batch_size 1 mv r50_solo_bs1_800x800.onnx ./checkpoints/r50_solo_bs1_800x800.onnx \ No newline at end of file diff --git a/models/cv/multi_object_tracking/fastreid/igie/ci/prepare.sh b/models/cv/multi_object_tracking/fastreid/igie/ci/prepare.sh index 700282231245007abf2aad21967073edd312c77e..5ddfdcb2a001023d573acfbeb67e7bb5b5f9e707 100644 --- a/models/cv/multi_object_tracking/fastreid/igie/ci/prepare.sh +++ b/models/cv/multi_object_tracking/fastreid/igie/ci/prepare.sh @@ -18,7 +18,8 @@ set -x pip3 install -r requirements.txt -# clone fast-reid first +# install fast-reid +git clone https://github.com/JDAI-CV/fast-reid.git --depth=1 cd fast-reid pip3 install -r docs/requirements.txt diff --git a/models/cv/object_detection/foveabox/ixrt/ci/prepare.sh b/models/cv/object_detection/foveabox/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..507d455e4d7a3b7e169cf7c422331910ce8eaa88 --- /dev/null +++ b/models/cv/object_detection/foveabox/ixrt/ci/prepare.sh @@ -0,0 +1,31 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi +pip3 install -r requirements.txt + +python3 export.py --weight fovea_r50_fpn_4x4_1x_coco_20200219-ee4d5303.pth --cfg fovea_r50_fpn_4xb4-1x_coco.py --output foveabox.onnx + +onnxsim foveabox.onnx foveabox_opt.onnx \ No newline at end of file diff --git a/models/cv/object_detection/fsaf/ixrt/ci/prepare.sh b/models/cv/object_detection/fsaf/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..67155f5998e7d58e3116cd34e7dd8320b37f5437 --- /dev/null +++ b/models/cv/object_detection/fsaf/ixrt/ci/prepare.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi +pip3 install -r requirements.txt +# export onnx model +python3 export.py --weight fsaf_r50_fpn_1x_coco-94ccc51f.pth --cfg fsaf_r50_fpn_1x_coco.py --output fsaf.onnx + +# use onnxsim optimize onnx model +onnxsim fsaf.onnx fsaf_opt.onnx \ No newline at end of file diff --git a/models/cv/object_detection/hrnet/ixrt/ci/prepare.sh b/models/cv/object_detection/hrnet/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..cbc14791567345842f24b3082733dee73dd56776 --- /dev/null +++ b/models/cv/object_detection/hrnet/ixrt/ci/prepare.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi +pip3 install -r requirements.txt +# export onnx model +python3 export.py --weight fcos_hrnetv2p_w18_gn-head_4x4_1x_coco_20201212_100710-4ad151de.pth --cfg fcos_hrnetv2p-w18-gn-head_4xb4-1x_coco.py --output hrnet.onnx + +# Use onnxsim optimize onnx model +onnxsim hrnet.onnx hrnet_opt.onnx \ No newline at end of file diff --git a/models/cv/object_detection/retinaface/igie/inference.py b/models/cv/object_detection/retinaface/igie/inference.py index 44d29d1b9e1c8075c1212cecbe13ed683ebf1d7f..86782697f78fd90308c571d50e70410dcc245344 100644 --- a/models/cv/object_detection/retinaface/igie/inference.py +++ b/models/cv/object_detection/retinaface/igie/inference.py @@ -132,7 +132,7 @@ class FaceDataset(Dataset): return np.concatenate([i[None] for i in im], axis=0), path, shapes, path_ori def _load_image(self, i): - im = cv2.imread(self.img_dir+'/images'+self.imgs_path[i], cv2.IMREAD_COLOR) + im = cv2.imread(self.img_dir+'/images/'+self.imgs_path[i], cv2.IMREAD_COLOR) h0, w0 = im.shape[:2] r = self.image_size / max(h0, w0) if r != 1: diff --git a/models/cv/object_detection/retinaface/ixrt/README.md b/models/cv/object_detection/retinaface/ixrt/README.md index 67ce9e3cdccf9bc8326ff98572515ebddce4fa9e..2323b20fe2d009e7c9ad217f858084e196a524ec 100644 --- a/models/cv/object_detection/retinaface/ixrt/README.md +++ b/models/cv/object_detection/retinaface/ixrt/README.md @@ -47,7 +47,7 @@ python3 torch2onnx.py --model mobilenet0.25_Final.pth --onnx_model mnetv1_retina ```bash export DATASETS_DIR=/Path/to/widerface/ -export GT_DIR=../igie/ground_truth +export GT_DIR=../igie/widerface_evaluate/ground_truth ``` ### FP16 diff --git a/models/cv/object_detection/retinaface/ixrt/ci/prepare.sh b/models/cv/object_detection/retinaface/ixrt/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..0796d90b857d7d36426bdfd9e38631681b9c0c04 --- /dev/null +++ b/models/cv/object_detection/retinaface/ixrt/ci/prepare.sh @@ -0,0 +1,31 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi +pip3 install -r requirements.txt + +python3 setup.py build_ext --inplace +# export onnx model +python3 torch2onnx.py --model mobilenet0.25_Final.pth --onnx_model mnetv1_retinaface.onnx \ No newline at end of file diff --git a/models/cv/object_detection/retinaface/ixrt/evaluation.py b/models/cv/object_detection/retinaface/ixrt/evaluation.py index d3c6495aaf782725761a52c9d2442564b482155d..8d1a9e0731532faf547aa96417edc1a6da2fda9b 100644 --- a/models/cv/object_detection/retinaface/ixrt/evaluation.py +++ b/models/cv/object_detection/retinaface/ixrt/evaluation.py @@ -289,5 +289,7 @@ def evaluation(pred, gt_path, iou_thresh=0.5): print("Medium Val AP: {}".format(aps[1])) print("Hard Val AP: {}".format(aps[2])) print("=================================================") + metricResult = {"metricResult": {"Easy Val AP": aps[0], "Medium Val AP": aps[1], "Hard Val AP": aps[2]}} + print(metricResult) return aps[0] \ No newline at end of file diff --git a/models/cv/object_detection/retinaface/ixrt/scripts/infer_retinaface_fp16_accuracy.sh b/models/cv/object_detection/retinaface/ixrt/scripts/infer_retinaface_fp16_accuracy.sh index 5722980c09728dc0e0fc0bd131bc51c126fa0290..3fca161ad63023affdf118ba3e312ce94455b13b 100644 --- a/models/cv/object_detection/retinaface/ixrt/scripts/infer_retinaface_fp16_accuracy.sh +++ b/models/cv/object_detection/retinaface/ixrt/scripts/infer_retinaface_fp16_accuracy.sh @@ -44,6 +44,7 @@ do done PROJ_DIR=${PROJ_DIR:-"."} +GT_DIR=${GT_DIR:-"../igie/widerface_evaluate/ground_truth"} DATASETS_DIR="${DATASETS_DIR}" CHECKPOINTS_DIR="${PROJ_DIR}" RUN_DIR="${PROJ_DIR}" diff --git a/models/cv/object_detection/retinaface/ixrt/scripts/infer_retinaface_fp16_performance.sh b/models/cv/object_detection/retinaface/ixrt/scripts/infer_retinaface_fp16_performance.sh index ddbcc65577d11410149adeb1c08cdbb305651ce5..cb486381f32bd88954137ccb6a4c44d963c50382 100644 --- a/models/cv/object_detection/retinaface/ixrt/scripts/infer_retinaface_fp16_performance.sh +++ b/models/cv/object_detection/retinaface/ixrt/scripts/infer_retinaface_fp16_performance.sh @@ -44,6 +44,7 @@ do done PROJ_DIR=${PROJ_DIR:-"."} +GT_DIR=${GT_DIR:-"../igie/widerface_evaluate/ground_truth"} DATASETS_DIR="${DATASETS_DIR}" CHECKPOINTS_DIR="${PROJ_DIR}" RUN_DIR="${PROJ_DIR}" diff --git a/models/cv/object_detection/retinaface/ixrt/wider_face_dataset.py b/models/cv/object_detection/retinaface/ixrt/wider_face_dataset.py index 8095cb6f05897c19d387230755b32c1ae6ad3352..e383834f688e59746408f3873d0c3794db5e43bd 100644 --- a/models/cv/object_detection/retinaface/ixrt/wider_face_dataset.py +++ b/models/cv/object_detection/retinaface/ixrt/wider_face_dataset.py @@ -48,9 +48,9 @@ class WiderFaceDetection(data.Dataset): def __init__(self, prj_dir, preproc=lt_preproc, input_size=(320, 320)): self.preproc = preproc self.input_size = input_size - self.image_dir = os.path.join(prj_dir, "images") + self.image_dir = os.path.join(prj_dir, "val/images") - testset_list = os.path.join(prj_dir, "wider_val.txt") + testset_list = os.path.join(prj_dir, "val/wider_val.txt") with open(testset_list, 'r') as fr: self.imgs_path = fr.read().split() diff --git a/models/cv/object_detection/yolov3/igie/requirements.txt b/models/cv/object_detection/yolov3/igie/requirements.txt index 
171602527bdea43ee2216f9ad4629d83cfd92e38..d58360efcf45ff6176a24cc37580d08ef176381b 100644 --- a/models/cv/object_detection/yolov3/igie/requirements.txt +++ b/models/cv/object_detection/yolov3/igie/requirements.txt @@ -1,5 +1,5 @@ tqdm onnx onnxsim -ultralytics +ultralytics==8.3.97 pycocotools diff --git a/models/cv/object_detection/yolov5/igie/requirements.txt b/models/cv/object_detection/yolov5/igie/requirements.txt index 171602527bdea43ee2216f9ad4629d83cfd92e38..d58360efcf45ff6176a24cc37580d08ef176381b 100644 --- a/models/cv/object_detection/yolov5/igie/requirements.txt +++ b/models/cv/object_detection/yolov5/igie/requirements.txt @@ -1,5 +1,5 @@ tqdm onnx onnxsim -ultralytics +ultralytics==8.3.97 pycocotools diff --git a/models/cv/object_detection/yolov5/ixrt/requirements.txt b/models/cv/object_detection/yolov5/ixrt/requirements.txt index b0f4374b2b778c81875da50d088fecedd01689c9..10a9fba6a70545eee20ab0db7bb740b1d4807f95 100644 --- a/models/cv/object_detection/yolov5/ixrt/requirements.txt +++ b/models/cv/object_detection/yolov5/ixrt/requirements.txt @@ -1,7 +1,7 @@ tqdm onnx onnxsim -ultralytics +ultralytics==8.3.97 pycocotools opencv-python==4.6.0.66 pycuda \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s/ixrt/requirements.txt b/models/cv/object_detection/yolov5s/ixrt/requirements.txt index ffb8ce179fef26f79070045778708b03b8111fce..b1a10ab060644ea96d6ad77b36dbc4367a632591 100644 --- a/models/cv/object_detection/yolov5s/ixrt/requirements.txt +++ b/models/cv/object_detection/yolov5s/ixrt/requirements.txt @@ -1,6 +1,6 @@ tqdm onnx onnxsim -ultralytics +ultralytics==8.3.97 pycocotools pycuda \ No newline at end of file diff --git a/models/cv/object_detection/yolov7/ixrt/requirements.txt b/models/cv/object_detection/yolov7/ixrt/requirements.txt index b0f4374b2b778c81875da50d088fecedd01689c9..10a9fba6a70545eee20ab0db7bb740b1d4807f95 100644 --- a/models/cv/object_detection/yolov7/ixrt/requirements.txt +++ b/models/cv/object_detection/yolov7/ixrt/requirements.txt @@ -1,7 +1,7 @@ tqdm onnx onnxsim -ultralytics +ultralytics==8.3.97 pycocotools opencv-python==4.6.0.66 pycuda \ No newline at end of file diff --git a/models/cv/ocr/kie_layoutxlm/igie/requirements.txt b/models/cv/ocr/kie_layoutxlm/igie/requirements.txt index ede2fc9df150acb6ef4821e922de2ea645355f7d..2f2f00126b5801c5996ec60782011f59875bf740 100644 --- a/models/cv/ocr/kie_layoutxlm/igie/requirements.txt +++ b/models/cv/ocr/kie_layoutxlm/igie/requirements.txt @@ -5,6 +5,6 @@ Polygon3 paddlenlp==2.8.1 lanms-neo==1.0.2 paddleocr==2.6.0 -paddle2onnx +paddle2onnx==1.3.0 python-bidi protobuf==3.20.3 \ No newline at end of file diff --git a/models/multimodal/vision_language_model/aria/vllm/README.md b/models/multimodal/vision_language_model/aria/vllm/README.md index ae768ce6f06f8558014b34aaee00df7d45855751..7ef43e546b4ecc15f06f2d5dc7d7e8acc0aa404c 100644 --- a/models/multimodal/vision_language_model/aria/vllm/README.md +++ b/models/multimodal/vision_language_model/aria/vllm/README.md @@ -37,6 +37,8 @@ In order to run the model smoothly, you need to get the sdk from [resource cente yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-glx + +pip install transformer==4.48.0 ``` ## Model Inference diff --git a/models/multimodal/vision_language_model/aria/vllm/ci/prepare.sh b/models/multimodal/vision_language_model/aria/vllm/ci/prepare.sh index 7232aa2996f379a961cf931968a1319fb70ac091..ff5f4e533365daff8abc4c396d8b9206ec3cd6b3 100644 --- a/models/multimodal/vision_language_model/aria/vllm/ci/prepare.sh +++ 
b/models/multimodal/vision_language_model/aria/vllm/ci/prepare.sh @@ -25,3 +25,4 @@ else fi cp -r ../../vllm_public_assets/ ./ +pip install transformer==4.48.0 \ No newline at end of file diff --git a/models/multimodal/vision_language_model/chameleon_7b/vllm/README.md b/models/multimodal/vision_language_model/chameleon_7b/vllm/README.md index 1ed7c9116c970df30b47800496835aac9a0016c9..7a488b0a320202b0914a55f724627eb98eb482b6 100755 --- a/models/multimodal/vision_language_model/chameleon_7b/vllm/README.md +++ b/models/multimodal/vision_language_model/chameleon_7b/vllm/README.md @@ -17,6 +17,8 @@ Chameleon, an AI system that mitigates these limitations by augmenting LLMs with - Model: ```bash +cp -r ../../vllm_public_assets/ ./ + # Download model from the website and make sure the model's path is "data/chameleon-7b" mkdir data ``` diff --git a/models/multimodal/vision_language_model/chameleon_7b/vllm/ci/prepare.sh b/models/multimodal/vision_language_model/chameleon_7b/vllm/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..7232aa2996f379a961cf931968a1319fb70ac091 --- /dev/null +++ b/models/multimodal/vision_language_model/chameleon_7b/vllm/ci/prepare.sh @@ -0,0 +1,27 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +cp -r ../../vllm_public_assets/ ./ diff --git a/models/multimodal/vision_language_model/chameleon_7b/vllm/utils.py b/models/multimodal/vision_language_model/chameleon_7b/vllm/utils.py deleted file mode 100644 index 48445ed97d08a8388a90d20e026609b5c1e88a99..0000000000000000000000000000000000000000 --- a/models/multimodal/vision_language_model/chameleon_7b/vllm/utils.py +++ /dev/null @@ -1,224 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import argparse -import codecs -import logging - -""" -The following arguments can not be add in args... -early_stopping: Union[bool, str] = False, -early_stopping: Controls the stopping condition for beam search. 
It - accepts the following values: `True`, where the generation stops as - soon as there are `best_of` complete candidates; `False`, where an - heuristic is applied and the generation stops when is it very - unlikely to find better candidates; `"never"`, where the beam search - procedure only stops when there cannot be better candidates - (canonical beam search algorithm). -stop: Optional[Union[str, List[str]]] = None, -stop_token_ids: Optional[List[int]] = None, -logits_processors: Optional[List[LogitsProcessor]] = None, -logits_processors: List of functions that modify logits based on - previously generated tokens, and optionally prompt tokens as - a first argument. -truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]] = None, -truncate_prompt_tokens: If set to an integer k, will use only the last k - tokens from the prompt (i.e., left truncation). Defaults to None - (i.e., no truncation). - """ - - -def sampling_add_cli_args(args: argparse.ArgumentParser) -> argparse.ArgumentParser: - args.add_argument( - "--n", - type=int, - default=1, - help="Number of output sequences to return for the given prompt.", - ) - args.add_argument( - "--best-of", - type=int, - default=None, - help="Number of output sequences that are generated from the prompt. " - "From these `best_of` sequences, the top `n` sequences are returned. " - "`best_of` must be greater than or equal to `n`. This is treated as " - "the beam width when `use_beam_search` is True. By default, `best_of`" - "is set to `n`.", - ) - args.add_argument( - "--presence-penalty", - type=float, - default=0.0, - help="Float that penalizes new tokens based on whether they " - "appear in the generated text so far. Values > 0 encourage the model " - "to use new tokens, while values < 0 encourage the model to repeat " - "tokens.", - ) - args.add_argument( - "--frequency-penalty", - type=float, - default=0.0, - help="Float that penalizes new tokens based on their " - " frequency in the generated text so far. Values > 0 encourage the " - " model to use new tokens, while values < 0 encourage the model to " - "repeat tokens.", - ) - args.add_argument( - "--repetition-penalty", - type=float, - default=1.0, - help="Float that penalizes new tokens based on whether " - "they appear in the prompt and the generated text so far. Values > 1 " - "encourage the model to use new tokens, while values < 1 encourage " - "the model to repeat tokens.", - ) - args.add_argument( - "--temperature", - type=float, - default=1.0, - help="Float that controls the randomness of the sampling. Lower " - "values make the model more deterministic, while higher values make " - "the model more random. Zero means greedy sampling.", - ) - args.add_argument( - "--top-p", - type=float, - default=1.0, - help="Float that controls the cumulative probability of the top tokens " - "to consider. Must be in (0, 1]. Set to 1 to consider all tokens.", - ) - args.add_argument( - "--top-k", - type=int, - default=-1, - help="Integer that controls the number of top tokens to consider. Set " - "to -1 to consider all tokens.", - ) - args.add_argument( - "--min-p", - type=float, - default=0.0, - help="Float that represents the minimum probability for a token to be " - "considered, relative to the probability of the most likely token. " - "Must be in [0, 1]. 
Set to 0 to disable this.", - ) - args.add_argument( - "--use-beam-search", - default=False, - action="store_true", - help="Whether to use beam search instead of sampling.", - ) - args.add_argument( - "--length-penalty", - type=float, - default=1.0, - help="Float that penalizes sequences based on their length. Used in beam search.", - ) - args.add_argument( - "--stop", - type=str, - default=None, - help="List of strings that stop the generation when they are generated. " - "The returned output will not contain the stop strings.", - ) - args.add_argument( - "--stop-token-ids", - type=int, - default=None, - help="List of tokens that stop the generation when they are " - "generated. The returned output will contain the stop tokens unless " - "the stop tokens are special tokens.", - ) - args.add_argument( - "--include-stop-str-in-output", - default=False, - action="store_true", - help="Whether to include the stop strings in output text. Defaults to False.", - ) - args.add_argument( - "--ignore-eos", - default=False, - action="store_true", - help="Whether to ignore the EOS token and continue generating tokens after the EOS token is generated.", - ) - args.add_argument( - "--max-tokens", - type=int, - default=16, - help="Maximum number of tokens to generate per output sequence.", - ) - args.add_argument( - "--min-tokens", - type=int, - default=0, - help="Minimum number of tokens to generate per output sequence " - "before EOS or stop_token_ids can be generated", - ) - args.add_argument( - "--logprobs", - type=int, - default=None, - help="NNumber of log probabilities to return per output token. " - "Note that the implementation follows the OpenAI API: The return " - "result includes the log probabilities on the `logprobs` most likely " - "tokens, as well the chosen tokens. The API will always return the " - "log probability of the sampled token, so there may be up to " - "`logprobs+1` elements in the response.", - ) - args.add_argument( - "--prompt-logprobs", - type=int, - default=None, - help="Number of log probabilities to return per prompt token.", - ) - args.add_argument( - "--detokenize", - type=bool, - default=True, - help="Whether to detokenize the output. Defaults to True.", - ) - args.add_argument( - "--skip-special-tokens", - default=True, - action="store_false", - help="Whether to skip special tokens in the output.", - ) - args.add_argument( - "--spaces-between-special-tokens", - default=True, - action="store_false", - help="Whether to add spaces between special tokens in the output. Defaults to True.", - ) - return args - - -def load_chat_template(tokenizer, chat_template): - if chat_template is not None: - try: - with open(chat_template, "r") as f: - tokenizer.chat_template = f.read() - except OSError: - # If opening a file fails, set chat template to be args to - # ensure we decode so our escape are interpreted correctly - tokenizer.chat_template = codecs.decode(chat_template, "unicode_escape") - - logging.info(f"Using supplied chat template:\n{tokenizer.chat_template}") - elif tokenizer.chat_template is not None: - logging.info( - f"Using default chat template:\n{tokenizer.chat_template}. This May lead to unsatisfactory results. You can provide a template.jinja file for vllm." - ) - else: - logging.warning( - "No chat template provided. Chat API will not work. This May lead to unsatisfactory results. You can provide a template.jinja file for vllm." 
- ) \ No newline at end of file diff --git a/models/multimodal/vision_language_model/chameleon_7b/vllm/vllm_public_assets/cherry_blossom.jpg b/models/multimodal/vision_language_model/chameleon_7b/vllm/vllm_public_assets/cherry_blossom.jpg deleted file mode 100644 index 63173db0da7687d7841fe4d85239d8e277d81259..0000000000000000000000000000000000000000 Binary files a/models/multimodal/vision_language_model/chameleon_7b/vllm/vllm_public_assets/cherry_blossom.jpg and /dev/null differ diff --git a/models/multimodal/vision_language_model/fuyu_8b/vllm/README.md b/models/multimodal/vision_language_model/fuyu_8b/vllm/README.md index f751f8c4db94a5b7c1e170ead59ec7ad40fcfc9c..d13e0b364e215b3c4479edd6f0ee8072977f1e36 100755 --- a/models/multimodal/vision_language_model/fuyu_8b/vllm/README.md +++ b/models/multimodal/vision_language_model/fuyu_8b/vllm/README.md @@ -21,6 +21,8 @@ transformer decoder like an image transformer (albeit with no pooling and causal - Model: ```bash +cp -r ../../vllm_public_assets/ ./ + # Download model from the website and make sure the model's path is "data/fuyu-8b" mkdir data/ ``` diff --git a/models/multimodal/vision_language_model/fuyu_8b/vllm/ci/prepare.sh b/models/multimodal/vision_language_model/fuyu_8b/vllm/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..7232aa2996f379a961cf931968a1319fb70ac091 --- /dev/null +++ b/models/multimodal/vision_language_model/fuyu_8b/vllm/ci/prepare.sh @@ -0,0 +1,27 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +cp -r ../../vllm_public_assets/ ./ diff --git a/models/multimodal/vision_language_model/fuyu_8b/vllm/utils.py b/models/multimodal/vision_language_model/fuyu_8b/vllm/utils.py deleted file mode 100644 index 48445ed97d08a8388a90d20e026609b5c1e88a99..0000000000000000000000000000000000000000 --- a/models/multimodal/vision_language_model/fuyu_8b/vllm/utils.py +++ /dev/null @@ -1,224 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import argparse -import codecs -import logging - -""" -The following arguments can not be add in args... 
-early_stopping: Union[bool, str] = False, -early_stopping: Controls the stopping condition for beam search. It - accepts the following values: `True`, where the generation stops as - soon as there are `best_of` complete candidates; `False`, where an - heuristic is applied and the generation stops when is it very - unlikely to find better candidates; `"never"`, where the beam search - procedure only stops when there cannot be better candidates - (canonical beam search algorithm). -stop: Optional[Union[str, List[str]]] = None, -stop_token_ids: Optional[List[int]] = None, -logits_processors: Optional[List[LogitsProcessor]] = None, -logits_processors: List of functions that modify logits based on - previously generated tokens, and optionally prompt tokens as - a first argument. -truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]] = None, -truncate_prompt_tokens: If set to an integer k, will use only the last k - tokens from the prompt (i.e., left truncation). Defaults to None - (i.e., no truncation). - """ - - -def sampling_add_cli_args(args: argparse.ArgumentParser) -> argparse.ArgumentParser: - args.add_argument( - "--n", - type=int, - default=1, - help="Number of output sequences to return for the given prompt.", - ) - args.add_argument( - "--best-of", - type=int, - default=None, - help="Number of output sequences that are generated from the prompt. " - "From these `best_of` sequences, the top `n` sequences are returned. " - "`best_of` must be greater than or equal to `n`. This is treated as " - "the beam width when `use_beam_search` is True. By default, `best_of`" - "is set to `n`.", - ) - args.add_argument( - "--presence-penalty", - type=float, - default=0.0, - help="Float that penalizes new tokens based on whether they " - "appear in the generated text so far. Values > 0 encourage the model " - "to use new tokens, while values < 0 encourage the model to repeat " - "tokens.", - ) - args.add_argument( - "--frequency-penalty", - type=float, - default=0.0, - help="Float that penalizes new tokens based on their " - " frequency in the generated text so far. Values > 0 encourage the " - " model to use new tokens, while values < 0 encourage the model to " - "repeat tokens.", - ) - args.add_argument( - "--repetition-penalty", - type=float, - default=1.0, - help="Float that penalizes new tokens based on whether " - "they appear in the prompt and the generated text so far. Values > 1 " - "encourage the model to use new tokens, while values < 1 encourage " - "the model to repeat tokens.", - ) - args.add_argument( - "--temperature", - type=float, - default=1.0, - help="Float that controls the randomness of the sampling. Lower " - "values make the model more deterministic, while higher values make " - "the model more random. Zero means greedy sampling.", - ) - args.add_argument( - "--top-p", - type=float, - default=1.0, - help="Float that controls the cumulative probability of the top tokens " - "to consider. Must be in (0, 1]. Set to 1 to consider all tokens.", - ) - args.add_argument( - "--top-k", - type=int, - default=-1, - help="Integer that controls the number of top tokens to consider. Set " - "to -1 to consider all tokens.", - ) - args.add_argument( - "--min-p", - type=float, - default=0.0, - help="Float that represents the minimum probability for a token to be " - "considered, relative to the probability of the most likely token. " - "Must be in [0, 1]. 
Set to 0 to disable this.", - ) - args.add_argument( - "--use-beam-search", - default=False, - action="store_true", - help="Whether to use beam search instead of sampling.", - ) - args.add_argument( - "--length-penalty", - type=float, - default=1.0, - help="Float that penalizes sequences based on their length. Used in beam search.", - ) - args.add_argument( - "--stop", - type=str, - default=None, - help="List of strings that stop the generation when they are generated. " - "The returned output will not contain the stop strings.", - ) - args.add_argument( - "--stop-token-ids", - type=int, - default=None, - help="List of tokens that stop the generation when they are " - "generated. The returned output will contain the stop tokens unless " - "the stop tokens are special tokens.", - ) - args.add_argument( - "--include-stop-str-in-output", - default=False, - action="store_true", - help="Whether to include the stop strings in output text. Defaults to False.", - ) - args.add_argument( - "--ignore-eos", - default=False, - action="store_true", - help="Whether to ignore the EOS token and continue generating tokens after the EOS token is generated.", - ) - args.add_argument( - "--max-tokens", - type=int, - default=16, - help="Maximum number of tokens to generate per output sequence.", - ) - args.add_argument( - "--min-tokens", - type=int, - default=0, - help="Minimum number of tokens to generate per output sequence " - "before EOS or stop_token_ids can be generated", - ) - args.add_argument( - "--logprobs", - type=int, - default=None, - help="NNumber of log probabilities to return per output token. " - "Note that the implementation follows the OpenAI API: The return " - "result includes the log probabilities on the `logprobs` most likely " - "tokens, as well the chosen tokens. The API will always return the " - "log probability of the sampled token, so there may be up to " - "`logprobs+1` elements in the response.", - ) - args.add_argument( - "--prompt-logprobs", - type=int, - default=None, - help="Number of log probabilities to return per prompt token.", - ) - args.add_argument( - "--detokenize", - type=bool, - default=True, - help="Whether to detokenize the output. Defaults to True.", - ) - args.add_argument( - "--skip-special-tokens", - default=True, - action="store_false", - help="Whether to skip special tokens in the output.", - ) - args.add_argument( - "--spaces-between-special-tokens", - default=True, - action="store_false", - help="Whether to add spaces between special tokens in the output. Defaults to True.", - ) - return args - - -def load_chat_template(tokenizer, chat_template): - if chat_template is not None: - try: - with open(chat_template, "r") as f: - tokenizer.chat_template = f.read() - except OSError: - # If opening a file fails, set chat template to be args to - # ensure we decode so our escape are interpreted correctly - tokenizer.chat_template = codecs.decode(chat_template, "unicode_escape") - - logging.info(f"Using supplied chat template:\n{tokenizer.chat_template}") - elif tokenizer.chat_template is not None: - logging.info( - f"Using default chat template:\n{tokenizer.chat_template}. This May lead to unsatisfactory results. You can provide a template.jinja file for vllm." - ) - else: - logging.warning( - "No chat template provided. Chat API will not work. This May lead to unsatisfactory results. You can provide a template.jinja file for vllm." 
- ) \ No newline at end of file diff --git a/models/multimodal/vision_language_model/fuyu_8b/vllm/vllm_public_assets/cherry_blossom.jpg b/models/multimodal/vision_language_model/fuyu_8b/vllm/vllm_public_assets/cherry_blossom.jpg deleted file mode 100644 index 63173db0da7687d7841fe4d85239d8e277d81259..0000000000000000000000000000000000000000 Binary files a/models/multimodal/vision_language_model/fuyu_8b/vllm/vllm_public_assets/cherry_blossom.jpg and /dev/null differ diff --git a/models/multimodal/vision_language_model/intern_vl/vllm/README.md b/models/multimodal/vision_language_model/intern_vl/vllm/README.md index 78bb8d1b1297bba864816057c0192193e85f8849..c337a34094d9a2c4666cb2d3126aa3f64dcccc2d 100644 --- a/models/multimodal/vision_language_model/intern_vl/vllm/README.md +++ b/models/multimodal/vision_language_model/intern_vl/vllm/README.md @@ -21,6 +21,7 @@ learning. ```bash cd ${DeepSparkInference}/models/vision-language-understanding/Intern_VL/vllm +cp -r ../../vllm_public_assets/ ./ mkdir -p data/intern_vl ln -s /path/to/InternVL2-4B ./data/intern_vl ``` diff --git a/models/multimodal/vision_language_model/intern_vl/vllm/ci/prepare.sh b/models/multimodal/vision_language_model/intern_vl/vllm/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..7232aa2996f379a961cf931968a1319fb70ac091 --- /dev/null +++ b/models/multimodal/vision_language_model/intern_vl/vllm/ci/prepare.sh @@ -0,0 +1,27 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +cp -r ../../vllm_public_assets/ ./ diff --git a/models/multimodal/vision_language_model/intern_vl/vllm/utils.py b/models/multimodal/vision_language_model/intern_vl/vllm/utils.py deleted file mode 100644 index c6def85dedc08ef9c3a489ce9dc5b1ff4a5e48b0..0000000000000000000000000000000000000000 --- a/models/multimodal/vision_language_model/intern_vl/vllm/utils.py +++ /dev/null @@ -1,173 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import codecs -import logging -import argparse - - -def sampling_add_cli_args(args: argparse.ArgumentParser) -> argparse.ArgumentParser: - args.add_argument( - '--n', - type=int, - default=1, - help="Number of output sequences to return for the given prompt.") - args.add_argument( - '--best-of', - type=int, - default=None, - help="Number of output sequences that are generated from the prompt. " - "From these `best_of` sequences, the top `n` sequences are returned. " - "`best_of` must be greater than or equal to `n`. This is treated as " - "the beam width when `use_beam_search` is True. By default, `best_of`" - "is set to `n`.") - args.add_argument( - '--presence-penalty', - type=float, - default=0.0, - help="Float that penalizes new tokens based on whether they " - "appear in the generated text so far. Values > 0 encourage the model " - "to use new tokens, while values < 0 encourage the model to repeat " - "tokens.") - args.add_argument( - '--frequency-penalty', - type=float, - default=0.0, - help="Float that penalizes new tokens based on their " - " frequency in the generated text so far. Values > 0 encourage the " - " model to use new tokens, while values < 0 encourage the model to " - "repeat tokens.") - args.add_argument( - '--repetition-penalty', - type=float, - default=1.0, - help="Float that penalizes new tokens based on whether " - "they appear in the prompt and the generated text so far. Values > 1 " - "encourage the model to use new tokens, while values < 1 encourage " - "the model to repeat tokens.") - args.add_argument( - '--temperature', - type=float, - default=1.0, - help="Float that controls the randomness of the sampling. Lower " - "values make the model more deterministic, while higher values make " - "the model more random. Zero means greedy sampling.") - args.add_argument( - '--top-p', - type=float, - default=1.0, - help="Float that controls the cumulative probability of the top tokens " - "to consider. Must be in (0, 1]. Set to 1 to consider all tokens.") - args.add_argument( - '--top-k', - type=int, - default=-1, - help="Integer that controls the number of top tokens to consider. Set " - "to -1 to consider all tokens.") - args.add_argument( - '--min-p', - type=float, - default=0.0, - help="Float that represents the minimum probability for a token to be " - "considered, relative to the probability of the most likely token. " - "Must be in [0, 1]. Set to 0 to disable this.") - args.add_argument( - '--use-beam-search', - default=False, - action="store_true", - help="Whether to use beam search instead of sampling.") - args.add_argument( - '--length-penalty', - type=float, - default=1.0, - help="Float that penalizes sequences based on their length. Used in beam search.") - args.add_argument( - '--stop', - type=str, - default=None, - help="List of strings that stop the generation when they are generated. " - "The returned output will not contain the stop strings.") - args.add_argument( - '--stop-token-ids', - type=int, - default=None, - help="List of tokens that stop the generation when they are " - "generated. The returned output will contain the stop tokens unless " - "the stop tokens are special tokens.") - args.add_argument( - '--include-stop-str-in-output', - default=False, - action="store_true", - help="Whether to include the stop strings in output text. 
Defaults to False.") - args.add_argument( - '--ignore-eos', - default=False, - action="store_true", - help="Whether to ignore the EOS token and continue generating tokens after the EOS token is generated.") - args.add_argument( - '--max-tokens', - type=int, - default=16, - help="Maximum number of tokens to generate per output sequence.") - args.add_argument( - '--logprobs', - type=int, - default=None, - help="NNumber of log probabilities to return per output token. " - "Note that the implementation follows the OpenAI API: The return " - "result includes the log probabilities on the `logprobs` most likely " - "tokens, as well the chosen tokens. The API will always return the " - "log probability of the sampled token, so there may be up to " - "`logprobs+1` elements in the response.") - args.add_argument( - '--prompt-logprobs', - type=int, - default=None, - help="Number of log probabilities to return per prompt token.") - args.add_argument( - '--skip-special-tokens', - default=True, - action="store_false", - help="Whether to skip special tokens in the output.") - args.add_argument( - '--spaces-between-special-tokens', - default=True, - action="store_false", - help="Whether to add spaces between special tokens in the output. Defaults to True.") - # early_stopping logits_processors seed - return args - - -def load_chat_template(tokenizer, chat_template): - if chat_template is not None: - try: - with open(chat_template, "r") as f: - tokenizer.chat_template = f.read() - except OSError: - # If opening a file fails, set chat template to be args to - # ensure we decode so our escape are interpreted correctly - tokenizer.chat_template = codecs.decode( - chat_template, "unicode_escape") - - logging.info( - f"Using supplied chat template:\n{tokenizer.chat_template}" - ) - elif tokenizer.chat_template is not None: - logging.info( - f"Using default chat template:\n{tokenizer.chat_template}. This May lead to unsatisfactory results. You can provide a template.jinja file for vllm." - ) - else: - logging.warning( - "No chat template provided. Chat API will not work. This May lead to unsatisfactory results. You can provide a template.jinja file for vllm.") diff --git a/models/multimodal/vision_language_model/intern_vl/vllm/vllm_public_assets/cherry_blossom.jpg b/models/multimodal/vision_language_model/intern_vl/vllm/vllm_public_assets/cherry_blossom.jpg deleted file mode 100644 index 63173db0da7687d7841fe4d85239d8e277d81259..0000000000000000000000000000000000000000 Binary files a/models/multimodal/vision_language_model/intern_vl/vllm/vllm_public_assets/cherry_blossom.jpg and /dev/null differ diff --git a/models/multimodal/vision_language_model/llava/vllm/README.md b/models/multimodal/vision_language_model/llava/vllm/README.md index 599b66f04af0d6d093fd96be05febbad896292fd..78a2119013b612c6e26f517339cf634fa1677b54 100644 --- a/models/multimodal/vision_language_model/llava/vllm/README.md +++ b/models/multimodal/vision_language_model/llava/vllm/README.md @@ -22,6 +22,8 @@ reasoning. 
-llava-v1.6-vicuna-7b-hf: ```bash +cp -r ../../vllm_public_assets/ ./ + # Download model from the website and make sure the model's path is "data/llava" mkdir data/ ``` diff --git a/models/multimodal/vision_language_model/llava/vllm/ci/prepare.sh b/models/multimodal/vision_language_model/llava/vllm/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..7232aa2996f379a961cf931968a1319fb70ac091 --- /dev/null +++ b/models/multimodal/vision_language_model/llava/vllm/ci/prepare.sh @@ -0,0 +1,27 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +cp -r ../../vllm_public_assets/ ./ diff --git a/models/multimodal/vision_language_model/llava/vllm/utils.py b/models/multimodal/vision_language_model/llava/vllm/utils.py deleted file mode 100644 index 11f23209a3175f0200ac6b5c499765101e3c3a0a..0000000000000000000000000000000000000000 --- a/models/multimodal/vision_language_model/llava/vllm/utils.py +++ /dev/null @@ -1,225 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import argparse -import codecs -import logging - -""" -The following arguments can not be add in args... -early_stopping: Union[bool, str] = False, -early_stopping: Controls the stopping condition for beam search. It - accepts the following values: `True`, where the generation stops as - soon as there are `best_of` complete candidates; `False`, where an - heuristic is applied and the generation stops when is it very - unlikely to find better candidates; `"never"`, where the beam search - procedure only stops when there cannot be better candidates - (canonical beam search algorithm). -stop: Optional[Union[str, List[str]]] = None, -stop_token_ids: Optional[List[int]] = None, -logits_processors: Optional[List[LogitsProcessor]] = None, -logits_processors: List of functions that modify logits based on - previously generated tokens, and optionally prompt tokens as - a first argument. -truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]] = None, -truncate_prompt_tokens: If set to an integer k, will use only the last k - tokens from the prompt (i.e., left truncation). 
Defaults to None - (i.e., no truncation). - """ - - -def sampling_add_cli_args(args: argparse.ArgumentParser) -> argparse.ArgumentParser: - args.add_argument( - "--n", - type=int, - default=1, - help="Number of output sequences to return for the given prompt.", - ) - args.add_argument( - "--best-of", - type=int, - default=None, - help="Number of output sequences that are generated from the prompt. " - "From these `best_of` sequences, the top `n` sequences are returned. " - "`best_of` must be greater than or equal to `n`. This is treated as " - "the beam width when `use_beam_search` is True. By default, `best_of`" - "is set to `n`.", - ) - args.add_argument( - "--presence-penalty", - type=float, - default=0.0, - help="Float that penalizes new tokens based on whether they " - "appear in the generated text so far. Values > 0 encourage the model " - "to use new tokens, while values < 0 encourage the model to repeat " - "tokens.", - ) - args.add_argument( - "--frequency-penalty", - type=float, - default=0.0, - help="Float that penalizes new tokens based on their " - " frequency in the generated text so far. Values > 0 encourage the " - " model to use new tokens, while values < 0 encourage the model to " - "repeat tokens.", - ) - args.add_argument( - "--repetition-penalty", - type=float, - default=1.0, - help="Float that penalizes new tokens based on whether " - "they appear in the prompt and the generated text so far. Values > 1 " - "encourage the model to use new tokens, while values < 1 encourage " - "the model to repeat tokens.", - ) - args.add_argument( - "--temperature", - type=float, - default=1.0, - help="Float that controls the randomness of the sampling. Lower " - "values make the model more deterministic, while higher values make " - "the model more random. Zero means greedy sampling.", - ) - args.add_argument( - "--top-p", - type=float, - default=1.0, - help="Float that controls the cumulative probability of the top tokens " - "to consider. Must be in (0, 1]. Set to 1 to consider all tokens.", - ) - args.add_argument( - "--top-k", - type=int, - default=-1, - help="Integer that controls the number of top tokens to consider. Set " - "to -1 to consider all tokens.", - ) - args.add_argument( - "--min-p", - type=float, - default=0.0, - help="Float that represents the minimum probability for a token to be " - "considered, relative to the probability of the most likely token. " - "Must be in [0, 1]. Set to 0 to disable this.", - ) - args.add_argument( - "--use-beam-search", - default=False, - action="store_true", - help="Whether to use beam search instead of sampling.", - ) - args.add_argument( - "--length-penalty", - type=float, - default=1.0, - help="Float that penalizes sequences based on their length. Used in beam search.", - ) - args.add_argument( - "--stop", - type=str, - default=None, - help="List of strings that stop the generation when they are generated. " - "The returned output will not contain the stop strings.", - ) - args.add_argument( - "--stop-token-ids", - type=int, - default=None, - help="List of tokens that stop the generation when they are " - "generated. The returned output will contain the stop tokens unless " - "the stop tokens are special tokens.", - ) - args.add_argument( - "--include-stop-str-in-output", - default=False, - action="store_true", - help="Whether to include the stop strings in output text. 
Defaults to False.", - ) - args.add_argument( - "--ignore-eos", - default=False, - action="store_true", - help="Whether to ignore the EOS token and continue generating tokens after the EOS token is generated.", - ) - args.add_argument( - "--max-tokens", - type=int, - default=16, - help="Maximum number of tokens to generate per output sequence.", - ) - args.add_argument( - "--min-tokens", - type=int, - default=0, - help="Minimum number of tokens to generate per output sequence " - "before EOS or stop_token_ids can be generated", - ) - args.add_argument( - "--logprobs", - type=int, - default=None, - help="NNumber of log probabilities to return per output token. " - "Note that the implementation follows the OpenAI API: The return " - "result includes the log probabilities on the `logprobs` most likely " - "tokens, as well the chosen tokens. The API will always return the " - "log probability of the sampled token, so there may be up to " - "`logprobs+1` elements in the response.", - ) - args.add_argument( - "--prompt-logprobs", - type=int, - default=None, - help="Number of log probabilities to return per prompt token.", - ) - args.add_argument( - "--detokenize", - type=bool, - default=True, - help="Whether to detokenize the output. Defaults to True.", - ) - args.add_argument( - "--skip-special-tokens", - default=True, - action="store_false", - help="Whether to skip special tokens in the output.", - ) - args.add_argument( - "--spaces-between-special-tokens", - default=True, - action="store_false", - help="Whether to add spaces between special tokens in the output. Defaults to True.", - ) - return args - - -def load_chat_template(tokenizer, chat_template): - if chat_template is not None: - try: - with open(chat_template, "r") as f: - tokenizer.chat_template = f.read() - except OSError: - # If opening a file fails, set chat template to be args to - # ensure we decode so our escape are interpreted correctly - tokenizer.chat_template = codecs.decode(chat_template, "unicode_escape") - - logging.info(f"Using supplied chat template:\n{tokenizer.chat_template}") - elif tokenizer.chat_template is not None: - logging.info( - f"Using default chat template:\n{tokenizer.chat_template}. This May lead to unsatisfactory results. You can provide a template.jinja file for vllm." - ) - else: - logging.warning( - "No chat template provided. Chat API will not work. This May lead to unsatisfactory results. You can provide a template.jinja file for vllm." - ) \ No newline at end of file diff --git a/models/multimodal/vision_language_model/llava/vllm/vllm_public_assets/cherry_blossom.jpg b/models/multimodal/vision_language_model/llava/vllm/vllm_public_assets/cherry_blossom.jpg deleted file mode 100644 index 63173db0da7687d7841fe4d85239d8e277d81259..0000000000000000000000000000000000000000 Binary files a/models/multimodal/vision_language_model/llava/vllm/vllm_public_assets/cherry_blossom.jpg and /dev/null differ diff --git a/models/multimodal/vision_language_model/llava_next_video_7b/vllm/ci/prepare.sh b/models/multimodal/vision_language_model/llava_next_video_7b/vllm/ci/prepare.sh new file mode 100644 index 0000000000000000000000000000000000000000..0fa3df9b4017331b2579cf5e039676248f79fff9 --- /dev/null +++ b/models/multimodal/vision_language_model/llava_next_video_7b/vllm/ci/prepare.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi diff --git a/models/nlp/plm/albert/ixrt/ci/prepare.sh b/models/nlp/plm/albert/ixrt/ci/prepare.sh index d78865ec0c31e4dbb393d2d89b4d4ac6a2ce391d..68e8aa19da2132447fdfe6ea48f42bc026f48d7c 100644 --- a/models/nlp/plm/albert/ixrt/ci/prepare.sh +++ b/models/nlp/plm/albert/ixrt/ci/prepare.sh @@ -35,7 +35,6 @@ pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/requ # edit madlag/albert-base-v2-squad path # sed -i "s#madlag#/${MODEL_PATH}/madlag#" ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/data_loader.py -mv madlag ./ByteMLPerf/byte_infer_perf/general_perf/ # copy open_squad data cp /root/data/datasets/open_squad/* ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/ diff --git a/models/nlp/plm/bert_base_ner/igie/ci/prepare.sh b/models/nlp/plm/bert_base_ner/igie/ci/prepare.sh index 36c5cea9aa366c78077003c3271f4fd402021dd0..3ebc27f17f276362647a9716fcc7aad4e9d77e32 100644 --- a/models/nlp/plm/bert_base_ner/igie/ci/prepare.sh +++ b/models/nlp/plm/bert_base_ner/igie/ci/prepare.sh @@ -16,6 +16,7 @@ set -x +pip3 uninstall numpy pip3 install -r requirements.txt # Get pytorch weights diff --git a/models/nlp/plm/roformer/ixrt/ci/prepare.sh b/models/nlp/plm/roformer/ixrt/ci/prepare.sh index c3cc4f3d2e12028623cbd00969ac39960db5b490..ea80462db022331cb8b9c20f12a15e9ef8b0bdd6 100644 --- a/models/nlp/plm/roformer/ixrt/ci/prepare.sh +++ b/models/nlp/plm/roformer/ixrt/ci/prepare.sh @@ -28,7 +28,8 @@ python3 export_onnx.py --model_path ./data/open_roformer --output_path ./data/op # Simplify onnx model onnxsim ./data/open_roformer/roformer-frozen_org.onnx ./data/open_roformer/roformer-frozen.onnx -python3 deploy.py --model_path ./data/open_roformer/roformer-frozen.onnx --output_path ./data/open_roformer/roformer.onnx +python3 deploy.py --model_path ./data/open_roformer/roformer-frozen.onnx --output_path ./data/open_roformer/roformer-frozen.onnx +cp ./data/open_roformer/roformer-frozen.onnx ./data/open_roformer/roformer.onnx # link ByteMLPerf and install requirements ln -s ../../../../../toolbox/ByteMLPerf ./ @@ -39,7 +40,7 @@ sed -i '102s/build_engine/# build_engine/' ./ByteMLPerf/byte_infer_perf/general_ # Move open_roformer mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/ -mv ./data/open_roformer ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/ +cp -r ./data/open_roformer ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/ # Setup open_cail2019 dataset cp /root/data/datasets/open_cail2019/* ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cail2019 diff --git a/models/others/recommendation/wide_and_deep/ixrt/README.md b/models/others/recommendation/wide_and_deep/ixrt/README.md index 
8e9dd17d650123ead68c985290075b5c912be8ac..22796241f671d6bd7ff4280666270ea572dd8efb 100644 --- a/models/others/recommendation/wide_and_deep/ixrt/README.md +++ b/models/others/recommendation/wide_and_deep/ixrt/README.md @@ -56,7 +56,7 @@ export PROJ_PATH=./ #### FP16 ```bash -bash scripts/infer_widedeep_fp16_performance.sh +bash scripts/infer_wide_and_deep_fp16_performance.sh ``` ### Accuracy diff --git a/models/others/recommendation/wide_and_deep/ixrt/ci/prepare.sh b/models/others/recommendation/wide_and_deep/ixrt/ci/prepare.sh index 2e65a751f891ad3089d1bb5e27c50a032fdaaf81..4a351d878726c5b7c7a20bc3a4ac1dd7eb021db3 100644 --- a/models/others/recommendation/wide_and_deep/ixrt/ci/prepare.sh +++ b/models/others/recommendation/wide_and_deep/ixrt/ci/prepare.sh @@ -40,7 +40,7 @@ mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_criteo_kaggle/ cp /root/data/datasets/eval.csv ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_criteo_kaggle/ -wget http://files.deepspark.org.cn:880/deepspark/widedeep_dynamicshape_new.onnx +cp /root/data/checkpoints/widedeep_dynamicshape_new.onnx ./ cp open_wide_deep_saved_model/* ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/regular/open_wide_deep_saved_model/ mv widedeep_dynamicshape_new.onnx ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/regular/open_wide_deep_saved_model/widedeep_dynamicshape.onnx diff --git a/models/others/recommendation/wide_and_deep/ixrt/scripts/infer_widedeep_fp16_performance.sh b/models/others/recommendation/wide_and_deep/ixrt/scripts/infer_wide_and_deep_fp16_performance.sh similarity index 100% rename from models/others/recommendation/wide_and_deep/ixrt/scripts/infer_widedeep_fp16_performance.sh rename to models/others/recommendation/wide_and_deep/ixrt/scripts/infer_wide_and_deep_fp16_performance.sh diff --git a/tests/model_info.json b/tests/model_info.json index 7df4b9ccfe08e291ce1012cf932ebcd2d1f88937..62ef3eba0dd6fa069eef2dc39c148fe88b15b13e 100644 --- a/tests/model_info.json +++ b/tests/model_info.json @@ -22,8 +22,8 @@ "github_repo": "", "github_branch": "", "github_path": "", - "datasets": "https://www.openslr.org/33/aishell.tar.gz", - "download_url": "http://mobvoi-speech-public.ufile.ucloud.cn/public/wenet/aishell/20211025_conformer_exp.tar.gz", + "datasets": "https://www.openslr.org/33/aishell", + "download_url": "http://mobvoi-speech-public.ufile.ucloud.cn/public/wenet/aishell/20211025_conformer_exp", "need_third_part": true, "precisions": [ "fp16" @@ -187,7 +187,9 @@ "datasets": "https://www.image-net.org/download.php", "download_url": "https://huggingface.co/openai/clip-vit-base-patch32", "need_third_part": "", - "precisions": "", + "precisions": [ + "fp16" + ], "type": "inference", "hasDemo": false, "demoType": "" @@ -279,7 +281,7 @@ "github_branch": "", "github_path": "", "datasets": "https://www.image-net.org/download.php", - "download_url": "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth", + "download_url": "https://download.pytorch.org/models/convnext_base-6075fbad.pth", "need_third_part": "", "precisions": [ "fp16" @@ -1018,7 +1020,7 @@ "github_branch": "", "github_path": "", "datasets": "https://www.image-net.org/download.php", - "download_url": "https://download.pytorch.org/models/efficientnet_b1_rwightman-bac287d4.pth", + "download_url": "https://download.pytorch.org/models/efficientnet_b1-c27df63c.pth", "need_third_part": false, "precisions": [ "fp16", @@ -2416,8 +2418,7 @@ "download_url": 
"https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d50_b32x8_imagenet_20210531-db14775a.pth", "need_third_part": false, "precisions": [ - "fp16", - "int8" + "fp16" ], "type": "inference", "hasDemo": false, @@ -3155,10 +3156,12 @@ "github_repo": "", "github_branch": "", "github_path": "", - "datasets": "", - "download_url": "", + "datasets": "local/tmp", + "download_url": "https://drive.google.com/open?id=1R77HmFADxe87GmoLwzfgMu_HY0IhcyBz", "need_third_part": "", - "precisions": "", + "precisions": [ + "fp16" + ], "type": "inference", "hasDemo": false, "demoType": "" @@ -3185,10 +3188,12 @@ "github_repo": "", "github_branch": "", "github_path": "", - "datasets": "", - "download_url": "", + "datasets": "local/coco", + "download_url": "http://files.deepspark.org.cn:880/deepspark/wts/maskrcnn.wts", "need_third_part": "", - "precisions": "", + "precisions": [ + "fp16" + ], "type": "inference", "hasDemo": false, "demoType": "" @@ -3215,10 +3220,12 @@ "github_repo": "", "github_branch": "", "github_path": "", - "datasets": "", - "download_url": "", + "datasets": "local/coco2017", + "download_url": "https://download.openmmlab.com/mmdetection/v2.0/solo/solo_r50_fpn_3x_coco/solo_r50_fpn_3x_coco_20210901_012353-11d224d7.pth", "need_third_part": "", - "precisions": "", + "precisions": [ + "fp16" + ], "type": "inference", "hasDemo": false, "demoType": "" @@ -3245,10 +3252,12 @@ "github_repo": "", "github_branch": "", "github_path": "", - "datasets": "", - "download_url": "", + "datasets": "local/market1501", + "download_url": "https://local/ckpt.t7", "need_third_part": "", - "precisions": "", + "precisions": [ + "fp16" + ], "type": "inference", "hasDemo": false, "demoType": "" @@ -3275,10 +3284,12 @@ "github_repo": "", "github_branch": "", "github_path": "", - "datasets": "", - "download_url": "", + "datasets": "local/VehicleID", + "download_url": "https://github.com/JDAI-CV/fast-reid/releases/download/v0.1.1/vehicleid_bot_R50-ibn.pth", "need_third_part": "", - "precisions": "", + "precisions": [ + "fp16" + ], "type": "inference", "hasDemo": false, "demoType": "" @@ -3305,10 +3316,12 @@ "github_repo": "", "github_branch": "", "github_path": "", - "datasets": "", - "download_url": "", + "datasets": "local/VehicleID", + "download_url": "https://local/epoch_14.pth", "need_third_part": "", - "precisions": "", + "precisions": [ + "fp16" + ], "type": "inference", "hasDemo": false, "demoType": "" @@ -3559,10 +3572,12 @@ "github_repo": "", "github_branch": "", "github_path": "", - "datasets": "", - "download_url": "", + "datasets": "local/coco", + "download_url": "https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_r50_fpn_4x4_1x_coco/fovea_r50_fpn_4x4_1x_coco_20200219-ee4d5303.pth", "need_third_part": "", - "precisions": "", + "precisions": [ + "fp16" + ], "type": "inference", "hasDemo": false, "demoType": "" @@ -3621,10 +3636,12 @@ "github_repo": "", "github_branch": "", "github_path": "", - "datasets": "", - "download_url": "", + "datasets": "local/coco", + "download_url": "https://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_r50_fpn_1x_coco/fsaf_r50_fpn_1x_coco-94ccc51f.pth", "need_third_part": "", - "precisions": "", + "precisions": [ + "fp16" + ], "type": "inference", "hasDemo": false, "demoType": "" @@ -3683,10 +3700,12 @@ "github_repo": "", "github_branch": "", "github_path": "", - "datasets": "", - "download_url": "", + "datasets": "local/coco", + "download_url": 
"https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_1x_coco/fcos_hrnetv2p_w18_gn-head_4x4_1x_coco_20201212_100710-4ad151de.pth", "need_third_part": "", - "precisions": "", + "precisions": [ + "fp16" + ], "type": "inference", "hasDemo": false, "demoType": "" @@ -3777,10 +3796,12 @@ "github_repo": "", "github_branch": "", "github_path": "", - "datasets": "", - "download_url": "", + "datasets": "local/widerface", + "download_url": "https://github.com/biubug6/Face-Detector-1MB-with-landmark/raw/master/weights/mobilenet0.25_Final.pth", "need_third_part": "", - "precisions": "", + "precisions": [ + "fp16" + ], "type": "inference", "hasDemo": false, "demoType": "" @@ -4688,7 +4709,9 @@ "datasets": "", "download_url": "", "need_third_part": "", - "precisions": "", + "precisions": [ + "fp16" + ], "type": "inference", "hasDemo": true, "demoType": "image" @@ -4716,9 +4739,11 @@ "github_branch": "", "github_path": "", "datasets": "", - "download_url": "", + "download_url": "https://huggingface.co/facebook/chameleon-7b", "need_third_part": "", - "precisions": "", + "precisions": [ + "fp16" + ], "type": "inference", "hasDemo": false, "demoType": "" @@ -4748,7 +4773,9 @@ "datasets": "", "download_url": "", "need_third_part": "", - "precisions": "", + "precisions": [ + "fp16" + ], "type": "inference", "hasDemo": false, "demoType": "" @@ -4776,9 +4803,11 @@ "github_branch": "", "github_path": "", "datasets": "", - "download_url": "", + "download_url": "https://huggingface.co/adept/fuyu-8b", "need_third_part": "", - "precisions": "", + "precisions": [ + "fp16" + ], "type": "inference", "hasDemo": false, "demoType": "" @@ -4806,9 +4835,11 @@ "github_branch": "", "github_path": "", "datasets": "", - "download_url": "", + "download_url": "https://huggingface.co/OpenGVLab/InternVL2-4B", "need_third_part": "", - "precisions": "", + "precisions": [ + "fp16" + ], "type": "inference", "hasDemo": true, "demoType": "image-to-text" @@ -4836,9 +4867,11 @@ "github_branch": "", "github_path": "", "datasets": "", - "download_url": "", + "download_url": "https://modelscope.cn/models/swift/llava-v1.6-vicuna-7b-hf", "need_third_part": "", - "precisions": "", + "precisions": [ + "fp16" + ], "type": "inference", "hasDemo": false, "demoType": "" @@ -4866,39 +4899,11 @@ "github_branch": "", "github_path": "", "datasets": "", - "download_url": "", - "need_third_part": "", - "precisions": "", - "type": "inference", - "hasDemo": false, - "demoType": "" - }, - { - "model_name": "minicpm_v", - "framework": "vllm", - "release_version": "25.03", - "release_sdk": "CoreX 4.2.0", - "release_gpgpu": "BI-V150", - "latest_sdk": "4.2.0", - "latest_gpgpu": "BI-V150", - "category": "multimodal/vision_language_model", - "toolbox": "", - "mdims": "", - "dataset": "", - "license": "", - "model_path": "models/multimodal/vision_language_model/minicpm_v/vllm/", - "readme_file": "models/multimodal/vision_language_model/minicpm_v/vllm/README.md", - "bitbucket_repo": "", - "bitbucket_branch": "", - "bitbucket_path": "", - "develop_owner": "", - "github_repo": "", - "github_branch": "", - "github_path": "", - "datasets": "", - "download_url": "", + "download_url": "https://modelscope.cn/models/swift/LLaVA-NeXT-Video-7B-hf", "need_third_part": "", - "precisions": "", + "precisions": [ + "fp16" + ], "type": "inference", "hasDemo": false, "demoType": "" @@ -5023,9 +5028,11 @@ "github_branch": "", "github_path": "", "datasets": "", - "download_url": "", + "download_url": 
"https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-70B", "need_third_part": "", - "precisions": "", + "precisions": [ + "fp16" + ], "type": "inference", "hasDemo": false, "demoType": "" @@ -5053,9 +5060,11 @@ "github_branch": "", "github_path": "", "datasets": "", - "download_url": "", + "download_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B", "need_third_part": "", - "precisions": "", + "precisions": [ + "fp16" + ], "type": "inference", "hasDemo": false, "demoType": "" @@ -5083,9 +5092,11 @@ "github_branch": "", "github_path": "", "datasets": "", - "download_url": "", + "download_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", "need_third_part": "", - "precisions": "", + "precisions": [ + "fp16" + ], "type": "inference", "hasDemo": false, "demoType": "" @@ -5113,9 +5124,11 @@ "github_branch": "", "github_path": "", "datasets": "", - "download_url": "", + "download_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", "need_third_part": "", - "precisions": "", + "precisions": [ + "fp16" + ], "type": "inference", "hasDemo": false, "demoType": "" @@ -5143,9 +5156,11 @@ "github_branch": "", "github_path": "", "datasets": "", - "download_url": "", + "download_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", "need_third_part": "", - "precisions": "", + "precisions": [ + "fp16" + ], "type": "inference", "hasDemo": false, "demoType": "" @@ -5173,9 +5188,11 @@ "github_branch": "", "github_path": "", "datasets": "", - "download_url": "", + "download_url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", "need_third_part": "", - "precisions": "", + "precisions": [ + "fp16" + ], "type": "inference", "hasDemo": true, "demoType": "chat" @@ -5491,9 +5508,11 @@ "github_branch": "", "github_path": "", "datasets": "", - "download_url": "", + "download_url": "https://modelscope.cn/models/qwen/Qwen1.5-7B", "need_third_part": "", - "precisions": "", + "precisions": [ + "fp16" + ], "type": "inference", "hasDemo": false, "demoType": "" @@ -5651,7 +5670,9 @@ "datasets": "", "download_url": "", "need_third_part": "", - "precisions": "", + "precisions": [ + "fp16" + ], "type": "inference", "hasDemo": false, "demoType": "" @@ -5678,10 +5699,12 @@ "github_repo": "", "github_branch": "", "github_path": "", - "datasets": "", - "download_url": "", + "datasets": "local/china-people-daily-ner-corpus", + "download_url": "https://huggingface.co/bert-base-chinese", "need_third_part": "", - "precisions": "", + "precisions": [ + "int8" + ], "type": "inference", "hasDemo": false, "demoType": "" @@ -5708,10 +5731,12 @@ "github_repo": "", "github_branch": "", "github_path": "", - "datasets": "", - "download_url": "", + "datasets": "local/SQuAD", + "download_url": "https://huggingface.co/csarron/bert-base-uncased-squad-v1", "need_third_part": "", - "precisions": "", + "precisions": [ + "fp16" + ], "type": "inference", "hasDemo": false, "demoType": "" @@ -5739,9 +5764,11 @@ "github_branch": "", "github_path": "", "datasets": "", - "download_url": "", + "download_url": "https://local/bert_base_uncased_squad", "need_third_part": "", - "precisions": "", + "precisions": [ + "fp16" + ], "type": "inference", "hasDemo": false, "demoType": "" @@ -5768,10 +5795,12 @@ "github_repo": "", "github_branch": "", "github_path": "", - "datasets": "", - "download_url": "", + "datasets": "local/SQuAD", + "download_url": "https://huggingface.co/neuralmagic/bert-large-uncased-finetuned-squadv1", "need_third_part": "", - 
"precisions": "", + "precisions": [ + "fp16" + ], "type": "inference", "hasDemo": false, "demoType": "" @@ -5799,9 +5828,11 @@ "github_branch": "", "github_path": "", "datasets": "", - "download_url": "", + "download_url": "https://local/bert-large-uncased", "need_third_part": "", - "precisions": "", + "precisions": [ + "fp16" + ], "type": "inference", "hasDemo": false, "demoType": "" @@ -5829,9 +5860,11 @@ "github_branch": "", "github_path": "", "datasets": "", - "download_url": "", + "download_url": "https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open_deberta", "need_third_part": "", - "precisions": "", + "precisions": [ + "fp16" + ], "type": "inference", "hasDemo": false, "demoType": "" @@ -5859,9 +5892,11 @@ "github_branch": "", "github_path": "", "datasets": "", - "download_url": "", + "download_url": "https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open_roberta", "need_third_part": "", - "precisions": "", + "precisions": [ + "fp16" + ], "type": "inference", "hasDemo": false, "demoType": "" @@ -5889,9 +5924,11 @@ "github_branch": "", "github_path": "", "datasets": "", - "download_url": "", + "download_url": "https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open_roformer", "need_third_part": "", - "precisions": "", + "precisions": [ + "fp16" + ], "type": "inference", "hasDemo": false, "demoType": "" @@ -5919,9 +5956,11 @@ "github_branch": "", "github_path": "", "datasets": "", - "download_url": "", + "download_url": "https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open_videobert", "need_third_part": "", - "precisions": "", + "precisions": [ + "fp16" + ], "type": "inference", "hasDemo": false, "demoType": "" @@ -6428,7 +6467,7 @@ "github_repo": "", "github_branch": "", "github_path": "", - "datasets": "cityscapes", + "datasets": "local/cityscapes", "download_url": "https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes_20211210_145204-6860854e.pth", "need_third_part": false, "precisions": [ @@ -6855,7 +6894,7 @@ "demoType": "" }, { - "model_name": "mllama", + "model_name": "llama-3.2", "framework": "vllm", "release_version": "25.06", "release_sdk": "4.2.0", @@ -6867,8 +6906,8 @@ "mdims": "", "dataset": "", "license": "", - "model_path": "models/multimodal/vision_language_model/mllama/vllm", - "readme_file": "models/multimodal/vision_language_model/mllama/vllm/README.md", + "model_path": "models/multimodal/vision_language_model/llama-3.2/vllm", + "readme_file": "models/multimodal/vision_language_model/llama-3.2/vllm/README.md", "bitbucket_repo": "", "bitbucket_branch": "", "bitbucket_path": "", diff --git a/tests/run_igie.py b/tests/run_igie.py index 46e5636b98effaef49606ed74a94596f13013d45..3b9f7cf05c7d122ce402051ab44bde0900178ecb 100644 --- a/tests/run_igie.py +++ b/tests/run_igie.py @@ -94,6 +94,16 @@ def main(): logging.debug(f"The result of {model['model_name']} is\n{json.dumps(result, indent=4)}") logging.info(f"End running {model['model_name']} test case.") + # multi_object_tracking模型 + if model["category"] in ["cv/multi_object_tracking"]: + logging.info(f"Start running {model['model_name']} test case:\n{json.dumps(model, indent=4)}") + d_url = model["download_url"] + if d_url is not None: + result = run_multi_object_tracking_testcase(model) + check_model_result(result) + logging.debug(f"The result of {model['model_name']} is\n{json.dumps(result, indent=4)}") + logging.info(f"End running {model['model_name']} test case.") + # Speech模型 if model["category"] in ["audio/speech_recognition"]: 
logging.info(f"Start running {model['model_name']} test case:\n{json.dumps(model, indent=4)}") @@ -159,13 +169,22 @@ def run_clf_testcase(model): for prec in model["precisions"]: logging.info(f"Start running {model_name} {prec} test case") - script = f""" - export DATASETS_DIR=/mnt/deepspark/data/datasets/imagenet-val - export RUN_DIR=../../igie_common/ - cd ../{model['model_path']} - bash scripts/infer_{model_name}_{prec}_accuracy.sh - bash scripts/infer_{model_name}_{prec}_performance.sh - """ + if model_name == "unet": + script = f""" + export DATASETS_DIR=/mnt/deepspark/data/datasets/{dataset_n} + export RUN_DIR=../../igie_common/ + cd ../{model['model_path']} + bash scripts/infer_{model_name}_{prec}_accuracy.sh + bash scripts/infer_{model_name}_{prec}_performance.sh + """ + else: + script = f""" + export DATASETS_DIR=/mnt/deepspark/data/datasets/imagenet-val + export RUN_DIR=../../igie_common/ + cd ../{model['model_path']} + bash scripts/infer_{model_name}_{prec}_accuracy.sh + bash scripts/infer_{model_name}_{prec}_performance.sh + """ r, t = run_script(script) sout = r.stdout @@ -213,9 +232,9 @@ def run_detec_testcase(model): ln -s /mnt/deepspark/data/datasets/{dataset_n} ./ """ - if model["need_third_part"] and model["3rd_party_repo"]: - third_party_repo = model["3rd_party_repo"] - prepare_script += f"unzip /mnt/deepspark/data/3rd_party/{third_party_repo}.zip -d ./\n" + # if model["need_third_part"] and model["3rd_party_repo"]: + # third_party_repo = model["3rd_party_repo"] + # prepare_script += f"unzip /mnt/deepspark/data/3rd_party/{third_party_repo}.zip -d ./\n" prepare_script += "bash ci/prepare.sh\n" # add pip list info when in debug mode @@ -384,6 +403,63 @@ def run_trace_testcase(model): logging.debug(f"matchs:\n{matchs}") return result +def run_multi_object_tracking_testcase(model): + model_name = model["model_name"] + result = { + "name": model_name, + "result": {}, + } + d_url = model["download_url"] + checkpoint_n = d_url.split("/")[-1] + dataset_n = model["datasets"].split("/")[-1] + prepare_script = f""" + cd ../{model['model_path']} + ln -s /mnt/deepspark/data/checkpoints/{checkpoint_n} ./ + ln -s /mnt/deepspark/data/datasets/{dataset_n} ./ + """ + + prepare_script += """ + bash ci/prepare.sh + ls -l | grep onnx + """ + + # add pip list info when in debug mode + if utils.is_debug(): + pip_list_script = "pip list | grep -E 'numpy|transformer|igie|mmcv|onnx'\n" + prepare_script = pip_list_script + prepare_script + pip_list_script + + run_script(prepare_script) + + for prec in model["precisions"]: + logging.info(f"Start running {model_name} {prec} test case") + script = f""" + cd ../{model['model_path']} + export DATASETS_DIR=./{dataset_n}/ + bash scripts/infer_{model_name}_{prec}_accuracy.sh + bash scripts/infer_{model_name}_{prec}_performance.sh + """ + + r, t = run_script(script) + sout = r.stdout + pattern = r"\* ([\w\d ]+):\s*([\d.]+)[ ms%]*, ([\w\d ]+):\s*([\d.]+)[ ms%]*" + matchs = re.findall(pattern, sout) + for m in matchs: + result["result"].setdefault(prec, {"status": "FAIL"}) + try: + result["result"][prec] = result["result"][prec] | {m[0]: float(m[1]), m[2]: float(m[3])} + except ValueError: + print("The string cannot be converted to a float.") + result["result"][prec] = result["result"][prec] | {m[0]: m[1], m[2]: m[3]} + pattern = METRIC_PATTERN + matchs = re.findall(pattern, sout) + if matchs and len(matchs) == 1: + result["result"].setdefault(prec, {}) + result["result"][prec].update(get_metric_result(matchs[0])) + result["result"][prec]["status"] = "PASS" 
+ result["result"][prec]["Cost time (s)"] = t + logging.debug(f"matchs:\n{matchs}") + return result + # BERT series models def run_nlp_testcase(model): model_name = model["model_name"] diff --git a/tests/run_ixrt.py b/tests/run_ixrt.py index a19223ad859fdc6f4cf8d9e14c3d7c93086925ad..9464042144020809ff2d7f3983ff74d924e1df3f 100644 --- a/tests/run_ixrt.py +++ b/tests/run_ixrt.py @@ -94,6 +94,16 @@ def main(): logging.debug(f"The result of {model['model_name']} is\n{json.dumps(result, indent=4)}") logging.info(f"End running {model['model_name']} test case.") + # instance_segmentation模型 + if model["category"] in ["cv/instance_segmentation"]: + logging.info(f"Start running {model['model_name']} test case:\n{json.dumps(model, indent=4)}") + d_url = model["download_url"] + if d_url is not None: + result = run_instance_segmentation_testcase(model) + check_model_result(result) + logging.debug(f"The result of {model['model_name']} is\n{json.dumps(result, indent=4)}") + logging.info(f"End running {model['model_name']} test case.") + # NLP模型 if model["category"] in ["nlp/plm", "others/recommendation"]: logging.info(f"Start running {model['model_name']} test case:\n{json.dumps(model, indent=4)}") @@ -191,9 +201,29 @@ def run_clf_testcase(model): match_count += 1 result["result"][prec][name] = float(f"{float(value.split(':')[1].strip()):.3f}") break - if match_count == len(patterns): result["result"][prec]["status"] = "PASS" + + if model_name == "swin_transformer_large": + pattern = r'Throughput: (\d+\.\d+) qps' + matchs = re.findall(pattern, sout) + for m in matchs: + result["result"].setdefault(prec, {"status": "FAIL"}) + try: + result["result"][prec]["QPS"] = float(m) + except ValueError: + print("The string cannot be converted to a float.") + result["result"][prec]["QPS"] = m + + pattern = METRIC_PATTERN + matchs = re.findall(pattern, sout) + result["result"].setdefault(prec, {"status": "FAIL"}) + logging.debug(f"matchs:\n{matchs}") + for m in matchs: + result["result"][prec].update(get_metric_result(m)) + if len(matchs) == 1: + result["result"][prec]["status"] = "PASS" + result["result"][prec]["Cost time (s)"] = t logging.debug(f"matchs:\n{matchs}") return result @@ -375,7 +405,9 @@ def run_nlp_testcase(model): bash scripts/infer_{model_name}_{prec}_performance.sh cd ./ByteMLPerf/byte_infer_perf/general_perf """ - if model_name == "roformer" or model_name == "widedeep": + if model_name == "roformer" or model_name == "wide_and_deep": + if model_name == "wide_and_deep": + model_name = "widedeep" script += f""" python3 core/perf_engine.py --hardware_type ILUVATAR --task {model_name}-tf-fp32 """ @@ -414,13 +446,23 @@ def run_nlp_testcase(model): r, t = run_script(script) sout = r.stdout + pattern = r'Throughput: (\d+\.\d+) qps' + matchs = re.findall(pattern, sout) + for m in matchs: + result["result"].setdefault(prec, {"status": "FAIL"}) + try: + result["result"][prec]["QPS"] = float(m) + except ValueError: + print("The string cannot be converted to a float.") + result["result"][prec]["QPS"] = m + pattern = METRIC_PATTERN matchs = re.findall(pattern, sout) result["result"].setdefault(prec, {"status": "FAIL"}) logging.debug(f"matchs:\n{matchs}") for m in matchs: result["result"][prec].update(get_metric_result(m)) - if len(matchs) == 2: + if len(matchs) == 1: result["result"][prec]["status"] = "PASS" result["result"][prec]["Cost time (s)"] = t @@ -478,6 +520,59 @@ def run_speech_testcase(model): logging.debug(f"matchs:\n{matchs}") return result +def run_instance_segmentation_testcase(model): + model_name 
= model["model_name"] + result = { + "name": model_name, + "result": {}, + } + d_url = model["download_url"] + checkpoint_n = d_url.split("/")[-1] + dataset_n = model["datasets"].split("/")[-1] + prepare_script = f""" + cd ../{model['model_path']} + ln -s /root/data/checkpoints/{checkpoint_n} ./ + ln -s /root/data/datasets/{dataset_n} ./ + bash ci/prepare.sh + ls -l | grep onnx + """ + + # add pip list info when in debug mode + if utils.is_debug(): + pip_list_script = "pip list | grep -E 'numpy|transformer|igie|mmcv|onnx'\n" + prepare_script = pip_list_script + prepare_script + pip_list_script + + run_script(prepare_script) + + for prec in model["precisions"]: + logging.info(f"Start running {model_name} {prec} test case") + script = f""" + cd ../{model['model_path']} + export PROJ_DIR=./ + export DATASETS_DIR=./coco2017/ + export CHECKPOINTS_DIR=./checkpoints + export COCO_GT=./coco2017/annotations/instances_val2017.json + export EVAL_DIR=./coco2017/val2017 + export RUN_DIR=./ + bash scripts/infer_{model_name}_{prec}_accuracy.sh + bash scripts/infer_{model_name}_{prec}_performance.sh + """ + + r, t = run_script(script) + sout = r.stdout + pattern = METRIC_PATTERN + matchs = re.findall(pattern, sout) + result["result"].setdefault(prec, {"status": "FAIL"}) + logging.debug(f"matchs:\n{matchs}") + for m in matchs: + result["result"][prec].update(get_metric_result(m)) + if len(matchs) == 2: + result["result"][prec]["status"] = "PASS" + + result["result"][prec]["Cost time (s)"] = t + logging.debug(f"matchs:\n{matchs}") + return result + def get_metric_result(str): if str: return json.loads(str.replace("'", "\""))["metricResult"] diff --git a/tests/run_trtllm.py b/tests/run_trtllm.py index c57e02816ef6dba5f829ecfaf2e7b1e3849d0da5..ac79b3b9591e4cd65269a4d5e0341b4b2d2d0a9a 100644 --- a/tests/run_trtllm.py +++ b/tests/run_trtllm.py @@ -72,7 +72,7 @@ def get_model_config(mode_name): models = json.load(file) for model in models['models']: - if model["model_name"] == mode_name.lower() and model["framework"] == "trtllm": + if model["model_name"] == mode_name.lower() and (model["framework"] == "trtllm" or model["framework"] == "tgi"): return model return diff --git a/tests/run_vllm.py b/tests/run_vllm.py index a200569cfda5db31f720bcd9ccffba70399a056b..e05973fdd535f8d626c53056c2280e8dcbf214e6 100644 --- a/tests/run_vllm.py +++ b/tests/run_vllm.py @@ -205,10 +205,14 @@ def run_nlp_testcase(model): python3 offline_inference.py --model ./stablelm --max-tokens 256 -tp 1 --temperature 0.0 """ elif model_name.startswith("deepseek-r1-distill-"): + if model_name == "deepseek-r1-distill-qwen-32b": + tp = 4 + else: + tp = 2 script = f""" set -x cd ../{model['model_path']} - python3 offline_inference.py --model ./{model_name} --max-tokens 256 -tp 2 --temperature 0.0 --max-model-len 3096 + python3 offline_inference.py --model ./{model_name} --max-tokens 256 -tp {tp} --temperature 0.0 --max-model-len 3096 """ elif model_name == "aria": script = f""" @@ -217,6 +221,13 @@ def run_nlp_testcase(model): export VLLM_ASSETS_CACHE=../vllm/ python3 offline_inference_vision_language.py --model ./{model_name} --max-tokens 256 -tp 4 --trust-remote-code --temperature 0.0 --dtype bfloat16 --tokenizer-mode slow """ + elif model_name == "chameleon_7b" or model_name == "fuyu_8b": + script = f""" + set -x + cd ../{model['model_path']} + export VLLM_ASSETS_CACHE=../vllm/ + python3 offline_inference_vision_language.py --model ./{model_name} --max-tokens 256 -tp 2 --trust-remote-code --temperature 0.0 + """ elif model_name == "h2vol" or 
model_name == "idefics3": script = f""" set -x @@ -231,7 +242,7 @@ def run_nlp_testcase(model): export VLLM_ASSETS_CACHE=../vllm/ PT_SDPA_ENABLE_HEAD_DIM_PADDING=1 python3 offline_inference_vision_language.py --model ./{model_name} --max-tokens 256 -tp 2 --trust-remote-code --temperature 0.0 """ - elif model_name == "mllama": + elif model_name == "llama-3.2": script = f""" set -x cd ../{model['model_path']} @@ -246,6 +257,27 @@ def run_nlp_testcase(model): export VLLM_ASSETS_CACHE=../vllm/ python3 offline_inference_vision_language.py --model ./{model_name} --max-tokens 256 -tp 4 --trust-remote-code --temperature 0.0 --tokenizer-mode 'mistral' """ + elif model_name == "llava": + script = f""" + set -x + cd ../{model['model_path']} + export VLLM_ASSETS_CACHE=../vllm/ + python3 offline_inference_vision_language.py --model ./{model_name} --max-tokens 256 -tp 4 --trust-remote-code --temperature 0.0 --model-type llava-next --max-model-len 4096 + """ + elif model_name == "llava_next_video_7b": + script = f""" + set -x + cd ../{model['model_path']} + export VLLM_ASSETS_CACHE=../vllm/ + python3 offline_inference_vision_language.py --model ./{model_name} --max-tokens 256 -tp 4 --trust-remote-code --temperature 0.0 --model-type llava-next-video --modality video --dtype bfloat16 + """ + elif model_name == "intern_vl": + script = f""" + set -x + cd ../{model['model_path']} + export VLLM_ASSETS_CACHE=../vllm/ + python3 offline_inference_vision_language.py --model ./{model_name} --max-tokens 256 -tp 2 --temperature 0.0 --max-model-len 2048 + """ r, t = run_script(script) sout = r.stdout @@ -257,6 +289,13 @@ def run_nlp_testcase(model): result["result"][prec]["tokens"] = int(matchs.group(1)) result["result"][prec]["QPS"] = float(matchs.group(2)) result["result"][prec]["status"] = "PASS" + else: + pattern = r"Maximum concurrency for (\d+) tokens per request: ([\d.]+)x" + matchs = re.search(pattern, sout) + if matchs: + result["result"][prec]["tokens"] = int(matchs.group(1)) + result["result"][prec]["QPS"] = float(matchs.group(2)) + result["result"][prec]["status"] = "PASS" result["result"][prec]["Cost time (s)"] = t return result diff --git a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py index 089d9860f573bba7e19f84aa20fb830a8fcc22d8..f8a2797282b4a2edbace565b8a7d68ad3090ea48 100644 --- a/toolbox/ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py +++ b/toolbox/ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py @@ -210,6 +210,9 @@ class PerfEngine: if accuracy_report: base_report['Accuracy'] = accuracy_report + metricResult = {} + metricResult['metricResult'] = accuracy_report + print(metricResult) # function to test qps and latency if workload['test_perf']: