diff --git a/README.md b/README.md index a92b22a32f0390f7d8cb860bbadfa7247c1f24d9..c9932f74103ab87db67e8d4cc1b75d5bf9c000eb 100644 --- a/README.md +++ b/README.md @@ -58,7 +58,7 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型 ConvNeXt-Base FP16 Supported - - + Supported INT8 @@ -90,7 +90,7 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型 CSPResNet50 FP16 - - + Supported Supported @@ -102,7 +102,7 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型 DeiT-tiny FP16 Supported - - + Supported INT8 @@ -146,7 +146,7 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型 DenseNet201 FP16 Supported - - + Supported INT8 @@ -197,6 +197,17 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型 - - + + EfficientNet-B4 + FP16 + Supported + - + + + INT8 + - + - + EfficientNetV2 FP16 @@ -212,7 +223,7 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型 EfficientNetv2_rw_t FP16 Supported - - + Supported INT8 @@ -274,6 +285,17 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型 - Supported + + Mixer_B + FP16 + Supported + - + + + INT8 + - + - + MNASNet0_5 FP16 @@ -285,6 +307,17 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型 - - + + MNASNet0_75 + FP16 + Supported + - + + + INT8 + - + - + MobileNetV2 FP16 @@ -329,6 +362,17 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型 - - + + RegNet_x_16gf + FP16 + Supported + - + + + INT8 + - + - + RegNet_x_1_6gf FP16 @@ -472,6 +516,17 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型 - - + + ResNeXt101_32x8d + FP16 + Supported + - + + + INT8 + - + - + SEResNet50 FP16 @@ -528,26 +583,48 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型 - - SqueezeNet 1.0 + ShuffleNetV2_x2_0 FP16 + Supported + - + + + INT8 + - - - Supported + + + SqueezeNet 1.0 + FP16 + Supported + Supported INT8 - - Supported + Supported SqueezeNet 1.1 FP16 - - Supported + Supported + + + INT8 + - + Supported + + + SVT Base + FP16 + Supported + - INT8 - - Supported + - Swin Transformer @@ -571,6 +648,17 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型 - - + + VGG11 + FP16 + Supported + - + + + INT8 + - + - + VGG16 FP16 @@ -593,6 +681,17 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型 Supported Supported + + Wide ResNet101 + FP16 + Supported + - + + + INT8 + - + - + ### Detection @@ -652,7 +751,7 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型 FoveaBox FP16 Supported - - + Supported INT8 @@ -663,7 +762,7 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型 FSAF FP16 Supported - - + Supported INT8 @@ -674,7 +773,7 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型 HRNet FP16 Supported - - + Supported INT8 @@ -725,6 +824,17 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型 - - + + SABL + FP16 + Supported + - + + + INT8 + - + - + YOLOv3 FP16 @@ -824,6 +934,17 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型 - - + + YOLOv11 + FP16 + Supported + - + + + INT8 + - + - + YOLOX FP16 @@ -867,29 +988,24 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型 Models Precision IGIE - IxRT Kie_layoutXLM FP16 Supported - - INT8 - - - SVTR FP16 Supported - - INT8 - - - @@ -902,6 +1018,17 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型 IGIE IxRT + + HRNetPose + FP16 + Supported + - + + + INT8 + - + - + Lightweight OpenPose FP16 @@ -1026,209 +1153,236 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型 -## NLP - -### Language Modelling +## LLM (Large Language Model) - - - + + + - - + + + - - + + - - + + - - + + + - - - - - - - + + + - - - - - + + + + - - - + + + + - - + + - + - - + + + - - + + + - - + + - - + + + - - + + - - + + + - - + +
ModelsPrecisionIGIEIxRTvLLMTensorRT-LLMTGI
ALBERTFP16Baichuan2-7BSupported- -Supported
INT8ChatGLM-3-6BSupported - -
BERT Base NERFP16ChatGLM-3-6B-32KSupported - -
INT8SupportedLlama2-7BSupportedSupported -
BERT Base SQuADFP16SupportedSupported
INT8Llama2-13B-Supported -Supported
BERT Large SQuADFP16SupportedSupportedLlama2-70B-Supported-
INT8SupportedSupportedLlama3-70BSupported--
DeBERTaFP16Qwen-7BSupported -Supported-
INT8-Qwen1.5-7BSupported -Supported
RoBERTaFP16Qwen1.5-14BSupported- -Supported
INT8Qwen1.5-32B ChatSupported - -
RoFormerFP16Qwen1.5-72BSupported- -Supported
INT8Qwen2-7B InstructSupported - -
VideoBERTFP16Qwen2-72B InstructSupported- -Supported
INT8StableLM2-1.6BSupported - -
-### Large Language Model +## Multimodal - - + + + + + + + + + + + + + + + + + + + + + + + + +
Models vLLMTensorRT-LLMTGI
Chameleon-7BSupported
Fuyu-8BSupported
InternVL2-4BSupported
LLaVASupported
LLaVA-Next-Video-7BSupported
MiniCPM V2Supported
+ +## NLP + +### Language Modelling + + - - - + + + + + + + + + - - + - - + + - - - + + - - - - + + + + - - - + + - - - - + + + + - - - - + + + - - + + - + - - + + - - - - + + + - - + - - - + + + - - + - - - + + + - - + diff --git a/models/cv/classification/deit_tiny/ixrt/README.md b/models/cv/classification/deit_tiny/ixrt/README.md index 1ceea7bfbbb8bb888cc0cb2a14bc5de4773962fa..15fb025e9badbdb2f303f13a977675638489c90e 100644 --- a/models/cv/classification/deit_tiny/ixrt/README.md +++ b/models/cv/classification/deit_tiny/ixrt/README.md @@ -70,4 +70,4 @@ bash scripts/infer_deit_tiny_fp16_performance.sh ## Reference -Deit_tiny: \ No newline at end of file +Deit_tiny: diff --git a/models/cv/classification/vgg11/igie/README.md b/models/cv/classification/vgg11/igie/README.md index 0206d951f48fc2be828725a66323111e85f0565f..522ff3e7a108eedb95cb61074ac1b90ebb8d027c 100644 --- a/models/cv/classification/vgg11/igie/README.md +++ b/models/cv/classification/vgg11/igie/README.md @@ -43,4 +43,4 @@ bash scripts/infer_vgg11_fp16_performance.sh Model |BatchSize |Precision |FPS |Top-1(%) |Top-5(%) --------|-----------|----------|----------|----------|-------- -VGG11 | 32 | FP16 | 3872.86 | 69.03 | 88.6 \ No newline at end of file +VGG11 | 32 | FP16 | 3872.86 | 69.03 | 88.6 diff --git a/models/cv/classification/wide_resnet101/igie/README.md b/models/cv/classification/wide_resnet101/igie/README.md index a72eeb1c48d770576d8e2d8bf00ac28cd7f4e404..93a5a3b8daa1d18ba43053f76ca90bdcb0b31bad 100644 --- a/models/cv/classification/wide_resnet101/igie/README.md +++ b/models/cv/classification/wide_resnet101/igie/README.md @@ -43,4 +43,4 @@ bash scripts/infer_wide_resnet101_fp16_performance.sh | Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) | | -------------- | --------- | --------- | -------- | -------- | -------- | -| Wide ResNet101 | 32 | FP16 | 1339.037 | 78.459 | 94.052 | \ No newline at end of file +| Wide ResNet101 | 32 | FP16 | 1339.037 | 78.459 | 94.052 | diff --git a/models/cv/detection/sabl/igie/README.md b/models/cv/detection/sabl/igie/README.md index 975e72daaef46a7c36d31cc4992a4524b2c7084a..28d0242e2905f30d1f520a3d8f4245fe86ccf5f5 100644 --- a/models/cv/detection/sabl/igie/README.md +++ b/models/cv/detection/sabl/igie/README.md @@ -27,6 +27,7 @@ Dataset: to download the valida ```bash wget https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_retinanet_r50_fpn_1x_coco/sabl_retinanet_r50_fpn_1x_coco-6c54fd4f.pth ``` + ### Model Conversion ```bash diff --git a/models/cv/pose_estimation/hrnetpose/igie/README.md b/models/cv/pose_estimation/hrnetpose/igie/README.md index 1785d1f7363a5379fce907fdcb19316399ff5850..c4f0758fd794e3d6a553955697b9334929d99b65 100644 --- a/models/cv/pose_estimation/hrnetpose/igie/README.md +++ b/models/cv/pose_estimation/hrnetpose/igie/README.md @@ -5,6 +5,7 @@ HRNetPose (High-Resolution Network for Pose Estimation) is a high-performance human pose estimation model introduced in the paper "Deep High-Resolution Representation Learning for Human Pose Estimation". It is designed to address the limitations of traditional methods by maintaining high-resolution feature representations throughout the network, enabling more accurate detection of human keypoints. ## Setup + ### Install ```bash @@ -18,6 +19,7 @@ pip3 install -r requirements.txt ``` ### Download + Pretrained model: Dataset: to download the validation dataset. 
@@ -27,6 +29,7 @@ wget https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_256x192 ``` ### Model Conversion + ```bash # export onnx model python3 export.py --weight hrnet_w32_coco_256x192-c78dce93_20200708.pth --cfg td-hm_hrnet-w32_8xb64-210e_coco-256x192.py --input 1,3,256,192 --output hrnetpose.onnx @@ -58,4 +61,4 @@ bash scripts/infer_hrnetpose_fp16_performance.sh ## Reference -mmpose: \ No newline at end of file +mmpose: diff --git a/models/vision-language-understanding/chameleon-7b/vllm/README.md b/models/multimodal/vision_language_understanding/chameleon_7b/vllm/README.md similarity index 99% rename from models/vision-language-understanding/chameleon-7b/vllm/README.md rename to models/multimodal/vision_language_understanding/chameleon_7b/vllm/README.md index bebd7c799c66db9bc8ae7fe093ea945d34bbdfca..568dbd7011ff4bf157b4a0e2ee027729f192f9a0 100755 --- a/models/vision-language-understanding/chameleon-7b/vllm/README.md +++ b/models/multimodal/vision_language_understanding/chameleon_7b/vllm/README.md @@ -2,7 +2,7 @@ ## Description -Chameleon, an AI system that mitigates these limitations by augmenting LLMs with plug-and-play modules for compositional reasoning. Chameleon synthesizes programs by composing various tools (e.g., LLMs, off-the-shelf vision models, web search engines, Python functions, and heuristic-based modules) for accomplishing complex reasoning tasks. At the heart of Chameleon is an LLM-based planner that assembles a sequence of tools to execute to generate the final response. We showcase the effectiveness of Chameleon on two multi-modal knowledge-intensive reasoning tasks: ScienceQA and TabMWP. Chameleon, powered by GPT-4, achieves an 86.54% overall accuracy on ScienceQA, improving the best published few-shot result by 11.37%. On TabMWP, GPT-4-powered Chameleon improves the accuracy by 17.0%, lifting the state of the art to 98.78%. Our analysis also shows that the GPT-4-powered planner exhibits more consistent and rational tool selection via inferring potential constraints from instructions, compared to a ChatGPT-powered planner. +Chameleon, an AI system that mitigates these limitations by augmenting LLMs with plug-and-play modules for compositional reasoning. Chameleon synthesizes programs by composing various tools (e.g., LLMs, off-the-shelf vision models, web search engines, Python functions, and heuristic-based modules) for accomplishing complex reasoning tasks. At the heart of Chameleon is an LLM-based planner that assembles a sequence of tools to execute to generate the final response. We showcase the effectiveness of Chameleon on two multi-modal knowledge-intensive reasoning tasks: ScienceQA and TabMWP. Chameleon, powered by GPT-4, achieves an 86.54% overall accuracy on ScienceQA, improving the best published few-shot result by 11.37%. On TabMWP, GPT-4-powered Chameleon improves the accuracy by 17.0%, lifting the state of the art to 98.78%. Our analysis also shows that the GPT-4-powered planner exhibits more consistent and rational tool selection via inferring potential constraints from instructions, compared to a ChatGPT-powered planner. 
## Setup @@ -32,4 +32,4 @@ mkdir data ```bash export VLLM_ASSETS_CACHE=../vllm/ python3 offline_inference_vision_language.py --model ./data/chameleon-7b --max-tokens 256 -tp 2 --trust-remote-code --temperature 0.0 -``` \ No newline at end of file +``` diff --git a/models/vision-language-understanding/chameleon-7b/vllm/offline_inference_vision_language.py b/models/multimodal/vision_language_understanding/chameleon_7b/vllm/offline_inference_vision_language.py similarity index 100% rename from models/vision-language-understanding/chameleon-7b/vllm/offline_inference_vision_language.py rename to models/multimodal/vision_language_understanding/chameleon_7b/vllm/offline_inference_vision_language.py diff --git a/models/vision-language-understanding/chameleon-7b/vllm/utils.py b/models/multimodal/vision_language_understanding/chameleon_7b/vllm/utils.py similarity index 100% rename from models/vision-language-understanding/chameleon-7b/vllm/utils.py rename to models/multimodal/vision_language_understanding/chameleon_7b/vllm/utils.py diff --git a/models/vision-language-understanding/Intern_VL/vllm/vllm_public_assets/cherry_blossom.jpg b/models/multimodal/vision_language_understanding/chameleon_7b/vllm/vllm_public_assets/cherry_blossom.jpg similarity index 100% rename from models/vision-language-understanding/Intern_VL/vllm/vllm_public_assets/cherry_blossom.jpg rename to models/multimodal/vision_language_understanding/chameleon_7b/vllm/vllm_public_assets/cherry_blossom.jpg diff --git a/models/vision-language-understanding/fuyu-8b/vllm/README.md b/models/multimodal/vision_language_understanding/fuyu_8b/vllm/README.md similarity index 98% rename from models/vision-language-understanding/fuyu-8b/vllm/README.md rename to models/multimodal/vision_language_understanding/fuyu_8b/vllm/README.md index 7bc5d2cc04268a63d6dd1bebb4e040f55f80be4d..96d5acc3b208b72de34624acb74849a7536670d0 100755 --- a/models/vision-language-understanding/fuyu-8b/vllm/README.md +++ b/models/multimodal/vision_language_understanding/fuyu_8b/vllm/README.md @@ -1,4 +1,4 @@ -# FuyuForCausalLM +# Fuyu-8B ## Description @@ -34,4 +34,4 @@ mkdir data ```bash export VLLM_ASSETS_CACHE=../vllm/ python3 offline_inference_vision_language.py --model ./data/fuyu-8b --max-tokens 256 -tp 2 --trust-remote-code --temperature 0.0 -``` \ No newline at end of file +``` diff --git a/models/vision-language-understanding/fuyu-8b/vllm/offline_inference_vision_language.py b/models/multimodal/vision_language_understanding/fuyu_8b/vllm/offline_inference_vision_language.py similarity index 100% rename from models/vision-language-understanding/fuyu-8b/vllm/offline_inference_vision_language.py rename to models/multimodal/vision_language_understanding/fuyu_8b/vllm/offline_inference_vision_language.py diff --git a/models/vision-language-understanding/fuyu-8b/vllm/utils.py b/models/multimodal/vision_language_understanding/fuyu_8b/vllm/utils.py similarity index 100% rename from models/vision-language-understanding/fuyu-8b/vllm/utils.py rename to models/multimodal/vision_language_understanding/fuyu_8b/vllm/utils.py diff --git a/models/vision-language-understanding/LLava/vllm/vllm_public_assets/cherry_blossom.jpg b/models/multimodal/vision_language_understanding/fuyu_8b/vllm/vllm_public_assets/cherry_blossom.jpg similarity index 100% rename from models/vision-language-understanding/LLava/vllm/vllm_public_assets/cherry_blossom.jpg rename to models/multimodal/vision_language_understanding/fuyu_8b/vllm/vllm_public_assets/cherry_blossom.jpg diff --git 
a/models/vision-language-understanding/Intern_VL/vllm/README.md b/models/multimodal/vision_language_understanding/intern_vl/vllm/README.md similarity index 98% rename from models/vision-language-understanding/Intern_VL/vllm/README.md rename to models/multimodal/vision_language_understanding/intern_vl/vllm/README.md index 0b09f06eaef8da47b0f9fced467f6d34770191e7..cafe64f99ad80d3602a3ba3f478ae6b1f8cdb3fb 100644 --- a/models/vision-language-understanding/Intern_VL/vllm/README.md +++ b/models/multimodal/vision_language_understanding/intern_vl/vllm/README.md @@ -6,7 +6,7 @@ InternVL2-4B is a large-scale multimodal model developed by WeTab AI, designed t ## Setup -### Instal +### Install In order to run the model smoothly, you need to get the sdk from [resource center](https://support.iluvatar.com/#/ProductLine?id=2) of Iluvatar CoreX official website. @@ -22,6 +22,7 @@ pip3 install vllm pip3 install triton pip3 install ixformer ``` + ### Download -Model: diff --git a/models/vision-language-understanding/Intern_VL/vllm/offline_inference_vision_language.py b/models/multimodal/vision_language_understanding/intern_vl/vllm/offline_inference_vision_language.py similarity index 100% rename from models/vision-language-understanding/Intern_VL/vllm/offline_inference_vision_language.py rename to models/multimodal/vision_language_understanding/intern_vl/vllm/offline_inference_vision_language.py diff --git a/models/vision-language-understanding/Intern_VL/vllm/utils.py b/models/multimodal/vision_language_understanding/intern_vl/vllm/utils.py similarity index 100% rename from models/vision-language-understanding/Intern_VL/vllm/utils.py rename to models/multimodal/vision_language_understanding/intern_vl/vllm/utils.py diff --git a/models/vision-language-understanding/chameleon-7b/vllm/vllm_public_assets/cherry_blossom.jpg b/models/multimodal/vision_language_understanding/intern_vl/vllm/vllm_public_assets/cherry_blossom.jpg similarity index 100% rename from models/vision-language-understanding/chameleon-7b/vllm/vllm_public_assets/cherry_blossom.jpg rename to models/multimodal/vision_language_understanding/intern_vl/vllm/vllm_public_assets/cherry_blossom.jpg diff --git a/models/vision-language-understanding/LLava/vllm/README.md b/models/multimodal/vision_language_understanding/llava/vllm/README.md similarity index 99% rename from models/vision-language-understanding/LLava/vllm/README.md rename to models/multimodal/vision_language_understanding/llava/vllm/README.md index 1b805f41751576d165f5aa5d5ad1259abd5a20f1..bbc251d2b3766dde38214515217f0faf75085d12 100644 --- a/models/vision-language-understanding/LLava/vllm/README.md +++ b/models/multimodal/vision_language_understanding/llava/vllm/README.md @@ -4,7 +4,6 @@ LLaVA is an open-source chatbot trained by fine-tuning LLaMA/Vicuna on GPT-generated multimodal instruction-following data. It is an auto-regressive language model, based on the transformer architecture.The LLaVA-NeXT model was proposed in LLaVA-NeXT: Improved reasoning, OCR, and world knowledge by Haotian Liu, Chunyuan Li, Yuheng Li, Bo Li, Yuanhan Zhang, Sheng Shen, Yong Jae Lee. LLaVa-NeXT (also called LLaVa-1.6) improves upon LLaVa-1.5 by increasing the input image resolution and training on an improved visual instruction tuning dataset to improve OCR and common sense reasoning. 
- ## Setup ### Install @@ -24,7 +23,6 @@ pip3 install transformers -llava-v1.6-vicuna-7b-hf: - ```bash # Download model from the website and make sure the model's path is "data/llava" mkdir data @@ -39,11 +37,10 @@ export PATH=/usr/local/corex/bin:${PATH} export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64 ``` - ### Inference llava-1.6 ```bash export VLLM_ASSETS_CACHE=../vllm/ export CUDA_VISIBLE_DEVICES=0,1,2,3 python3 offline_inference_vision_language.py --model /path/to/model --max-tokens 256 -tp 4 --trust-remote-code --temperature 0.0 --model-type llava-next --max-model-len 4096 -``` \ No newline at end of file +``` diff --git a/models/vision-language-understanding/LLava/vllm/offline_inference_vision_language.py b/models/multimodal/vision_language_understanding/llava/vllm/offline_inference_vision_language.py similarity index 100% rename from models/vision-language-understanding/LLava/vllm/offline_inference_vision_language.py rename to models/multimodal/vision_language_understanding/llava/vllm/offline_inference_vision_language.py diff --git a/models/vision-language-understanding/LLava/vllm/utils.py b/models/multimodal/vision_language_understanding/llava/vllm/utils.py similarity index 100% rename from models/vision-language-understanding/LLava/vllm/utils.py rename to models/multimodal/vision_language_understanding/llava/vllm/utils.py diff --git a/models/vision-language-understanding/fuyu-8b/vllm/vllm_public_assets/cherry_blossom.jpg b/models/multimodal/vision_language_understanding/llava/vllm/vllm_public_assets/cherry_blossom.jpg similarity index 100% rename from models/vision-language-understanding/fuyu-8b/vllm/vllm_public_assets/cherry_blossom.jpg rename to models/multimodal/vision_language_understanding/llava/vllm/vllm_public_assets/cherry_blossom.jpg diff --git a/models/vision-language-understanding/llava_next_video-7b/vllm/README.md b/models/multimodal/vision_language_understanding/llava_next_video_7b/vllm/README.md similarity index 97% rename from models/vision-language-understanding/llava_next_video-7b/vllm/README.md rename to models/multimodal/vision_language_understanding/llava_next_video_7b/vllm/README.md index a50af3a220158968f5de39e48aed3bf61362e011..bf4b268310243ce6b51b23efdbf0d97aed5a573c 100755 --- a/models/vision-language-understanding/llava_next_video-7b/vllm/README.md +++ b/models/multimodal/vision_language_understanding/llava_next_video_7b/vllm/README.md @@ -1,4 +1,4 @@ -# LLaVA-Next-Video +# LLaVA-Next-Video-7B ## Description @@ -32,4 +32,4 @@ mkdir data ```bash export VLLM_ASSETS_CACHE=../vllm/ python3 offline_inference_vision_language.py --model ./data/LLaVA-NeXT-Video-7B-hf --max-tokens 256 -tp 4 --trust-remote-code --temperature 0.0 --model-type llava-next-video --modality video --dtype bfloat16 -``` \ No newline at end of file +``` diff --git a/models/vision-language-understanding/llava_next_video-7b/vllm/offline_inference_vision_language.py b/models/multimodal/vision_language_understanding/llava_next_video_7b/vllm/offline_inference_vision_language.py similarity index 100% rename from models/vision-language-understanding/llava_next_video-7b/vllm/offline_inference_vision_language.py rename to models/multimodal/vision_language_understanding/llava_next_video_7b/vllm/offline_inference_vision_language.py diff --git a/models/vision-language-understanding/llava_next_video-7b/vllm/utils.py b/models/multimodal/vision_language_understanding/llava_next_video_7b/vllm/utils.py similarity index 100% rename from 
models/vision-language-understanding/llava_next_video-7b/vllm/utils.py rename to models/multimodal/vision_language_understanding/llava_next_video_7b/vllm/utils.py diff --git a/models/vision-language-understanding/llava_next_video-7b/vllm/video-eample-data/sample_demo_1.mp4 b/models/multimodal/vision_language_understanding/llava_next_video_7b/vllm/video-eample-data/sample_demo_1.mp4 similarity index 100% rename from models/vision-language-understanding/llava_next_video-7b/vllm/video-eample-data/sample_demo_1.mp4 rename to models/multimodal/vision_language_understanding/llava_next_video_7b/vllm/video-eample-data/sample_demo_1.mp4 diff --git a/models/multimodal/vision-language-understanding/minicpm-v-2/vllm/README.md b/models/multimodal/vision_language_understanding/minicpm_v_2/vllm/README.md similarity index 79% rename from models/multimodal/vision-language-understanding/minicpm-v-2/vllm/README.md rename to models/multimodal/vision_language_understanding/minicpm_v_2/vllm/README.md index 2dc49881226176329767fcf52f9d0742a4912056..149f01f15dac0c9b701b1f0cba9ad41bc242da9e 100644 --- a/models/multimodal/vision-language-understanding/minicpm-v-2/vllm/README.md +++ b/models/multimodal/vision_language_understanding/minicpm_v_2/vllm/README.md @@ -1,8 +1,8 @@ -# MiniCPM-V-2 +# MiniCPM V2 ## Description -MiniCPM-V-2 is a compact and efficient language model designed for various natural language processing (NLP) tasks. Building on its predecessor, MiniCPM-V-1, this model integrates advancements in architecture and optimization techniques, making it suitable for deployment in resource-constrained environments.s +MiniCPM V2 is a compact and efficient vision-language model designed for multimodal understanding tasks. Building on its predecessor, MiniCPM-V-1, it integrates advancements in architecture and optimization techniques, making it suitable for deployment in resource-constrained environments. ## Setup diff --git a/models/multimodal/vision-language-understanding/minicpm-v-2/vllm/ci/prepare.sh b/models/multimodal/vision_language_understanding/minicpm_v_2/vllm/ci/prepare.sh similarity index 100% rename from models/multimodal/vision-language-understanding/minicpm-v-2/vllm/ci/prepare.sh rename to models/multimodal/vision_language_understanding/minicpm_v_2/vllm/ci/prepare.sh diff --git a/models/multimodal/vision-language-understanding/minicpm-v-2/vllm/minicpmv-2.0-offline.py b/models/multimodal/vision_language_understanding/minicpm_v_2/vllm/minicpmv-2.0-offline.py similarity index 100% rename from models/multimodal/vision-language-understanding/minicpm-v-2/vllm/minicpmv-2.0-offline.py rename to models/multimodal/vision_language_understanding/minicpm_v_2/vllm/minicpmv-2.0-offline.py
Baichuan2-7BSupported-ModelsPrecisionIGIEIxRT
ALBERTFP16 -Supported
ChatGLM-3-6BSupportedINT8 - -
ChatGLM-3-6B-32KSupportedBERT Base NERFP16 - -
Llama2-7BSupportedSupportedINT8Supported -
Llama2-13B-Supported-BERT Base SQuADFP16SupportedSupported
Llama2-70B-SupportedINT8 -Supported
Llama3-70BSupported--BERT Large SQuADFP16SupportedSupported
MiniCPM-V-2Supported--INT8SupportedSupported
Qwen-7BSupportedDeBERTaFP16 --Supported
Qwen1.5-7BSupportedINT8- -Supported
Qwen1.5-14BSupported-RoBERTaFP16 -Supported
Qwen1.5-32B ChatSupportedINT8 - -
Qwen1.5-72BSupported-RoFormerFP16 -Supported
Qwen2-7B InstructSupportedINT8 - -
Qwen2-72B InstructSupported-VideoBERTFP16 -Supported
StableLM2-1.6BSupportedINT8 - -