From a99e50b3961733abf01a7fed6326068fdc57ef01 Mon Sep 17 00:00:00 2001 From: "mingjiang.li" Date: Fri, 8 Aug 2025 18:01:44 +0800 Subject: [PATCH 1/3] add 25.09 llm models to the list Signed-off-by: mingjiang.li --- README.md | 59 +++++++++++++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index bb5b169f..6729fe73 100644 --- a/README.md +++ b/README.md @@ -24,29 +24,42 @@ ### 大语言模型(LLM) -| Model | vLLM | TRT-LLM | TGI | IXUCA SDK | -|-------------------------------|--------------------------------------------------------|---------------------------------------|------------------------------------|-----------| -| Baichuan2-7B | [✅](models/nlp/llm/baichuan2-7b/vllm) | | | 4.3.0 | -| ChatGLM-3-6B | [✅](models/nlp/llm/chatglm3-6b/vllm) | | | 4.3.0 | -| ChatGLM-3-6B-32K | [✅](models/nlp/llm/chatglm3-6b-32k/vllm) | | | 4.3.0 | -| DeepSeek-R1-Distill-Llama-8B | [✅](models/nlp/llm/deepseek-r1-distill-llama-8b/vllm) | | | 4.3.0 | -| DeepSeek-R1-Distill-Llama-70B | [✅](models/nlp/llm/deepseek-r1-distill-llama-70b/vllm) | | | 4.3.0 | -| DeepSeek-R1-Distill-Qwen-1.5B | [✅](models/nlp/llm/deepseek-r1-distill-qwen-1.5b/vllm) | | | 4.3.0 | -| DeepSeek-R1-Distill-Qwen-7B | [✅](models/nlp/llm/deepseek-r1-distill-qwen-7b/vllm) | | | 4.3.0 | -| DeepSeek-R1-Distill-Qwen-14B | [✅](models/nlp/llm/deepseek-r1-distill-qwen-14b/vllm) | | | 4.3.0 | -| DeepSeek-R1-Distill-Qwen-32B | [✅](models/nlp/llm/deepseek-r1-distill-qwen-32b/vllm) | | | 4.3.0 | -| Llama2-7B | [✅](models/nlp/llm/llama2-7b/vllm) | [✅](models/nlp/llm/llama2-7b/trtllm) | | 4.3.0 | -| Llama2-13B | | [✅](models/nlp/llm/llama2-13b/trtllm) | | 4.3.0 | -| Llama2-70B | | [✅](models/nlp/llm/llama2-70b/trtllm) | | 4.3.0 | -| Llama3-70B | [✅](models/nlp/llm/llama3-70b/vllm) | | | 4.3.0 | -| Qwen-7B | [✅](models/nlp/llm/qwen-7b/vllm) | | | 4.3.0 | -| Qwen1.5-7B | [✅](models/nlp/llm/qwen1.5-7b/vllm) | | [✅](models/nlp/llm/qwen1.5-7b/tgi) | 4.3.0 | -| Qwen1.5-14B | [✅](models/nlp/llm/qwen1.5-14b/vllm) | | | 4.3.0 | -| Qwen1.5-32B Chat | [✅](models/nlp/llm/qwen1.5-32b/vllm) | | | 4.3.0 | -| Qwen1.5-72B | [✅](models/nlp/llm/qwen1.5-72b/vllm) | | | 4.3.0 | -| Qwen2-7B Instruct | [✅](models/nlp/llm/qwen2-7b/vllm) | | | 4.3.0 | -| Qwen2-72B Instruct | [✅](models/nlp/llm/qwen2-72b/vllm) | | | 4.3.0 | -| StableLM2-1.6B | [✅](models/nlp/llm/stablelm/vllm) | | | 4.3.0 | +| Model | Engine | Supported | IXUCA SDK | +|-------------------------------|------------|--------------------------------------------------------------------|-----------| +| Baichuan2-7B | vLLM | [✅](models/nlp/llm/baichuan2-7b/vllm) | 4.3.0 | +| ChatGLM-3-6B | vLLM | [✅](models/nlp/llm/chatglm3-6b/vllm) | 4.3.0 | +| ChatGLM-3-6B-32K | vLLM | [✅](models/nlp/llm/chatglm3-6b-32k/vllm) | 4.3.0 | +| DeepSeek-R1-Distill-Llama-8B | vLLM | [✅](models/nlp/llm/deepseek-r1-distill-llama-8b/vllm) | 4.3.0 | +| DeepSeek-R1-Distill-Llama-70B | vLLM | [✅](models/nlp/llm/deepseek-r1-distill-llama-70b/vllm) | 4.3.0 | +| DeepSeek-R1-Distill-Qwen-1.5B | vLLM | [✅](models/nlp/llm/deepseek-r1-distill-qwen-1.5b/vllm) | 4.3.0 | +| DeepSeek-R1-Distill-Qwen-7B | vLLM | [✅](models/nlp/llm/deepseek-r1-distill-qwen-7b/vllm) | 4.3.0 | +| DeepSeek-R1-Distill-Qwen-14B | vLLM | [✅](models/nlp/llm/deepseek-r1-distill-qwen-14b/vllm) | 4.3.0 | +| DeepSeek-R1-Distill-Qwen-32B | vLLM | [✅](models/nlp/llm/deepseek-r1-distill-qwen-32b/vllm) | 4.3.0 | +| ERNIE-4.5-21B-A3B | FastDeploy | [✅](models/nlp/llm/ernie-4.5-21b-a3b/fastdeploy/) | 4.3.0 | +| 
ERNIE-4.5-300B-A47B | FastDeploy | [✅](models/nlp/llm/ernie-4.5-300b-a47b/fastdeploy/) | 4.3.0 | +| GLM-4V | vLLM | [✅](models/multimodal/vision_language_model/glm-4v/vllm/) | 4.3.0 | +| InternLM3 | LMDeploy | [✅](models/nlp/llm/internlm3/lmdeploy/) | 4.3.0 | +| Llama2-7B | vLLM | [✅](models/nlp/llm/llama2-7b/vllm) | 4.3.0 | +| Llama2-7B | TRT-LLM | [✅](models/nlp/llm/llama2-7b/trtllm) | 4.3.0 | +| Llama2-13B | TRT-LLM | [✅](models/nlp/llm/llama2-13b/trtllm) | 4.3.0 | +| Llama2-70B | TRT-LLM | [✅](models/nlp/llm/llama2-70b/trtllm) | 4.3.0 | +| Llama3-70B | vLLM | [✅](models/nlp/llm/llama3-70b/vllm) | 4.3.0 | +| LLaVA-NeXT-based | vLLM | [✅](models/multimodal/vision_language_model/llava_next_base/vllm/) | 4.3.0 | +| MiniCPM O | vLLM | [✅](models/multimodal/vision_language_model/minicpm_o/vllm/) | 4.3.0 | +| MiniCPM V | vLLM | [✅](models/multimodal/vision_language_model/minicpm_v/vllm/) | 4.3.0 | +| Qwen-7B | vLLM | [✅](models/nlp/llm/qwen-7b/vllm) | 4.3.0 | +| Qwen-VL | vLLM | [✅](models/multimodal/vision_language_model/qwen_vl/) | 4.3.0 | +| Qwen2-VL | vLLM | [✅](models/multimodal/vision_language_model/qwen2_vl/) | 4.3.0 | +| Qwen2.5-VL | vLLM | [✅](models/multimodal/vision_language_model/qwen2.5_vl/) | 4.3.0 | +| Qwen1.5-7B | vLLM | [✅](models/nlp/llm/qwen1.5-7b/vllm) | 4.3.0 | +| Qwen1.5-7B | TGI | [✅](models/nlp/llm/qwen1.5-7b/tgi) | 4.3.0 | +| Qwen1.5-14B | vLLM | [✅](models/nlp/llm/qwen1.5-14b/vllm) | 4.3.0 | +| Qwen1.5-32B Chat | vLLM | [✅](models/nlp/llm/qwen1.5-32b/vllm) | 4.3.0 | +| Qwen1.5-72B | vLLM | [✅](models/nlp/llm/qwen1.5-72b/vllm) | 4.3.0 | +| Qwen2-7B Instruct | vLLM | [✅](models/nlp/llm/qwen2-7b/vllm) | 4.3.0 | +| Qwen2-72B Instruct | vLLM | [✅](models/nlp/llm/qwen2-72b/vllm) | 4.3.0 | +| StableLM2-1.6B | vLLM | [✅](models/nlp/llm/stablelm/vllm) | 4.3.0 | +| Whisper | vLLM | [✅](models/speech/asr/whisper/vllm/) | 4.3.0 | ### 计算机视觉(CV) -- Gitee From 5f4bf63ccb4224adc659cdedd787652b2a281a84 Mon Sep 17 00:00:00 2001 From: "mingjiang.li" Date: Mon, 11 Aug 2025 10:40:06 +0800 Subject: [PATCH 2/3] add 25.09 llm models to model list --- README.md | 80 +++++++++++++++++++++++++++------------------------- README_en.md | 67 ++++++++++++++++++++++++++----------------- 2 files changed, 82 insertions(+), 65 deletions(-) diff --git a/README.md b/README.md index 6729fe73..2596d50b 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,14 @@ -[](README_en.md) [](README.md) + + +[English](README_en.md) [Chinese](README.md) # DeepSparkInference
Homepage - - + LICENSE + Release

@@ -24,42 +26,42 @@ ### 大语言模型(LLM) -| Model | Engine | Supported | IXUCA SDK | -|-------------------------------|------------|--------------------------------------------------------------------|-----------| -| Baichuan2-7B | vLLM | [✅](models/nlp/llm/baichuan2-7b/vllm) | 4.3.0 | -| ChatGLM-3-6B | vLLM | [✅](models/nlp/llm/chatglm3-6b/vllm) | 4.3.0 | -| ChatGLM-3-6B-32K | vLLM | [✅](models/nlp/llm/chatglm3-6b-32k/vllm) | 4.3.0 | -| DeepSeek-R1-Distill-Llama-8B | vLLM | [✅](models/nlp/llm/deepseek-r1-distill-llama-8b/vllm) | 4.3.0 | -| DeepSeek-R1-Distill-Llama-70B | vLLM | [✅](models/nlp/llm/deepseek-r1-distill-llama-70b/vllm) | 4.3.0 | -| DeepSeek-R1-Distill-Qwen-1.5B | vLLM | [✅](models/nlp/llm/deepseek-r1-distill-qwen-1.5b/vllm) | 4.3.0 | -| DeepSeek-R1-Distill-Qwen-7B | vLLM | [✅](models/nlp/llm/deepseek-r1-distill-qwen-7b/vllm) | 4.3.0 | -| DeepSeek-R1-Distill-Qwen-14B | vLLM | [✅](models/nlp/llm/deepseek-r1-distill-qwen-14b/vllm) | 4.3.0 | -| DeepSeek-R1-Distill-Qwen-32B | vLLM | [✅](models/nlp/llm/deepseek-r1-distill-qwen-32b/vllm) | 4.3.0 | -| ERNIE-4.5-21B-A3B | FastDeploy | [✅](models/nlp/llm/ernie-4.5-21b-a3b/fastdeploy/) | 4.3.0 | -| ERNIE-4.5-300B-A47B | FastDeploy | [✅](models/nlp/llm/ernie-4.5-300b-a47b/fastdeploy/) | 4.3.0 | -| GLM-4V | vLLM | [✅](models/multimodal/vision_language_model/glm-4v/vllm/) | 4.3.0 | -| InternLM3 | LMDeploy | [✅](models/nlp/llm/internlm3/lmdeploy/) | 4.3.0 | -| Llama2-7B | vLLM | [✅](models/nlp/llm/llama2-7b/vllm) | 4.3.0 | -| Llama2-7B | TRT-LLM | [✅](models/nlp/llm/llama2-7b/trtllm) | 4.3.0 | -| Llama2-13B | TRT-LLM | [✅](models/nlp/llm/llama2-13b/trtllm) | 4.3.0 | -| Llama2-70B | TRT-LLM | [✅](models/nlp/llm/llama2-70b/trtllm) | 4.3.0 | -| Llama3-70B | vLLM | [✅](models/nlp/llm/llama3-70b/vllm) | 4.3.0 | -| LLaVA-NeXT-based | vLLM | [✅](models/multimodal/vision_language_model/llava_next_base/vllm/) | 4.3.0 | -| MiniCPM O | vLLM | [✅](models/multimodal/vision_language_model/minicpm_o/vllm/) | 4.3.0 | -| MiniCPM V | vLLM | [✅](models/multimodal/vision_language_model/minicpm_v/vllm/) | 4.3.0 | -| Qwen-7B | vLLM | [✅](models/nlp/llm/qwen-7b/vllm) | 4.3.0 | -| Qwen-VL | vLLM | [✅](models/multimodal/vision_language_model/qwen_vl/) | 4.3.0 | -| Qwen2-VL | vLLM | [✅](models/multimodal/vision_language_model/qwen2_vl/) | 4.3.0 | -| Qwen2.5-VL | vLLM | [✅](models/multimodal/vision_language_model/qwen2.5_vl/) | 4.3.0 | -| Qwen1.5-7B | vLLM | [✅](models/nlp/llm/qwen1.5-7b/vllm) | 4.3.0 | -| Qwen1.5-7B | TGI | [✅](models/nlp/llm/qwen1.5-7b/tgi) | 4.3.0 | -| Qwen1.5-14B | vLLM | [✅](models/nlp/llm/qwen1.5-14b/vllm) | 4.3.0 | -| Qwen1.5-32B Chat | vLLM | [✅](models/nlp/llm/qwen1.5-32b/vllm) | 4.3.0 | -| Qwen1.5-72B | vLLM | [✅](models/nlp/llm/qwen1.5-72b/vllm) | 4.3.0 | -| Qwen2-7B Instruct | vLLM | [✅](models/nlp/llm/qwen2-7b/vllm) | 4.3.0 | -| Qwen2-72B Instruct | vLLM | [✅](models/nlp/llm/qwen2-72b/vllm) | 4.3.0 | -| StableLM2-1.6B | vLLM | [✅](models/nlp/llm/stablelm/vllm) | 4.3.0 | -| Whisper | vLLM | [✅](models/speech/asr/whisper/vllm/) | 4.3.0 | +| Model | Engine | Supported | IXUCA SDK | +|-------------------------------|--------------|--------------------------------------------------------------------|-----------| +| Baichuan2-7B | `vLLM` | [✅](models/nlp/llm/baichuan2-7b/vllm) | 4.3.0 | +| ChatGLM-3-6B | `vLLM` | [✅](models/nlp/llm/chatglm3-6b/vllm) | 4.3.0 | +| ChatGLM-3-6B-32K | `vLLM` | [✅](models/nlp/llm/chatglm3-6b-32k/vllm) | 4.3.0 | +| DeepSeek-R1-Distill-Llama-8B | `vLLM` | [✅](models/nlp/llm/deepseek-r1-distill-llama-8b/vllm) | 4.3.0 | +| 
DeepSeek-R1-Distill-Llama-70B | `vLLM` | [✅](models/nlp/llm/deepseek-r1-distill-llama-70b/vllm) | 4.3.0 | +| DeepSeek-R1-Distill-Qwen-1.5B | `vLLM` | [✅](models/nlp/llm/deepseek-r1-distill-qwen-1.5b/vllm) | 4.3.0 | +| DeepSeek-R1-Distill-Qwen-7B | `vLLM` | [✅](models/nlp/llm/deepseek-r1-distill-qwen-7b/vllm) | 4.3.0 | +| DeepSeek-R1-Distill-Qwen-14B | `vLLM` | [✅](models/nlp/llm/deepseek-r1-distill-qwen-14b/vllm) | 4.3.0 | +| DeepSeek-R1-Distill-Qwen-32B | `vLLM` | [✅](models/nlp/llm/deepseek-r1-distill-qwen-32b/vllm) | 4.3.0 | +| ERNIE-4.5-21B-A3B | `FastDeploy` | [✅](models/nlp/llm/ernie-4.5-21b-a3b/fastdeploy/) | 4.3.0 | +| ERNIE-4.5-300B-A47B | `FastDeploy` | [✅](models/nlp/llm/ernie-4.5-300b-a47b/fastdeploy/) | 4.3.0 | +| GLM-4V | `vLLM` | [✅](models/multimodal/vision_language_model/glm-4v/vllm/) | 4.3.0 | +| InternLM3 | `LMDeploy` | [✅](models/nlp/llm/internlm3/lmdeploy/) | 4.3.0 | +| Llama2-7B | `vLLM` | [✅](models/nlp/llm/llama2-7b/vllm) | 4.3.0 | +| Llama2-7B | `TRT-LLM` | [✅](models/nlp/llm/llama2-7b/trtllm) | 4.3.0 | +| Llama2-13B | `TRT-LLM` | [✅](models/nlp/llm/llama2-13b/trtllm) | 4.3.0 | +| Llama2-70B | `TRT-LLM` | [✅](models/nlp/llm/llama2-70b/trtllm) | 4.3.0 | +| Llama3-70B | `vLLM` | [✅](models/nlp/llm/llama3-70b/vllm) | 4.3.0 | +| LLaVA-NeXT-based | `vLLM` | [✅](models/multimodal/vision_language_model/llava_next_base/vllm/) | 4.3.0 | +| MiniCPM-o | `vLLM` | [✅](models/multimodal/vision_language_model/minicpm_o/vllm/) | 4.3.0 | +| MiniCPM-V | `vLLM` | [✅](models/multimodal/vision_language_model/minicpm_v/vllm/) | 4.3.0 | +| Qwen-7B | `vLLM` | [✅](models/nlp/llm/qwen-7b/vllm) | 4.3.0 | +| Qwen-VL | `vLLM` | [✅](models/multimodal/vision_language_model/qwen_vl/) | 4.3.0 | +| Qwen2-VL | `vLLM` | [✅](models/multimodal/vision_language_model/qwen2_vl/) | 4.3.0 | +| Qwen2.5-VL | `vLLM` | [✅](models/multimodal/vision_language_model/qwen2.5_vl/) | 4.3.0 | +| Qwen1.5-7B | `vLLM` | [✅](models/nlp/llm/qwen1.5-7b/vllm) | 4.3.0 | +| Qwen1.5-7B | `TGI` | [✅](models/nlp/llm/qwen1.5-7b/tgi) | 4.3.0 | +| Qwen1.5-14B | `vLLM` | [✅](models/nlp/llm/qwen1.5-14b/vllm) | 4.3.0 | +| Qwen1.5-32B Chat | `vLLM` | [✅](models/nlp/llm/qwen1.5-32b/vllm) | 4.3.0 | +| Qwen1.5-72B | `vLLM` | [✅](models/nlp/llm/qwen1.5-72b/vllm) | 4.3.0 | +| Qwen2-7B Instruct | `vLLM` | [✅](models/nlp/llm/qwen2-7b/vllm) | 4.3.0 | +| Qwen2-72B Instruct | `vLLM` | [✅](models/nlp/llm/qwen2-72b/vllm) | 4.3.0 | +| StableLM2-1.6B | `vLLM` | [✅](models/nlp/llm/stablelm/vllm) | 4.3.0 | +| Whisper | `vLLM` | [✅](models/speech/asr/whisper/vllm/) | 4.3.0 | ### 计算机视觉(CV) diff --git a/README_en.md b/README_en.md index af0e89c1..f6da1604 100644 --- a/README_en.md +++ b/README_en.md @@ -1,12 +1,14 @@ -[](README_en.md) [](README.md) + + +[English](README_en.md) [Chinese](README.md) # DeepSparkInference
Homepage - - + LICENSE + Release

@@ -34,29 +36,42 @@ inference to be expanded in the future. ### LLM (Large Language Model) -| Model | vLLM | TRT-LLM | TGI | IXUCA SDK | -|-------------------------------|--------------------------------------------------------|---------------------------------------|------------------------------------|-----------| -| Baichuan2-7B | [✅](models/nlp/llm/baichuan2-7b/vllm) | | | 4.3.0 | -| ChatGLM-3-6B | [✅](models/nlp/llm/chatglm3-6b/vllm) | | | 4.3.0 | -| ChatGLM-3-6B-32K | [✅](models/nlp/llm/chatglm3-6b-32k/vllm) | | | 4.3.0 | -| DeepSeek-R1-Distill-Llama-8B | [✅](models/nlp/llm/deepseek-r1-distill-llama-8b/vllm) | | | 4.3.0 | -| DeepSeek-R1-Distill-Llama-70B | [✅](models/nlp/llm/deepseek-r1-distill-llama-70b/vllm) | | | 4.3.0 | -| DeepSeek-R1-Distill-Qwen-1.5B | [✅](models/nlp/llm/deepseek-r1-distill-qwen-1.5b/vllm) | | | 4.3.0 | -| DeepSeek-R1-Distill-Qwen-7B | [✅](models/nlp/llm/deepseek-r1-distill-qwen-7b/vllm) | | | 4.3.0 | -| DeepSeek-R1-Distill-Qwen-14B | [✅](models/nlp/llm/deepseek-r1-distill-qwen-14b/vllm) | | | 4.3.0 | -| DeepSeek-R1-Distill-Qwen-32B | [✅](models/nlp/llm/deepseek-r1-distill-qwen-32b/vllm) | | | 4.3.0 | -| Llama2-7B | [✅](models/nlp/llm/llama2-7b/vllm) | [✅](models/nlp/llm/llama2-7b/trtllm) | | 4.3.0 | -| Llama2-13B | | [✅](models/nlp/llm/llama2-13b/trtllm) | | 4.3.0 | -| Llama2-70B | | [✅](models/nlp/llm/llama2-70b/trtllm) | | 4.3.0 | -| Llama3-70B | [✅](models/nlp/llm/llama3-70b/vllm) | | | 4.3.0 | -| Qwen-7B | [✅](models/nlp/llm/qwen-7b/vllm) | | | 4.3.0 | -| Qwen1.5-7B | [✅](models/nlp/llm/qwen1.5-7b/vllm) | | [✅](models/nlp/llm/qwen1.5-7b/tgi) | 4.3.0 | -| Qwen1.5-14B | [✅](models/nlp/llm/qwen1.5-14b/vllm) | | | 4.3.0 | -| Qwen1.5-32B Chat | [✅](models/nlp/llm/qwen1.5-32b/vllm) | | | 4.3.0 | -| Qwen1.5-72B | [✅](models/nlp/llm/qwen1.5-72b/vllm) | | | 4.3.0 | -| Qwen2-7B Instruct | [✅](models/nlp/llm/qwen2-7b/vllm) | | | 4.3.0 | -| Qwen2-72B Instruct | [✅](models/nlp/llm/qwen2-72b/vllm) | | | 4.3.0 | -| StableLM2-1.6B | [✅](models/nlp/llm/stablelm/vllm) | | | 4.3.0 | +| Model | Engine | Supported | IXUCA SDK | +|-------------------------------|--------------|--------------------------------------------------------------------|-----------| +| Baichuan2-7B | `vLLM` | [✅](models/nlp/llm/baichuan2-7b/vllm) | 4.3.0 | +| ChatGLM-3-6B | `vLLM` | [✅](models/nlp/llm/chatglm3-6b/vllm) | 4.3.0 | +| ChatGLM-3-6B-32K | `vLLM` | [✅](models/nlp/llm/chatglm3-6b-32k/vllm) | 4.3.0 | +| DeepSeek-R1-Distill-Llama-8B | `vLLM` | [✅](models/nlp/llm/deepseek-r1-distill-llama-8b/vllm) | 4.3.0 | +| DeepSeek-R1-Distill-Llama-70B | `vLLM` | [✅](models/nlp/llm/deepseek-r1-distill-llama-70b/vllm) | 4.3.0 | +| DeepSeek-R1-Distill-Qwen-1.5B | `vLLM` | [✅](models/nlp/llm/deepseek-r1-distill-qwen-1.5b/vllm) | 4.3.0 | +| DeepSeek-R1-Distill-Qwen-7B | `vLLM` | [✅](models/nlp/llm/deepseek-r1-distill-qwen-7b/vllm) | 4.3.0 | +| DeepSeek-R1-Distill-Qwen-14B | `vLLM` | [✅](models/nlp/llm/deepseek-r1-distill-qwen-14b/vllm) | 4.3.0 | +| DeepSeek-R1-Distill-Qwen-32B | `vLLM` | [✅](models/nlp/llm/deepseek-r1-distill-qwen-32b/vllm) | 4.3.0 | +| ERNIE-4.5-21B-A3B | `FastDeploy` | [✅](models/nlp/llm/ernie-4.5-21b-a3b/fastdeploy/) | 4.3.0 | +| ERNIE-4.5-300B-A47B | `FastDeploy` | [✅](models/nlp/llm/ernie-4.5-300b-a47b/fastdeploy/) | 4.3.0 | +| GLM-4V | `vLLM` | [✅](models/multimodal/vision_language_model/glm-4v/vllm/) | 4.3.0 | +| InternLM3 | `LMDeploy` | [✅](models/nlp/llm/internlm3/lmdeploy/) | 4.3.0 | +| Llama2-7B | `vLLM` | [✅](models/nlp/llm/llama2-7b/vllm) | 4.3.0 | +| Llama2-7B | `TRT-LLM` | 
[✅](models/nlp/llm/llama2-7b/trtllm) | 4.3.0 | +| Llama2-13B | `TRT-LLM` | [✅](models/nlp/llm/llama2-13b/trtllm) | 4.3.0 | +| Llama2-70B | `TRT-LLM` | [✅](models/nlp/llm/llama2-70b/trtllm) | 4.3.0 | +| Llama3-70B | `vLLM` | [✅](models/nlp/llm/llama3-70b/vllm) | 4.3.0 | +| LLaVA-NeXT-based | `vLLM` | [✅](models/multimodal/vision_language_model/llava_next_base/vllm/) | 4.3.0 | +| MiniCPM-o | `vLLM` | [✅](models/multimodal/vision_language_model/minicpm_o/vllm/) | 4.3.0 | +| MiniCPM-V | `vLLM` | [✅](models/multimodal/vision_language_model/minicpm_v/vllm/) | 4.3.0 | +| Qwen-7B | `vLLM` | [✅](models/nlp/llm/qwen-7b/vllm) | 4.3.0 | +| Qwen-VL | `vLLM` | [✅](models/multimodal/vision_language_model/qwen_vl/) | 4.3.0 | +| Qwen2-VL | `vLLM` | [✅](models/multimodal/vision_language_model/qwen2_vl/) | 4.3.0 | +| Qwen2.5-VL | `vLLM` | [✅](models/multimodal/vision_language_model/qwen2.5_vl/) | 4.3.0 | +| Qwen1.5-7B | `vLLM` | [✅](models/nlp/llm/qwen1.5-7b/vllm) | 4.3.0 | +| Qwen1.5-7B | `TGI` | [✅](models/nlp/llm/qwen1.5-7b/tgi) | 4.3.0 | +| Qwen1.5-14B | `vLLM` | [✅](models/nlp/llm/qwen1.5-14b/vllm) | 4.3.0 | +| Qwen1.5-32B Chat | `vLLM` | [✅](models/nlp/llm/qwen1.5-32b/vllm) | 4.3.0 | +| Qwen1.5-72B | `vLLM` | [✅](models/nlp/llm/qwen1.5-72b/vllm) | 4.3.0 | +| Qwen2-7B Instruct | `vLLM` | [✅](models/nlp/llm/qwen2-7b/vllm) | 4.3.0 | +| Qwen2-72B Instruct | `vLLM` | [✅](models/nlp/llm/qwen2-72b/vllm) | 4.3.0 | +| StableLM2-1.6B | `vLLM` | [✅](models/nlp/llm/stablelm/vllm) | 4.3.0 | +| Whisper | `vLLM` | [✅](models/speech/asr/whisper/vllm/) | 4.3.0 | ### Computer Vision -- Gitee From 65ed33796cb3ead408cb2ec72759a17463c54c7b Mon Sep 17 00:00:00 2001 From: "mingjiang.li" Date: Mon, 11 Aug 2025 10:42:03 +0800 Subject: [PATCH 3/3] disable not necessary markdownlint --- RELEASE.md | 3 +++ .../vision_language_model/minicpm_o/vllm/README.md | 9 ++++++++- .../vision_language_model/minicpm_v/vllm/README.md | 2 +- models/nlp/llm/ernie-4.5-300b-a47b/fastdeploy/README.md | 2 -- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index bd3f8502..11c6fa09 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,3 +1,6 @@ + + + # DeepSparkInference Release Notes ## 25.06 Release Notes diff --git a/models/multimodal/vision_language_model/minicpm_o/vllm/README.md b/models/multimodal/vision_language_model/minicpm_o/vllm/README.md index 2fa3a6bf..2082cff4 100644 --- a/models/multimodal/vision_language_model/minicpm_o/vllm/README.md +++ b/models/multimodal/vision_language_model/minicpm_o/vllm/README.md @@ -2,7 +2,13 @@ ## Model Description -The most capable model in the MiniCPM-o series. With a total of 8B parameters, this end-to-end model achieves comparable performance to GPT-4o-202405 in vision, speech, and multimodal live streaming, making it one of the most versatile and performant models in the open-source community. For the new voice mode, MiniCPM-o 2.6 supports bilingual real-time speech conversation with configurable voices, and also allows for fun capabilities such as emotion/speed/style control, end-to-end voice cloning, role play, etc. It also advances MiniCPM-V 2.6's visual capabilities such strong OCR capability, trustworthy behavior, multilingual support, and video understanding. Due to its superior token density, MiniCPM-o 2.6 can for the first time support multimodal live streaming on end-side devices such as iPad. +The most capable model in the MiniCPM-o series. 
With a total of 8B parameters, this end-to-end model achieves comparable
+performance to GPT-4o-202405 in vision, speech, and multimodal live streaming, making it one of the most versatile and
+performant models in the open-source community. For the new voice mode, MiniCPM-o 2.6 supports bilingual real-time
+speech conversation with configurable voices, and also allows for fun capabilities such as emotion/speed/style control,
+end-to-end voice cloning, role play, etc. It also advances MiniCPM-V 2.6's visual capabilities such as strong OCR
+capability, trustworthy behavior, multilingual support, and video understanding. Due to its superior token density,
+MiniCPM-o 2.6 can for the first time support multimodal live streaming on end-side devices such as iPad.
 
 ## Supported Environments
 
@@ -23,6 +29,7 @@ cp -r ../../vllm_public_assets/ ./
 
 ### Install Dependencies
 
 Contact the Iluvatar administrator to get the missing packages:
+
 - transformers-4.45.2+corex.4.3.0-py3-none-any.whl
 
 ## Model Inference
diff --git a/models/multimodal/vision_language_model/minicpm_v/vllm/README.md b/models/multimodal/vision_language_model/minicpm_v/vllm/README.md
index ef8fa31b..fa0fdb36 100644
--- a/models/multimodal/vision_language_model/minicpm_v/vllm/README.md
+++ b/models/multimodal/vision_language_model/minicpm_v/vllm/README.md
@@ -2,7 +2,7 @@
 
 ## Model Description
 
-MiniCPM V2 is a compact and efficient language model designed for various natural language processing (NLP) tasks.
+MiniCPM-V 2 is a compact and efficient language model designed for various natural language processing (NLP) tasks.
 Building on its predecessor, MiniCPM-V-1, this model integrates advancements in architecture and optimization
 techniques, making it suitable for deployment in resource-constrained environments.s
 
diff --git a/models/nlp/llm/ernie-4.5-300b-a47b/fastdeploy/README.md b/models/nlp/llm/ernie-4.5-300b-a47b/fastdeploy/README.md
index cdbeca69..1cdc44e6 100644
--- a/models/nlp/llm/ernie-4.5-300b-a47b/fastdeploy/README.md
+++ b/models/nlp/llm/ernie-4.5-300b-a47b/fastdeploy/README.md
@@ -23,8 +23,6 @@ issue will be optimized in subsequent versions.
 
 - Model: 
 
-
-
 ```sh
 # Pull the docker image
 docker pull ccr-2vdh3abv-pub.cnc.bj.baidubce.com/device/paddle-ixuca:latest
-- 
Gitee
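
The tables added above only record which serving engine covers each model. As a quick illustration of what a `vLLM` entry implies in practice, the sketch below runs one of the newly listed checkpoints through vLLM's offline API; the model ID, prompt, and sampling settings are illustrative assumptions and are not taken from this patch or from the per-model READMEs.

```python
# Minimal vLLM offline-inference sketch for one of the vLLM-backed entries above.
# Assumptions: vLLM is installed in the target environment and the checkpoint
# "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B" is available locally or downloadable.
from vllm import LLM, SamplingParams

prompts = ["Briefly explain what speculative decoding is."]
sampling_params = SamplingParams(temperature=0.7, top_p=0.95, max_tokens=256)

# Build the engine once; any model marked vLLM in the table could be substituted.
llm = LLM(model="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", trust_remote_code=True)

for output in llm.generate(prompts, sampling_params):
    print(output.outputs[0].text)
```

The `TRT-LLM`, `TGI`, `FastDeploy`, and `LMDeploy` entries follow their own launch flows, which are documented in the linked per-model directories.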