From ef351b980ca118bad2c885f416b811fcabd35c15 Mon Sep 17 00:00:00 2001
From: "hongliang.yuan"
Date: Thu, 24 Apr 2025 13:52:24 +0800
Subject: [PATCH] remove old minicpm-v-2

---
 README.md                                    |  2 +-
 README_en.md                                 |  2 +-
 .../minicpm_v_2/vllm/README.md               | 51 -----------
 .../minicpm_v_2/vllm/ci/prepare.sh           | 31 -------
 .../minicpm_v_2/vllm/minicpmv-2.0-offline.py | 91 -------------------
 tests/model_info.json                        |  6 +-
 tests/run_vllm.py                            |  9 --
 7 files changed, 5 insertions(+), 187 deletions(-)
 delete mode 100644 models/multimodal/vision_language_model/minicpm_v_2/vllm/README.md
 delete mode 100644 models/multimodal/vision_language_model/minicpm_v_2/vllm/ci/prepare.sh
 delete mode 100644 models/multimodal/vision_language_model/minicpm_v_2/vllm/minicpmv-2.0-offline.py

diff --git a/README.md b/README.md
index 5f67cbb9..2dc67b0e 100644
--- a/README.md
+++ b/README.md
@@ -242,7 +242,7 @@ DeepSparkInference将按季度进行版本更新，后续会逐步丰富模型
 | InternVL2-4B | [✅](models/multimodal/vision_language_model/intern_vl/vllm) | | 4.2.0 |
 | LLaVA | [✅](models/multimodal/vision_language_model/llava/vllm) | | 4.2.0 |
 | LLaVA-Next-Video-7B | [✅](models/multimodal/vision_language_model/llava_next_video_7b/vllm) | | 4.2.0 |
-| MiniCPM V2 | [✅](models/multimodal/vision_language_model/minicpm_v_2/vllm) | | 4.2.0 |
+| MiniCPM V2 | [✅](models/multimodal/vision_language_model/minicpm_v/vllm) | | 4.2.0 |
 
 ### 自然语言处理（NLP）
 
diff --git a/README_en.md b/README_en.md
index 091b9a7d..6450df59 100644
--- a/README_en.md
+++ b/README_en.md
@@ -252,7 +252,7 @@ inference to be expanded in the future.
 | InternVL2-4B | [✅](models/multimodal/vision_language_model/intern_vl/vllm) | | 4.2.0 |
 | LLaVA | [✅](models/multimodal/vision_language_model/llava/vllm) | | 4.2.0 |
 | LLaVA-Next-Video-7B | [✅](models/multimodal/vision_language_model/llava_next_video_7b/vllm) | | 4.2.0 |
-| MiniCPM V2 | [✅](models/multimodal/vision_language_model/minicpm_v_2/vllm) | | 4.2.0 |
+| MiniCPM V2 | [✅](models/multimodal/vision_language_model/minicpm_v/vllm) | | 4.2.0 |
 
 ### NLP
 
diff --git a/models/multimodal/vision_language_model/minicpm_v_2/vllm/README.md b/models/multimodal/vision_language_model/minicpm_v_2/vllm/README.md
deleted file mode 100644
index f264aa2c..00000000
--- a/models/multimodal/vision_language_model/minicpm_v_2/vllm/README.md
+++ /dev/null
@@ -1,51 +0,0 @@
-# MiniCPM V2
-
-## Model Description
-
-MiniCPM V2 is a compact and efficient language model designed for various natural language processing (NLP) tasks. Building on its predecessor, MiniCPM-V-1, this model integrates advancements in architecture and optimization techniques, making it suitable for deployment in resource-constrained environments.s
-
-## Supported Environments
-
-| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release |
-|--------|-----------|---------|
-| MR-V100 | 4.2.0 | 25.03 |
-
-## Model Preparation
-
-### Prepare Resources
-
-- Model: 
-Note: Due to the official weights missing some necessary files for vllm execution, you can download the additional files from here: to ensure that the file directory matches the structure shown here: .
-
-```bash
-# Download model from the website and make sure the model's path is "data/MiniCPM-V-2"
-mkdir data/
-```
-
-### Install Dependencies
-
-In order to run the model smoothly, you need to get the sdk from [resource center](https://support.iluvatar.com/#/ProductLine?id=2) of Iluvatar CoreX official website.
-
-```bash
-# Install libGL
-## CentOS
-yum install -y mesa-libGL
-## Ubuntu
-apt install -y libgl1-mesa-glx
-pip3 install timm==0.9.10
-pip3 install transformers
-pip3 install --user --upgrade pillow -i https://pypi.tuna.tsinghua.edu.cn/simple
-```
-
-## Model Inference
-
-```bash
-export PT_SDPA_ENABLE_HEAD_DIM_PADDING=1
-export PATH=/usr/local/corex/bin:${PATH}
-export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64
-```
-
-```bash
-wget https://img.zcool.cn/community/012e285a1ea496a8012171323c6bf1.jpg -O dog.jpg
-python3 minicpmv-2.0-offline.py --model-path /path/to/model --image-path ./dog.jpg
-```
diff --git a/models/multimodal/vision_language_model/minicpm_v_2/vllm/ci/prepare.sh b/models/multimodal/vision_language_model/minicpm_v_2/vllm/ci/prepare.sh
deleted file mode 100644
index f1c0b9c8..00000000
--- a/models/multimodal/vision_language_model/minicpm_v_2/vllm/ci/prepare.sh
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
-# All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"); you may
-# not use this file except in compliance with the License. You may obtain
-# a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-set -x
-ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
-if [[ ${ID} == "ubuntu" ]]; then
-    apt install -y libgl1-mesa-glx
-elif [[ ${ID} == "centos" ]]; then
-    yum install -y mesa-libGL
-else
-    echo "Not Support Os"
-fi
-
-pip3 install timm==0.9.10
-pip3 install transformers
-pip3 install --user --upgrade pillow -i https://pypi.tuna.tsinghua.edu.cn/simple
-
-cp /mnt/deepspark/data/datasets/dog.jpg ./
\ No newline at end of file
diff --git a/models/multimodal/vision_language_model/minicpm_v_2/vllm/minicpmv-2.0-offline.py b/models/multimodal/vision_language_model/minicpm_v_2/vllm/minicpmv-2.0-offline.py
deleted file mode 100644
index 1da0fdd8..00000000
--- a/models/multimodal/vision_language_model/minicpm_v_2/vllm/minicpmv-2.0-offline.py
+++ /dev/null
@@ -1,91 +0,0 @@
-# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
-# All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"); you may
-# not use this file except in compliance with the License. You may obtain
-# a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
-# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
-# License for the specific language governing permissions and limitations
-# under the License.
-
-from PIL import Image
-from transformers import AutoTokenizer
-from vllm import LLM, SamplingParams
-import argparse
-
-def main(args):
-    # 图像文件路径列表
-    ## wget https://img.zcool.cn/community/012e285a1ea496a8012171323c6bf1.jpg@3000w_1l_0o_100sh.jpg -O dog.jpg
-    IMAGES = [
-        args.image_path,  # 本地图片路径
-    ]
-
-    # 模型名称或路径
-    MODEL_NAME = args.model_path  # 本地模型路径或Hugging Face模型名称
-
-    # 打开并转换图像
-    image = Image.open(IMAGES[0]).convert("RGB")
-
-    # 初始化分词器
-    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
-
-    # 初始化语言模型
-    llm = LLM(model=MODEL_NAME,
-              gpu_memory_utilization=0.95,  # 使用全部GPU内存
-              trust_remote_code=True,
-              max_model_len=2048,
-              # max_num_seqs=1,
-              max_num_batched_tokens=2048,)  # 根据内存状况可调整此值
-
-    # 构建对话消息
-    messages = [{'role': 'user', 'content': '(./)\n' + '请描述这张图片'}]
-
-    # 应用对话模板到消息
-    prompt = tokenizer.apply_chat_template(messages)
-
-    # 设置停止符ID
-    # 2.0
-    stop_token_ids = [tokenizer.eos_id]
-    # 2.5
-    #stop_token_ids = [tokenizer.eos_id, tokenizer.eot_id]
-    # 2.6
-    # stop_tokens = ['<|im_end|>', '<|endoftext|>']
-    # stop_token_ids = [tokenizer.convert_tokens_to_ids(i) for i in stop_tokens]
-
-    # 设置生成参数
-    sampling_params = SamplingParams(
-        stop_token_ids=stop_token_ids,
-        # temperature=0.7,
-        # top_p=0.8,
-        # top_k=100,
-        # seed=3472,
-        max_tokens=1024,
-        # min_tokens=150,
-        temperature=0,
-        # use_beam_search=False,
-        # length_penalty=1.2,
-        best_of=1)
-
-    # 获取模型输出
-    outputs = llm.generate({
-        "prompt": prompt,
-        "multi_modal_data": {
-            "image": image
-        }
-    }, sampling_params=sampling_params)
-    print(outputs[0].outputs[0].text)
-
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--model-path", type=str, default=None, help="model path")
-    parser.add_argument("--image-path", type=str, default=None, help="sample image path")
-    args = parser.parse_args()
-
-    main(args)
\ No newline at end of file
diff --git a/tests/model_info.json b/tests/model_info.json
index e30687a1..2c9fec4f 100644
--- a/tests/model_info.json
+++ b/tests/model_info.json
@@ -4859,7 +4859,7 @@
         "demoType": ""
     },
     {
-        "model_name": "minicpm_v_2",
+        "model_name": "minicpm_v",
         "framework": "vllm",
         "release_version": "25.03",
         "release_sdk": "CoreX 4.2.0",
@@ -4871,8 +4871,8 @@
         "mdims": "",
         "dataset": "",
         "license": "",
-        "model_path": "models/multimodal/vision_language_model/minicpm_v_2/vllm/",
-        "readme_file": "models/multimodal/vision_language_model/minicpm_v_2/vllm/README.md",
+        "model_path": "models/multimodal/vision_language_model/minicpm_v/vllm/",
+        "readme_file": "models/multimodal/vision_language_model/minicpm_v/vllm/README.md",
         "bitbucket_repo": "",
         "bitbucket_branch": "",
         "bitbucket_path": "",
diff --git a/tests/run_vllm.py b/tests/run_vllm.py
index 96151bd4..a200569c 100644
--- a/tests/run_vllm.py
+++ b/tests/run_vllm.py
@@ -204,15 +204,6 @@ def run_nlp_testcase(model):
         export CUDA_VISIBLE_DEVICES=0,1
         python3 offline_inference.py --model ./stablelm --max-tokens 256 -tp 1 --temperature 0.0
         """
-    elif model_name == "minicpm-v-2":
-        script = f"""
-        set -x
-        cd ../{model['model_path']}
-        export PT_SDPA_ENABLE_HEAD_DIM_PADDING=1
-        export PATH=/usr/local/corex/bin:${PATH}
-        export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64
-        python3 minicpmv-2.0-offline.py --model-path ./minicpm-v-2 --image-path ./dog.jpg
-        """
    elif model_name.startswith("deepseek-r1-distill-"):
         script = f"""
         set -x
-- 
Gitee