From ef351b980ca118bad2c885f416b811fcabd35c15 Mon Sep 17 00:00:00 2001
From: "hongliang.yuan"
Date: Thu, 24 Apr 2025 13:52:24 +0800
Subject: [PATCH] remove old minicpm-v-2

---
 README.md                                    |  2 +-
 README_en.md                                 |  2 +-
 .../minicpm_v_2/vllm/README.md               | 51 -----------
 .../minicpm_v_2/vllm/ci/prepare.sh           | 31 -------
 .../minicpm_v_2/vllm/minicpmv-2.0-offline.py | 91 -------------------
 tests/model_info.json                        |  6 +-
 tests/run_vllm.py                            |  9 --
 7 files changed, 5 insertions(+), 187 deletions(-)
 delete mode 100644 models/multimodal/vision_language_model/minicpm_v_2/vllm/README.md
 delete mode 100644 models/multimodal/vision_language_model/minicpm_v_2/vllm/ci/prepare.sh
 delete mode 100644 models/multimodal/vision_language_model/minicpm_v_2/vllm/minicpmv-2.0-offline.py

diff --git a/README.md b/README.md
index 5f67cbb9..2dc67b0e 100644
--- a/README.md
+++ b/README.md
@@ -242,7 +242,7 @@ DeepSparkInference将按季度进行版本更新，后续会逐步丰富模型
 | InternVL2-4B | [✅](models/multimodal/vision_language_model/intern_vl/vllm) | | 4.2.0 |
 | LLaVA | [✅](models/multimodal/vision_language_model/llava/vllm) | | 4.2.0 |
 | LLaVA-Next-Video-7B | [✅](models/multimodal/vision_language_model/llava_next_video_7b/vllm) | | 4.2.0 |
-| MiniCPM V2 | [✅](models/multimodal/vision_language_model/minicpm_v_2/vllm) | | 4.2.0 |
+| MiniCPM V2 | [✅](models/multimodal/vision_language_model/minicpm_v/vllm) | | 4.2.0 |
 
 ### 自然语言处理（NLP）
 
diff --git a/README_en.md b/README_en.md
index 091b9a7d..6450df59 100644
--- a/README_en.md
+++ b/README_en.md
@@ -252,7 +252,7 @@ inference to be expanded in the future.
 | InternVL2-4B | [✅](models/multimodal/vision_language_model/intern_vl/vllm) | | 4.2.0 |
 | LLaVA | [✅](models/multimodal/vision_language_model/llava/vllm) | | 4.2.0 |
 | LLaVA-Next-Video-7B | [✅](models/multimodal/vision_language_model/llava_next_video_7b/vllm) | | 4.2.0 |
-| MiniCPM V2 | [✅](models/multimodal/vision_language_model/minicpm_v_2/vllm) | | 4.2.0 |
+| MiniCPM V2 | [✅](models/multimodal/vision_language_model/minicpm_v/vllm) | | 4.2.0 |
 
 ### NLP
 
diff --git a/models/multimodal/vision_language_model/minicpm_v_2/vllm/README.md b/models/multimodal/vision_language_model/minicpm_v_2/vllm/README.md
deleted file mode 100644
index f264aa2c..00000000
--- a/models/multimodal/vision_language_model/minicpm_v_2/vllm/README.md
+++ /dev/null
@@ -1,51 +0,0 @@
-# MiniCPM V2
-
-## Model Description
-
-MiniCPM V2 is a compact and efficient language model designed for various natural language processing (NLP) tasks. Building on its predecessor, MiniCPM-V-1, this model integrates advancements in architecture and optimization techniques, making it suitable for deployment in resource-constrained environments.s
-
-## Supported Environments
-
-| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release |
-|--------|-----------|---------|
-| MR-V100 | 4.2.0 | 25.03 |
-
-## Model Preparation
-
-### Prepare Resources
-
-- Model: 
-Note: Due to the official weights missing some necessary files for vllm execution, you can download the additional files from here: to ensure that the file directory matches the structure shown here: .
-
-```bash
-# Download model from the website and make sure the model's path is "data/MiniCPM-V-2"
-mkdir data/
-```
-
-### Install Dependencies
-
-In order to run the model smoothly, you need to get the sdk from [resource center](https://support.iluvatar.com/#/ProductLine?id=2) of Iluvatar CoreX official website.
-
-```bash
-# Install libGL
-## CentOS
-yum install -y mesa-libGL
-## Ubuntu
-apt install -y libgl1-mesa-glx
-pip3 install timm==0.9.10
-pip3 install transformers
-pip3 install --user --upgrade pillow -i https://pypi.tuna.tsinghua.edu.cn/simple
-```
-
-## Model Inference
-
-```bash
-export PT_SDPA_ENABLE_HEAD_DIM_PADDING=1
-export PATH=/usr/local/corex/bin:${PATH}
-export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64
-```
-
-```bash
-wget https://img.zcool.cn/community/012e285a1ea496a8012171323c6bf1.jpg -O dog.jpg
-python3 minicpmv-2.0-offline.py --model-path /path/to/model --image-path ./dog.jpg
-```
diff --git a/models/multimodal/vision_language_model/minicpm_v_2/vllm/ci/prepare.sh b/models/multimodal/vision_language_model/minicpm_v_2/vllm/ci/prepare.sh
deleted file mode 100644
index f1c0b9c8..00000000
--- a/models/multimodal/vision_language_model/minicpm_v_2/vllm/ci/prepare.sh
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
-# All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"); you may
-# not use this file except in compliance with the License. You may obtain
-# a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-set -x
-ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
-if [[ ${ID} == "ubuntu" ]]; then
-    apt install -y libgl1-mesa-glx
-elif [[ ${ID} == "centos" ]]; then
-    yum install -y mesa-libGL
-else
-    echo "Not Support Os"
-fi
-
-pip3 install timm==0.9.10
-pip3 install transformers
-pip3 install --user --upgrade pillow -i https://pypi.tuna.tsinghua.edu.cn/simple
-
-cp /mnt/deepspark/data/datasets/dog.jpg ./
\ No newline at end of file
diff --git a/models/multimodal/vision_language_model/minicpm_v_2/vllm/minicpmv-2.0-offline.py b/models/multimodal/vision_language_model/minicpm_v_2/vllm/minicpmv-2.0-offline.py
deleted file mode 100644
index 1da0fdd8..00000000
--- a/models/multimodal/vision_language_model/minicpm_v_2/vllm/minicpmv-2.0-offline.py
+++ /dev/null
@@ -1,91 +0,0 @@
-# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
-# All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"); you may
-# not use this file except in compliance with the License. You may obtain
-# a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
-# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
-# License for the specific language governing permissions and limitations
-# under the License.
-
-from PIL import Image
-from transformers import AutoTokenizer
-from vllm import LLM, SamplingParams
-import argparse
-
-def main(args):
-    # 图像文件路径列表
-    ## wget https://img.zcool.cn/community/012e285a1ea496a8012171323c6bf1.jpg@3000w_1l_0o_100sh.jpg -O dog.jpg
-    IMAGES = [
-        args.image_path,  # 本地图片路径
-    ]
-
-    # 模型名称或路径
-    MODEL_NAME = args.model_path  # 本地模型路径或Hugging Face模型名称
-
-    # 打开并转换图像
-    image = Image.open(IMAGES[0]).convert("RGB")
-
-    # 初始化分词器
-    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
-
-    # 初始化语言模型
-    llm = LLM(model=MODEL_NAME,
-              gpu_memory_utilization=0.95,  # 使用全部GPU内存
-              trust_remote_code=True,
-              max_model_len=2048,
-              # max_num_seqs=1,
-              max_num_batched_tokens=2048,)  # 根据内存状况可调整此值
-
-    # 构建对话消息
-    messages = [{'role': 'user', 'content': '(./)\n' + '请描述这张图片'}]
-
-    # 应用对话模板到消息
-    prompt = tokenizer.apply_chat_template(messages)
-
-    # 设置停止符ID
-    # 2.0
-    stop_token_ids = [tokenizer.eos_id]
-    # 2.5
-    #stop_token_ids = [tokenizer.eos_id, tokenizer.eot_id]
-    # 2.6
-    # stop_tokens = ['<|im_end|>', '<|endoftext|>']
-    # stop_token_ids = [tokenizer.convert_tokens_to_ids(i) for i in stop_tokens]
-
-    # 设置生成参数
-    sampling_params = SamplingParams(
-        stop_token_ids=stop_token_ids,
-        # temperature=0.7,
-        # top_p=0.8,
-        # top_k=100,
-        # seed=3472,
-        max_tokens=1024,
-        # min_tokens=150,
-        temperature=0,
-        # use_beam_search=False,
-        # length_penalty=1.2,
-        best_of=1)
-
-    # 获取模型输出
-    outputs = llm.generate({
-        "prompt": prompt,
-        "multi_modal_data": {
-            "image": image
-        }
-    }, sampling_params=sampling_params)
-    print(outputs[0].outputs[0].text)
-
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--model-path", type=str, default=None, help="model path")
-    parser.add_argument("--image-path", type=str, default=None, help="sample image path")
-    args = parser.parse_args()
-
-    main(args)
\ No newline at end of file
diff --git a/tests/model_info.json b/tests/model_info.json
index e30687a1..2c9fec4f 100644
--- a/tests/model_info.json
+++ b/tests/model_info.json
@@ -4859,7 +4859,7 @@
         "demoType": ""
     },
     {
-        "model_name": "minicpm_v_2",
+        "model_name": "minicpm_v",
         "framework": "vllm",
         "release_version": "25.03",
         "release_sdk": "CoreX 4.2.0",
@@ -4871,8 +4871,8 @@
         "mdims": "",
         "dataset": "",
         "license": "",
-        "model_path": "models/multimodal/vision_language_model/minicpm_v_2/vllm/",
-        "readme_file": "models/multimodal/vision_language_model/minicpm_v_2/vllm/README.md",
+        "model_path": "models/multimodal/vision_language_model/minicpm_v/vllm/",
+        "readme_file": "models/multimodal/vision_language_model/minicpm_v/vllm/README.md",
         "bitbucket_repo": "",
         "bitbucket_branch": "",
         "bitbucket_path": "",
diff --git a/tests/run_vllm.py b/tests/run_vllm.py
index 96151bd4..a200569c 100644
--- a/tests/run_vllm.py
+++ b/tests/run_vllm.py
@@ -204,15 +204,6 @@ def run_nlp_testcase(model):
         export CUDA_VISIBLE_DEVICES=0,1
         python3 offline_inference.py --model ./stablelm --max-tokens 256 -tp 1 --temperature 0.0
         """
-    elif model_name == "minicpm-v-2":
-        script = f"""
-        set -x
-        cd ../{model['model_path']}
-        export PT_SDPA_ENABLE_HEAD_DIM_PADDING=1
-        export PATH=/usr/local/corex/bin:${PATH}
-        export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64
-        python3 minicpmv-2.0-offline.py --model-path ./minicpm-v-2 --image-path ./dog.jpg
-        """
    elif model_name.startswith("deepseek-r1-distill-"):
         script = f"""
         set -x
-- 
Gitee